add README in calc utils

This commit is contained in:
2026-01-11 11:43:35 +08:00
parent 5e94b202b5
commit 6f304a634c
3 changed files with 365 additions and 0 deletions

176
calc_utils/bin/gsub Normal file
View File

@ -0,0 +1,176 @@
#!/bin/bash
#
# gsub - build a PBS job script for a Gaussian input file and submit it.
#
# Usage: gsub <jobname>
#   <jobname> is the name of the input file without its .gin extension.

# Treat any use of an unset variable as an error.
set -u

job=${1:-}
if [[ -z "$job" ]]; then
  # Diagnostics belong on stderr: stdout is what callers capture in order
  # to read the qsub reply.
  echo "Usage: $0 <jobname-without-extension>" >&2
  exit 1
fi
# ==========================================
# 0. Safety check for deletions
# ==========================================
#######################################
# Decide whether a path may be handed to rm.
# Arguments: $1 - path about to be deleted (may be empty or missing)
# Outputs:   the reason on stderr when the path is rejected
# Returns:   0 if the path is acceptable, 1 if deleting it is dangerous
#######################################
check_dangerous_path() {
  local path="${1:-}"
  # Matches "/" and every other spelling of the root directory that is made
  # only of slashes and "." / ".." components: "//", "/.", "/./", "/..", ...
  local root_re='^/+(\.\.?/+)*(\.\.?)?$'
  local blank_re='^[[:space:]]+$'

  # 1. Empty check
  if [[ -z "$path" ]]; then
    echo "Error: Empty path is dangerous for deletion." >&2
    return 1
  fi

  # 2. Root check (including the equivalent spellings described above)
  if [[ "$path" =~ $root_re ]]; then
    echo "Error: Root path '$path' is dangerous for deletion." >&2
    return 1
  fi

  # 3. Whitespace-only check
  if [[ "$path" =~ $blank_re ]]; then
    echo "Error: Whitespace path is dangerous." >&2
    return 1
  fi

  return 0
}
# ==========================================
# 1. Check the host we are running on
# ==========================================
# When this is not the host named "cluster", re-run this same script there
# over SSH and exit with the status that ssh reports.

# Print $1 wrapped in single quotes for the remote shell. Every embedded '
# becomes '\'' so the value cannot end the quoting early.
remote_quote() {
  local text=$1
  local sq="'"
  local esc="'\\''"
  printf "'%s'" "${text//$sq/$esc}"
}

# Print the cluster-side spelling of local path $1. Only a leading /mnt/home
# component is rewritten to /home (assumed mount layout - adjust per site);
# any other path is printed unchanged.
to_remote_path() {
  case "$1" in
    /mnt/home)   printf '%s\n' "/home" ;;
    /mnt/home/*) printf '%s\n' "/home/${1#/mnt/home/}" ;;
    *)           printf '%s\n' "$1" ;;
  esac
}

host_short=$(hostname -s 2>/dev/null || hostname)
if [[ "$host_short" != "cluster" ]]; then
  remote_dir=$(to_remote_path "$(pwd)")

  # Absolute location of this script, translated the same way as the
  # working directory.
  if ! script_dir=$(cd "$(dirname -- "$0")" && pwd); then
    echo "Error: cannot determine the directory of $0" >&2
    exit 1
  fi
  remote_script=$(to_remote_path "$script_dir/$(basename -- "$0")")

  echo "Running remotely on cluster: $remote_script" >&2
  # ssh passes a single command string to the remote shell, so each value
  # is quoted on its own before being spliced in.
  ssh cluster "cd $(remote_quote "$remote_dir") && $(remote_quote "$remote_script") $(remote_quote "$job")"
  exit $?
fi
# ==========================================
# 2. Prepare the job
# ==========================================
gin_file="$job.gin"
if [[ ! -f "$gin_file" ]]; then
  # Errors go to stderr so they are never mixed into the stdout that a
  # caller captures to read the qsub reply.
  echo "Error: $gin_file not found in $(pwd)" >&2
  exit 2
fi

# Derive the resources from the first "%NProcShared=<n>" line of the input.
# The I flag makes the match case-insensitive; \+ and I are GNU sed features.
proc=$(sed -n 's/^%NProcShared=\([0-9]\+\).*$/\1/pI' -- "$gin_file" | head -n 1)
queue=""
ppn=""
case "$proc" in
  20 | 32)
    # The queue is named after the core count: n20 or n32.
    queue="n$proc"
    ppn="$proc"
    ;;
  *)
    echo "Error: Unsupported NProcShared=$proc in $gin_file. Only 20 or 32 allowed." >&2
    exit 1
    ;;
esac

# Remove the output of a previous run so stale results are never read.
if [[ -f "$job.out" ]]; then
  if ! check_dangerous_path "$job.out"; then
    echo "Skipping deletion of unsafe path: $job.out" >&2
    exit 1
  fi
  # "--" keeps a name that starts with "-" from being parsed as an option.
  if ! rm -- "$job.out"; then
    echo "Error: could not remove old $job.out" >&2
    exit 1
  fi
fi
# ==========================================
# 3. Generate the PBS job script (.job)
# ==========================================
job_file="$job.job"

#######################################
# Write the PBS job script for the current job.
# Globals:   job, gin_file, queue, ppn (read)
# Arguments: $1 - path of the job script to create
# Returns:   non-zero if the file cannot be written
#######################################
write_job_script() {
  # Inside the here-document a plain $name is expanded now, while the file
  # is being written; \$name and \$(...) are emitted literally and are only
  # evaluated later, when the job itself runs.
  cat > "$1" <<EOF
#!/bin/sh
#PBS -l nodes=1:ppn=$ppn
#PBS -q $queue
#PBS -j oe
#PBS -N $job

# The input file is addressed relative to the submission directory.
cd "\$PBS_O_WORKDIR" || exit 1

# Define Safety Check Function in Job Script
check_rm_path() {
    p="\$1"
    # Empty check
    if [ -z "\$p" ]; then
        echo "Refusing to delete empty path"
        return 1
    fi
    # Root check
    if [ "\$p" = "/" ]; then
        echo "Refusing to delete root path"
        return 1
    fi
    return 0
}

export g16root=/share/apps/soft
. "\$g16root/g16/bsd/g16.profile"

# Create Scratch Directory
if [ -n "\$USER" ] && [ -n "\$PBS_JOBID" ]; then
    mkdir -p "/scr/\$USER/\$PBS_JOBID"
    export GAUSS_SCRDIR="/scr/\$USER/\$PBS_JOBID"
else
    echo "Error: USER or PBS_JOBID not set. Cannot setup scratch."
    exit 1
fi

NODES=\$(uniq "\$PBS_NODEFILE")
echo "--------------------------------------------------------"
echo " JOBID: \$PBS_JOBID"
echo " The job was started at \$(date)"
echo " The job was running at \$NODES."
echo "--------------------------------------------------------"

# Run G16 Job
echo "Executing: g16 < $gin_file > $job.out"
g16 < "$gin_file" > "$job.out"

echo "--------------------------------------------------------"
echo " The job was finished at \$(date)"
echo "--------------------------------------------------------"

# Delete the tmp File (Cleanup Scratch)
echo "Cleaning up \$GAUSS_SCRDIR"
if check_rm_path "\$GAUSS_SCRDIR"; then
    rm -rf "\$GAUSS_SCRDIR"
fi
EOF
}

# Do not go on to submission with a missing or truncated job script.
if ! write_job_script "$job_file"; then
  echo "Error: could not write $job_file" >&2
  exit 1
fi
# ==========================================
# 4. Submit the job
# ==========================================
# Hand the generated job script to PBS. qsub prints the new job ID on
# stdout, for example 12345.cluster.
qsub "$job_file"

116
calc_utils/bin/gsub_wait Normal file
View File

@ -0,0 +1,116 @@
#!/bin/bash
#
# gsub_wait - submit a job through gsub and report its progress until the
# PBS output file appears.
#
# Usage: gsub_wait <jobname>
#   <jobname> is the name of the input file without its .gin extension.
# Environment:
#   GSUB_SILENT=1  submit only, do not monitor.

# Treat any use of an unset variable as an error.
set -u

job=${1:-}
if [[ -z "$job" ]]; then
  # A usage error is a diagnostic, so it is written to stderr.
  echo "Usage: $0 <jobname-without-extension>" >&2
  exit 1
fi
# ==========================================
# 1. Submit the job
# ==========================================
# Locate gsub: an executable ./gsub in the current directory wins,
# otherwise the one found through PATH is used.
if [[ -x "./gsub" ]]; then
  GSUB_CMD="./gsub"
else
  GSUB_CMD="gsub"
fi

# Run gsub and capture its stdout. gsub may re-run itself on the cluster
# over SSH; in either case the captured text is expected to hold the qsub
# reply.
gsub_status=0
output=$("$GSUB_CMD" "$job") || gsub_status=$?
echo "$output"

# A failed submission has no job to wait for; without this check any digits
# in the captured text could be mistaken for a job ID further down.
if (( gsub_status != 0 )); then
  echo "Error: $GSUB_CMD failed with exit status $gsub_status." >&2
  exit "$gsub_status"
fi

# ==========================================
# 2. Silent mode
# ==========================================
# GSUB_SILENT=1 means "submit only": skip the monitoring and exit now.
if [[ "${GSUB_SILENT:-0}" == "1" ]]; then
  exit 0
fi
# ==========================================
# 3. Monitor the job's progress
# ==========================================

#######################################
# Extract the numeric job ID from the text printed at submission.
# Arguments: $1 - captured submission output (e.g. "67147.cluster")
# Outputs:   the first number found with any ".host" suffix removed
#            (e.g. "67147"), or an empty line when there is none
#######################################
extract_job_id() {
  local first
  first=$(grep -oE '[0-9]+\.cluster|[0-9]+' <<<"$1" | head -n 1) || true
  printf '%s\n' "${first%%.*}"
}

#######################################
# Count the steps of a Gaussian input: number of --Link1-- separators + 1.
# The separator is matched case-insensitively, so "--Link1--" and
# "--link1--" both count.
# Arguments: $1 - path of the .gin file
# Outputs:   the step count on stdout
#######################################
count_gaussian_steps() {
  local links
  links=$(grep -ci -- '--link1--' "$1") || true
  printf '%s\n' "$(( ${links:-0} + 1 ))"
}

#######################################
# Report progress found in lines $2..$3 (1-based, inclusive) of file $1.
# Reading a fixed range means lines appended while we are reading are left
# for the next poll instead of being counted twice.
# Globals:   job, total (read); cntSCF, cntDone (updated)
# Arguments: $1 - output file, $2 - first line, $3 - last line
# Outputs:   one progress line per SCF result / finished step
#######################################
scan_output_range() {
  local file=$1 from=$2 to=$3
  local line energy
  # SCF Done: ... E ... = <number> A.U.
  local scf_re='SCF[[:space:]]Done:.*E.*=[[:space:]]*([-0-9.]+)[[:space:]]*A\.U\.'

  # Process substitution keeps the loop in this shell so the counters
  # survive it.
  while IFS= read -r line; do
    if [[ "$line" =~ $scf_re ]]; then
      energy="${BASH_REMATCH[1]}"
      cntSCF=$((cntSCF + 1))
      echo "$job: SCF Done: $energy [$cntSCF] ($cntDone/$total)"
    fi
    if [[ "$line" == *"termination of Gaussian"* ]]; then
      cntDone=$((cntDone + 1))
      echo "$job: task done ($cntDone/$total)"
    fi
  done < <(sed -n "${from},${to}p;${to}q" -- "$file")
}

jobid=$(extract_job_id "$output")
if [[ -n "$jobid" ]]; then
  out_file="$job.out"
  gin_file="$job.gin"
  # The appearance of the PBS output file marks the end of the job. PBS
  # names it <job name>.o<id>; the job name defaults to the script name
  # ($job.job) and is $job when the script carries "#PBS -N $job", so both
  # spellings are accepted.
  end_files=("$job.job.o$jobid" "$job.o$jobid")

  if [[ ! -f "$gin_file" ]]; then
    # Without the input file the number of steps is unknown.
    echo "Warning: $gin_file not found nearby. Skipping monitor."
    exit 0
  fi

  total=$(count_gaussian_steps "$gin_file")
  cntDone=0
  cntSCF=0
  last_lines=0
  echo "Monitoring Job $jobid..."

  while true; do
    # A. Look for the end marker BEFORE reading the output. If it is
    #    already there, the read below is the final one and picks up every
    #    remaining line before the loop ends.
    finished_marker=""
    for candidate in "${end_files[@]}"; do
      if [[ -f "$candidate" ]]; then
        finished_marker=$candidate
        break
      fi
    done

    # B. Read whatever was appended to the .out file since the last poll.
    if [[ -f "$out_file" ]]; then
      curr_lines=$(wc -l < "$out_file" 2>/dev/null || echo 0)
      curr_lines=$(( curr_lines ))  # normalise padding some wc builds print
      # A shrinking file was truncated or recreated: start over.
      if (( curr_lines < last_lines )); then last_lines=0; fi
      if (( curr_lines > last_lines )); then
        scan_output_range "$out_file" "$((last_lines + 1))" "$curr_lines"
        last_lines=$curr_lines
      fi
    fi

    if [[ -n "$finished_marker" ]]; then
      echo "Job finished (found $finished_marker)."
      break
    fi
    sleep 2
  done

  # C. Final consistency check
  if (( cntDone != total )); then
    echo "Warning: cntDone ($cntDone) != total ($total)"
  fi
else
  echo "Could not parse Job ID from output. Monitor skipped."
fi