Files
tools/calc_utils/bin/gsub_wait
2026-01-11 11:43:35 +08:00

117 lines
3.2 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# gsub_wait - submit a job through gsub and, unless GSUB_SILENT=1, follow its
# progress until the scheduler's end-of-job file appears.
#
# Usage: gsub_wait <jobname>
#   <jobname> is the job's base name without extension; the script reads
#   <jobname>.gin and <jobname>.out and waits for <jobname>.job.o<jobid>.
#
# Environment:
#   GSUB_SILENT  when set to 1, submit only and exit without monitoring.

# Treat references to unset variables as errors.
set -u

job=${1:-}
if [[ -z "$job" ]]; then
  # A usage error is a diagnostic, so it goes to stderr and keeps stdout
  # clean for callers that capture the script's output.
  printf 'Usage: %s <jobname-without-extension>\n' "$0" >&2
  exit 1
fi
# ==========================================
# 1. Submit Job
# ==========================================
# Locate the gsub command: prefer an executable ./gsub in the current
# directory, otherwise fall back to whatever "gsub" resolves to on PATH.
if [[ -x "./gsub" ]]; then
  GSUB_CMD="./gsub"
else
  GSUB_CMD="gsub"
fi

# Run gsub and capture its stdout; the job id is parsed from it later.
# Note: gsub may internally run on a remote host over SSH and is expected to
# hand back qsub's output.
# The exit status is recorded instead of ignored: a failed submission must not
# be reported as success, and its error text (which may contain digits) must
# not be mistaken for a job id to wait on.
gsub_status=0
output=$("$GSUB_CMD" "$job") || gsub_status=$?
printf '%s\n' "$output"

if (( gsub_status != 0 )); then
  printf 'Error: %s failed with exit status %s.\n' "$GSUB_CMD" "$gsub_status" >&2
  exit "$gsub_status"
fi
# ==========================================
# 2. Check Silent Mode
# ==========================================
# GSUB_SILENT=1 means "submit only": skip the monitor and exit successfully.
# When the variable is unset, empty, or holds any other value, execution
# continues with the monitoring section below.
case "${GSUB_SILENT:-0}" in
  1) exit 0 ;;
esac
# ==========================================
# 3. Monitor Progress
# ==========================================

#######################################
# Extract the numeric job id from the submission output.
# A "<digits>.cluster" token (e.g. 67147.cluster -> 67147) is preferred, so a
# number that merely appears earlier in the text (a job name such as "mol2",
# a banner, ...) is not mistaken for the id. If no such token exists, the
# first run of digits is used.
# Arguments: $1 - text printed by the submit command
# Outputs:   the numeric id on stdout (an empty line when none is found)
#######################################
extract_jobid() {
  local text=$1 id
  # "|| true": finding no match is a normal outcome, not an error.
  id=$(grep -oE '[0-9]+\.cluster' <<<"$text" | head -n 1 || true)
  if [[ -z "$id" ]]; then
    id=$(grep -oE '[0-9]+' <<<"$text" | head -n 1 || true)
  fi
  # Drop the ".cluster" suffix, if any.
  printf '%s\n' "${id%%.*}"
}

#######################################
# Compute the number of steps in an input file:
# (number of lines containing --link1--) + 1.
# The match is case-insensitive so "--Link1--" is counted as well.
# Arguments: $1 - path to the .gin input file
# Outputs:   the step count on stdout
#######################################
count_total_steps() {
  local links
  # grep -c exits non-zero when the count is 0; that is not an error here.
  links=$(grep -ci -- "--link1--" "$1" || true)
  printf '%s\n' "$((links + 1))"
}

#######################################
# Process the lines appended to the output file since the previous call and
# report "SCF Done" energies and "termination of Gaussian" markers.
# Globals:   job, total (read); cntSCF, cntDone, last_lines (read/written)
# Arguments: $1 - path to the .out file (it may not exist yet)
# Outputs:   progress messages on stdout
#######################################
scan_new_output() {
  local file=$1 curr_lines start line energy
  # Matches: SCF Done: ... E ... = <number> A.U.
  local -r scf_re='SCF[[:space:]]Done:.*E.*=[[:space:]]*([-0-9.]+)[[:space:]]*A\.U\.'

  [[ -f "$file" ]] || return 0

  # The braces make 2>/dev/null also cover a failure to open the file
  # (it can disappear between the -f test and this read).
  curr_lines=$({ wc -l < "$file"; } 2>/dev/null || echo 0)

  # A shrinking file was truncated or regenerated: start over from the top.
  if (( curr_lines < last_lines )); then
    last_lines=0
  fi
  (( curr_lines > last_lines )) || return 0

  start=$((last_lines + 1))
  # Process substitution keeps the loop in the current shell so the counters
  # survive (a pipeline would run the loop in a subshell).
  while IFS= read -r line; do
    # Advance by what was actually consumed rather than by the earlier
    # wc -l snapshot: the file may have grown in between, and those extra
    # lines would otherwise be processed again on the next poll. A trailing
    # partial line (no newline yet) never reaches the loop body, so it is
    # picked up once it is complete.
    last_lines=$((last_lines + 1))

    if [[ "$line" =~ $scf_re ]]; then
      energy="${BASH_REMATCH[1]}"
      cntSCF=$((cntSCF + 1))
      echo "$job: SCF Done: $energy [$cntSCF] ($cntDone/$total)"
    fi

    if [[ "$line" == *"termination of Gaussian"* ]]; then
      cntDone=$((cntDone + 1))
      echo "$job: task done ($cntDone/$total)"
    fi
  done < <(tail -n "+$start" "$file")
}

jobid=$(extract_jobid "${output:-}")
if [[ -n "$jobid" ]]; then
  out_file="$job.out"
  gin_file="$job.gin"
  end_file="$job.job.o$jobid"

  if [[ ! -f "$gin_file" ]]; then
    # Without the input file the step total is unknown (possibly a remote
    # path issue), so monitoring is skipped.
    echo "Warning: $gin_file not found nearby. Skipping monitor."
    exit 0
  fi

  total=$(count_total_steps "$gin_file")
  cntDone=0
  cntSCF=0
  last_lines=0

  echo "Monitoring Job $jobid..."
  while true; do
    # A. Look for the end-of-job file BEFORE reading the output, then read
    #    the output one more time before leaving the loop. Everything written
    #    before the end file appeared is therefore still processed, including
    #    the final termination line.
    job_ended=0
    if [[ -f "$end_file" ]]; then
      job_ended=1
    fi

    # B. Consume whatever was appended to the .out file.
    scan_new_output "$out_file"

    if (( job_ended )); then
      echo "Job finished (found $end_file)."
      break
    fi
    sleep 2
  done

  # C. Final check: every step should have reported a termination line.
  if (( cntDone != total )); then
    echo "Warning: cntDone ($cntDone) != total ($total)"
  fi
else
  echo "Could not parse Job ID from output. Monitor skipped."
fi