add README in calc utils
This commit is contained in:
176
calc_utils/bin/gsub
Normal file
176
calc_utils/bin/gsub
Normal file
@ -0,0 +1,176 @@
|
||||
#!/bin/bash
set -u

# Usage: gsub <jobname>
#
# <jobname> is the job's base name without extension; the rest of this script
# derives <jobname>.gin, <jobname>.out and <jobname>.job from it.

job=${1:-}
if [[ -z "$job" ]]; then
  # Diagnostics go to stderr: gsub_wait captures this script's stdout and
  # parses it for the job ID, so stdout must carry nothing but qsub's output.
  echo "Usage: $0 <jobname-without-extension>" >&2
  exit 1
fi
|
||||
|
||||
# ==========================================
# 0. Safety check
# ==========================================
#######################################
# Decide whether a path is safe to hand to rm.
# Arguments: $1 - path that is about to be deleted (may be missing/empty)
# Outputs:   an "Error: ..." line on stderr when the path is rejected
# Returns:   0 if the path passed every check, 1 if it was rejected
#######################################
check_dangerous_path() {
  local path="${1:-}"
  # "/" followed only by empty, "." or ".." components, i.e. every purely
  # lexical spelling of the root directory: "/", "//", "/.", "/./", "/..".
  local root_re='^/(/|\./|\.\./)*(\.|\.\.)?$'

  # 1. Empty check
  if [[ -z "$path" ]]; then
    echo "Error: Empty path is dangerous for deletion." >&2
    return 1
  fi

  # 2. Root check. Comparing against the literal "/" alone would let "//" or
  #    "/." through, although they name the same directory.
  if [[ "$path" =~ $root_re ]]; then
    echo "Error: Root path '$path' is dangerous for deletion." >&2
    return 1
  fi

  # 3. Whitespace-only check
  if [[ "$path" =~ ^[[:space:]]+$ ]]; then
    echo "Error: Whitespace path is dangerous." >&2
    return 1
  fi

  return 0
}
|
||||
|
||||
# ==========================================
# 1. Check host
# ==========================================
# Submission only happens on the host whose short name is "cluster". On any
# other host this script re-runs itself there over SSH, in the directory that
# corresponds to the current one, and exits with the remote exit status.
host_short=$(hostname -s 2>/dev/null || hostname)
if [[ "$host_short" != "cluster" ]]; then
  # Assumption carried over from the original script: the local mount point
  # /mnt/home corresponds to /home on the cluster.
  local_prefix="/mnt/home"
  remote_prefix="/home"

  # Print the remote counterpart of a local path. Only a leading /mnt/home
  # path component is rewritten; a later "/mnt/home" inside the path, or a
  # sibling such as /mnt/homework, is left alone.
  to_remote_path() {
    local p=$1
    if [[ "$p" == "$local_prefix" || "$p" == "$local_prefix"/* ]]; then
      printf '%s\n' "$remote_prefix${p#"$local_prefix"}"
    else
      printf '%s\n' "$p"
    fi
  }

  cur_dir=$(pwd)
  remote_dir=$(to_remote_path "$cur_dir")

  # Absolute path of this script, translated the same way.
  script_dir=$(cd "$(dirname "$0")" && pwd) || {
    echo "Error: cannot resolve the directory of $0" >&2
    exit 1
  }
  script_name=$(basename "$0")
  remote_script=$(to_remote_path "$script_dir/$script_name")

  echo "Running remotely on cluster: $remote_script" >&2

  # ssh joins its arguments into one string that the remote shell re-parses,
  # so every value is shell-quoted with %q instead of being wrapped in bare
  # single quotes (which break as soon as a value contains a quote).
  printf -v remote_cmd 'cd %q && %q %q' "$remote_dir" "$remote_script" "$job"
  ssh cluster "$remote_cmd"
  exit $?
fi

# ==========================================
# 2. Prepare job
# ==========================================

gin_file="$job.gin"
if [[ ! -f "$gin_file" ]]; then
  echo "Error: $gin_file not found in $(pwd)" >&2
  exit 2
fi

# Resource selection: take the first %NProcShared=<n> line of the input
# (matched case-insensitively; \+ and the I flag are GNU sed extensions).
proc=$(sed -n 's/^%NProcShared=\([0-9]\+\).*$/\1/pI' "$gin_file" | head -n 1)

# Queue "n<N>" with ppn=<N>; only N=20 and N=32 are accepted.
queue=""
ppn=""
case "$proc" in
  20|32)
    queue="n$proc"
    ppn="$proc"
    ;;
  *)
    echo "Error: Unsupported NProcShared=$proc in $gin_file. Only 20 or 32 allowed." >&2
    exit 1
    ;;
esac

# Remove the output of a previous run so the new run starts from a clean file.
if [[ -f "$job.out" ]]; then
  if check_dangerous_path "$job.out"; then
    rm -- "$job.out"
  else
    echo "Skipping deletion of unsafe path: $job.out" >&2
    exit 1
  fi
fi

# ==========================================
# 3. Generate the job script (.job)
# ==========================================
job_file="$job.job"

# The heredoc below is expanded now, so the file names are embedded in the
# generated script as shell-quoted words; a name containing spaces or shell
# metacharacters then survives the redirections of the g16 command line.
printf -v gin_quoted '%q' "$gin_file"
printf -v out_quoted '%q' "$job.out"

# Unescaped \$ sequences end up as literal $ in the job script and are expanded
# only when PBS runs it.
if ! cat > "$job_file" <<EOF
#!/bin/sh
#PBS -l nodes=1:ppn=$ppn
#PBS -q $queue
#PBS -j oe
#PBS -N $job

cd "\$PBS_O_WORKDIR" || {
  echo "Error: cannot cd to \$PBS_O_WORKDIR"
  exit 1
}

# Define Safety Check Function in Job Script
check_rm_path() {
  p="\$1"
  # Empty check
  if [ -z "\$p" ]; then
    echo "Refusing to delete empty path"
    return 1
  fi
  # Root check
  if [ "\$p" = "/" ]; then
    echo "Refusing to delete root path"
    return 1
  fi
  return 0
}

export g16root=/share/apps/soft
. "\$g16root/g16/bsd/g16.profile"

# Create Scratch Directory
if [ -n "\$USER" ] && [ -n "\$PBS_JOBID" ]; then
  GAUSS_SCRDIR="/scr/\$USER/\$PBS_JOBID"
  mkdir -p "\$GAUSS_SCRDIR" || {
    echo "Error: cannot create scratch directory \$GAUSS_SCRDIR"
    exit 1
  }
  export GAUSS_SCRDIR
else
  echo "Error: USER or PBS_JOBID not set. Cannot setup scratch."
  exit 1
fi

NODES=\$(uniq "\$PBS_NODEFILE")
echo "--------------------------------------------------------"
echo " JOBID: \$PBS_JOBID"
echo " The job was started at \$(date)"
echo " The job was running at \$NODES."
echo "--------------------------------------------------------"

# Run G16 Job
gsub_input=$gin_quoted
gsub_output=$out_quoted
echo "Executing: g16 < \$gsub_input > \$gsub_output"
g16 < "\$gsub_input" > "\$gsub_output"

echo "--------------------------------------------------------"
echo " The job was finished at \$(date)"
echo "--------------------------------------------------------"

# Delete the tmp File (Cleanup Scratch)
echo "Cleaning up \$GAUSS_SCRDIR"
if check_rm_path "\$GAUSS_SCRDIR"; then
  rm -rf "\$GAUSS_SCRDIR"
fi

EOF
then
  echo "Error: cannot write $job_file" >&2
  exit 1
fi

# ==========================================
# 4. Submit
# ==========================================
# qsub prints the job ID (for example 12345.cluster) on stdout. It is the last
# command, so its exit status becomes the exit status of this script.
qsub "$job_file"
|
||||
116
calc_utils/bin/gsub_wait
Normal file
116
calc_utils/bin/gsub_wait
Normal file
@ -0,0 +1,116 @@
|
||||
#!/bin/bash
set -u

# Usage: gsub_wait <jobname>
#
# Submits <jobname> through gsub and, unless GSUB_SILENT=1, follows
# <jobname>.out until the PBS output file <jobname>.job.o<jobid> appears,
# printing every "SCF Done" energy and every Gaussian termination it sees.

job=${1:-}
if [[ -z "$job" ]]; then
  echo "Usage: $0 <jobname-without-extension>" >&2
  exit 1
fi

# ==========================================
# 1. Submit job
# ==========================================

# Prefer an executable ./gsub in the current directory, otherwise use PATH.
if [[ -x "./gsub" ]]; then
  GSUB_CMD="./gsub"
else
  GSUB_CMD="gsub"
fi

# gsub may run itself remotely over SSH; either way its stdout is what qsub
# printed.
output=$("$GSUB_CMD" "$job")
gsub_status=$?
echo "$output"

# Stop here when submission failed. Otherwise any number inside an error
# message would be mistaken for a job ID below and the monitor loop would wait
# forever for an end file that never appears.
if (( gsub_status != 0 )); then
  echo "Error: $GSUB_CMD exited with status $gsub_status. Monitor skipped." >&2
  exit "$gsub_status"
fi

# ==========================================
# 2. Check silent mode
# ==========================================
# With GSUB_SILENT=1 the job is submitted but not monitored.
if [[ "${GSUB_SILENT:-0}" == "1" ]]; then
  exit 0
fi

# ==========================================
# 3. Monitor progress
# ==========================================

# Extract the job ID, e.g. 67147.cluster -> 67147.
jobid_full=$(echo "$output" | grep -oE '[0-9]+\.cluster|[0-9]+' | head -n 1 || true)

if [[ -n "$jobid_full" ]]; then
  jobid=${jobid_full%%.*}

  out_file="$job.out"
  gin_file="$job.gin"
  end_file="$job.job.o$jobid"   # its appearance is used as the "job finished" marker

  if [[ ! -f "$gin_file" ]]; then
    # Input not visible from here (possibly a remote path mismatch).
    echo "Warning: $gin_file not found nearby. Skipping monitor."
    exit 0
  fi

  # Total steps = number of --Link1-- separators + 1. The match is
  # case-insensitive so both "--link1--" and "--Link1--" are counted.
  link_count=$(grep -ci -- "--link1--" "$gin_file" || true)
  total=$(( ${link_count:-0} + 1 ))
  cntDone=0
  cntSCF=0

  last_lines=0   # number of complete lines of $out_file already processed
  finished=0

  echo "Monitoring Job $jobid..."

  while true; do
    # A. Sample the end marker BEFORE reading the output, but act on it only
    #    afterwards: everything written up to the end of the job is then still
    #    processed in this final pass instead of being dropped.
    if [[ -f "$end_file" ]]; then
      finished=1
    fi

    # B. Process the lines added to the output file since the last pass.
    if [[ -f "$out_file" ]]; then
      curr_lines=$(wc -l < "$out_file" 2>/dev/null) || curr_lines=0
      curr_lines=${curr_lines//[[:space:]]/}   # some wc implementations pad the count
      [[ "$curr_lines" =~ ^[0-9]+$ ]] || curr_lines=0

      # The file shrank (truncated or regenerated): start over from the top.
      if (( curr_lines < last_lines )); then last_lines=0; fi

      if (( curr_lines > last_lines )); then
        # Read exactly lines last_lines+1 .. curr_lines. Reading "to EOF"
        # would also pick up lines appended after the wc above; those would
        # then be read again on the next pass and counted twice.
        # Process substitution keeps the counters in the current shell.
        while IFS= read -r line; do

          # SCF Done: ... E ... = <value> A.U.
          if [[ "$line" =~ SCF[[:space:]]Done:.*E.*=[[:space:]]*([-0-9.]+)[[:space:]]*A\.U\. ]]; then
            energy="${BASH_REMATCH[1]}"
            cntSCF=$((cntSCF + 1))
            echo "$job: SCF Done: $energy [$cntSCF] ($cntDone/$total)"
          fi

          # Any "termination of Gaussian" line closes one step.
          if [[ "$line" == *"termination of Gaussian"* ]]; then
            cntDone=$((cntDone + 1))
            echo "$job: task done ($cntDone/$total)"
          fi

        done < <(sed -n "$((last_lines + 1)),${curr_lines}p;${curr_lines}q" < "$out_file")

        last_lines=$curr_lines
      fi
    fi

    if (( finished )); then
      echo "Job finished (found $end_file)."
      break
    fi

    sleep 2
  done

  # C. Final consistency check
  if (( cntDone != total )); then
    echo "Warning: cntDone ($cntDone) != total ($total)"
  fi
else
  echo "Could not parse Job ID from output. Monitor skipped."
fi
|
||||
Reference in New Issue
Block a user