Files
tools/calc_utils/bin/gsub
2026-01-11 11:43:35 +08:00

177 lines
4.3 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# gsub: generate a PBS job script for a Gaussian 16 input file and submit it.
# Usage: gsub <jobname>
#   <jobname> is the input file name without its ".gin" extension.
# -u: treat any reference to an unset variable as an error.
set -u

# Job name is the first positional argument; default to empty so that the
# check below (rather than set -u) reports a missing argument.
job=${1:-}
if [[ -z "$job" ]]; then
  # Usage errors are diagnostics, so they belong on stderr.
  echo "Usage: $0 <jobname-without-extension>" >&2
  exit 1
fi
# ==========================================
# 0. Safety check
# ==========================================
#######################################
# Decide whether a path is safe to hand to a delete command.
# Arguments: $1 - path that is about to be deleted (may be empty or omitted)
# Outputs:   writes the rejection reason to stderr
# Returns:   0 if the path may be deleted, 1 if it is considered dangerous
#######################################
check_dangerous_path() {
  local path="${1:-}"
  # An absolute path whose components are all empty, "." or ".." still
  # resolves to "/" (e.g. "//", "/.", "/../"), so treat it like the root.
  local -r root_alias_re='^/(/|\./|\.\./)*(\.|\.\.)?$'

  # 1. Empty check
  if [[ -z "$path" ]]; then
    echo "Error: Empty path is dangerous for deletion." >&2
    return 1
  fi

  # 2. Root check (literal "/")
  if [[ "$path" == "/" ]]; then
    echo "Error: Root path '/' is dangerous for deletion." >&2
    return 1
  fi

  # 2b. Root aliases such as "//" or "/./.."
  if [[ "$path" =~ $root_alias_re ]]; then
    echo "Error: Path '$path' resolves to root '/' and is dangerous for deletion." >&2
    return 1
  fi

  # 3. Whitespace-only check
  if [[ "$path" =~ ^[[:space:]]+$ ]]; then
    echo "Error: Whitespace path is dangerous." >&2
    return 1
  fi

  return 0
}
# ==========================================
# 1. Check host
# ==========================================
# When not running on the host named "cluster", re-run this script there
# over SSH and exit with the remote exit status.
host_short=$(hostname -s 2>/dev/null || hostname)
if [[ "$host_short" != "cluster" ]]; then
  # Map a local path to its remote equivalent. Assumes the local mount point
  # /mnt/home corresponds to /home on the cluster. Only a leading /mnt/home
  # path component is rewritten; "/mnt/homework" or a "/mnt/home" further
  # down the path is left untouched.
  _gsub_remote_path() {
    local p=$1
    if [[ "$p" == /mnt/home || "$p" == /mnt/home/* ]]; then
      printf '%s\n' "/home${p#/mnt/home}"
    else
      printf '%s\n' "$p"
    fi
  }

  cur_dir=$(pwd)
  remote_dir=$(_gsub_remote_path "$cur_dir")

  # Locate this script by absolute path and map it the same way.
  if ! script_dir=$(cd -- "$(dirname -- "$0")" && pwd); then
    echo "Error: cannot resolve the directory of $0" >&2
    exit 1
  fi
  script_name=$(basename -- "$0")
  local_script="$script_dir/$script_name"
  remote_script=$(_gsub_remote_path "$local_script")

  # Run ourselves on the cluster. Each word is shell-quoted with %q so that
  # spaces, quotes or metacharacters in a path or job name cannot break (or
  # inject into) the command line parsed by the remote shell.
  echo "Running remotely on cluster: $remote_script" >&2
  remote_cmd=$(printf 'cd %q && %q %q' "$remote_dir" "$remote_script" "$job")
  ssh cluster "$remote_cmd"
  exit $?
fi
# ==========================================
# 2. Prepare job
# ==========================================
gin_file="$job.gin"
if [[ ! -f "$gin_file" ]]; then
  echo "Error: $gin_file not found in $(pwd)" >&2
  exit 2
fi

# Determine resources from the input file: take the number from the first
# line starting with %NProcShared=NN (matched case-insensitively).
# "--" keeps a job name starting with "-" from being parsed as a sed option.
proc=$(sed -n 's/^%NProcShared=\([0-9]\+\).*$/\1/pI' -- "$gin_file" | head -n 1)

# Only 20- and 32-core requests are supported; the queue is "n" + core count.
queue=""
ppn=""
case "$proc" in
  20 | 32)
    queue="n$proc"
    ppn="$proc"
    ;;
  *)
    echo "Error: Unsupported NProcShared=$proc in $gin_file. Only 20 or 32 allowed." >&2
    exit 1
    ;;
esac

# Clean up old output: remove a stale <job>.out, guarded by the safety check.
# (The original script slept before deleting; that is no longer done.)
if [[ -f "$job.out" ]]; then
  if check_dangerous_path "$job.out"; then
    # "--" protects against names starting with "-"; abort if the old output
    # cannot be removed instead of silently continuing.
    if ! rm -- "$job.out"; then
      echo "Error: could not remove $job.out" >&2
      exit 1
    fi
  else
    echo "Skipping deletion of unsafe path: $job.out" >&2
    exit 1
  fi
fi
# ==========================================
# 3. Generate the job script (.job)
# ==========================================
job_file="$job.job"

# Shell-quote the file names once so they survive word splitting and globbing
# when they are pasted into the generated script. For ordinary names the
# quoted form is identical to the plain name.
gin_q=$(printf '%q' "$gin_file")
out_q=$(printf '%q' "$job.out")

# Generate the PBS script with a heredoc. It merges the former g16_32.pbs
# template with the lines gsub32 used to append. Unescaped $var is expanded
# now (at generation time); \$var is left for the job to expand at run time.
if ! cat > "$job_file" <<EOF
#!/bin/sh
#PBS -l nodes=1:ppn=$ppn
#PBS -q $queue
#PBS -j oe
#PBS -N $job
# Run from the submission directory; abort rather than run g16 elsewhere.
cd "\$PBS_O_WORKDIR" || exit 1

# Define Safety Check Function in Job Script:
# refuse to delete an empty path or the filesystem root.
check_rm_path() {
  p="\$1"
  # Empty check
  if [ -z "\$p" ]; then
    echo "Refusing to delete empty path"
    return 1
  fi
  # Root check
  if [ "\$p" = "/" ]; then
    echo "Refusing to delete root path"
    return 1
  fi
  return 0
}

export g16root=/share/apps/soft
# "." instead of "source": this script runs under /bin/sh.
. "\$g16root/g16/bsd/g16.profile"

# Create Scratch Directory
if [ -n "\$USER" ] && [ -n "\$PBS_JOBID" ]; then
  GAUSS_SCRDIR=/scr/\$USER/\$PBS_JOBID
  export GAUSS_SCRDIR
  if ! mkdir -p "\$GAUSS_SCRDIR"; then
    echo "Error: cannot create scratch directory \$GAUSS_SCRDIR"
    exit 1
  fi
else
  echo "Error: USER or PBS_JOBID not set. Cannot setup scratch."
  exit 1
fi

NODES=\$(uniq "\$PBS_NODEFILE")
echo "--------------------------------------------------------"
echo " JOBID: \$PBS_JOBID"
echo " The job was started at \$(date)"
echo " The job was running at \$NODES."
echo "--------------------------------------------------------"

# Run G16 Job
echo "Executing: g16 < "$gin_q" > "$out_q
g16 < $gin_q > $out_q

echo "--------------------------------------------------------"
echo " The job was finished at \$(date)"
echo "--------------------------------------------------------"

# Delete the tmp File (Cleanup Scratch)
echo "Cleaning up \$GAUSS_SCRDIR"
if check_rm_path "\$GAUSS_SCRDIR"; then
  rm -rf -- "\$GAUSS_SCRDIR"
fi
EOF
then
  # Do not go on to submit a missing or stale job file.
  echo "Error: failed to write $job_file" >&2
  exit 1
fi
# ==========================================
# 4. Submit
# ==========================================
# Submit the generated job script. qsub prints the job ID, e.g. 12345.cluster
qsub "$job_file"