This commit is contained in:
2025-09-06 23:47:17 +08:00
parent 70e46124a0
commit f539792fd1

View File

@ -11,6 +11,10 @@ from pickle import dumps, loads
from typing import Optional from typing import Optional
import atexit import atexit
import re import re
import threading
import queue
import psutil
from concurrent.futures import ThreadPoolExecutor, as_completed
root = None root = None
TRAIN = False TRAIN = False
@ -25,11 +29,29 @@ CFG = {
"ffmpeg": "ffmpeg", "ffmpeg": "ffmpeg",
"manual": None, "manual": None,
"video_ext": [".mp4", ".mkv"], "video_ext": [".mp4", ".mkv"],
"train": False "train": False,
"compress_dir_name": "compress",
"resolution": "-1:1080",
"fps": "30",
"esti_data_file": "estiminate_data.dat",
"test_video_resolution": "1920x1080",
"test_video_fps": "30",
"test_video_input": "compress_video_test.mp4",
"test_video_output": "compressed_video_test.mp4",
"max_concurrent_instances": 2,
"cpu_monitor_interval": 3, # CPU监控间隔
"cpu_monitor_duration": 30, # 统计持续时间5分钟
} }
esti=None # :tuple[list[int],list[float]] esti=None # :tuple[list[int],list[float]]
# CPU监控相关全局变量
ffmpeg_processes = {} # 存储活动的ffmpeg进程
cpu_stats = {"system": [], "ffmpeg": []} # CPU使用率统计
cpu_monitor_thread = None
cpu_monitor_lock = threading.Lock()
current_instances = 0
instance_lock = threading.Lock()
def get_cmd(video_path,output_file): def get_cmd(video_path,output_file):
if CFG["manual"] is not None: if CFG["manual"] is not None:
command=[ command=[
@ -46,23 +68,31 @@ def get_cmd(video_path,output_file):
CFG["ffmpeg"], CFG["ffmpeg"],
"-hide_banner", "-hide_banner",
"-i", video_path, "-i", video_path,
"-vf", "scale=-1:1080", ]
if CFG['resolution'] is not None:
command.extend([
"-vf", f"scale={CFG['resolution']}",])
command.extend([
"-c:v", CFG["codec"], "-c:v", CFG["codec"],
"-b:v", CFG["bitrate"], "-b:v", CFG["bitrate"],
"-r","30", "-r",CFG["fps"],
"-y", "-y",
] ])
else: else:
command = [ command = [
CFG["ffmpeg"], CFG["ffmpeg"],
"-hide_banner", "-hide_banner",
"-i", video_path, "-i", video_path,
"-vf", "scale=-1:1080", ]
if CFG['resolution'] is not None:
command.extend([
"-vf", f"scale={CFG['resolution']}",])
command.extend([
"-c:v", CFG["codec"], "-c:v", CFG["codec"],
"-global_quality", str(CFG["crf"]), "-global_quality", str(CFG["crf"]),
"-r","30", "-r",CFG["fps"],
"-y", "-y",
] ])
command.extend(CFG["extra"]) command.extend(CFG["extra"])
command.append(output_file) command.append(output_file)
@ -70,7 +100,7 @@ def get_cmd(video_path,output_file):
def train_init(): def train_init():
global esti_data,TRAIN,data_file global esti_data,TRAIN,data_file
data_file = Path("estiminate_data.dat") data_file = Path(CFG["esti_data_file"])
if data_file.exists(): if data_file.exists():
esti_data=loads(data_file.read_bytes()) esti_data=loads(data_file.read_bytes())
if not isinstance(esti_data,tuple): if not isinstance(esti_data,tuple):
@ -187,6 +217,99 @@ def fmt_time(t:float|int) -> str:
else: else:
return f"{round(t)}s" return f"{round(t)}s"
def cpu_monitor():
"""CPU监控线程函数"""
global cpu_stats
while True:
try:
# 获取系统CPU使用率
system_cpu = psutil.cpu_percent(interval=1)
# 获取所有ffmpeg进程的CPU使用率
ffmpeg_cpu_total = 0
active_processes = []
with cpu_monitor_lock:
for proc_info in ffmpeg_processes.values():
try:
proc = proc_info['process']
if proc.is_running():
# print(proc,proc.cpu_percent() / psutil.cpu_count())
ffmpeg_cpu_total += proc.cpu_percent() / psutil.cpu_count()
active_processes.append(proc_info)
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
# 更新统计数据
with cpu_monitor_lock:
cpu_stats["system"].append(system_cpu)
cpu_stats["ffmpeg"].append(ffmpeg_cpu_total)
# 保持最近5分钟的数据
max_samples = CFG["cpu_monitor_duration"] // CFG["cpu_monitor_interval"]
if len(cpu_stats["system"]) > max_samples:
cpu_stats["system"] = cpu_stats["system"][-max_samples:]
if len(cpu_stats["ffmpeg"]) > max_samples:
cpu_stats["ffmpeg"] = cpu_stats["ffmpeg"][-max_samples:]
logging.debug(f"CPU监控: 系统={system_cpu:.1f}%, FFmpeg总计={ffmpeg_cpu_total:.1f}%, 活动进程={len(active_processes)}")
except KeyboardInterrupt as e:
raise e
except Exception as e:
logging.debug(f"CPU监控异常: {e}")
# 等待下一次监控
threading.Event().wait(CFG["cpu_monitor_interval"])
def start_cpu_monitor():
"""启动CPU监控线程"""
global cpu_monitor_thread
if cpu_monitor_thread is None or not cpu_monitor_thread.is_alive():
cpu_monitor_thread = threading.Thread(target=cpu_monitor, daemon=True)
cpu_monitor_thread.start()
logging.info("CPU监控线程已启动")
def get_cpu_usage_stats():
"""获取CPU使用率统计"""
with cpu_monitor_lock:
if not cpu_stats["system"] or not cpu_stats["ffmpeg"]:
return None, None
system_avg = sum(cpu_stats["system"]) / len(cpu_stats["system"])
ffmpeg_avg = sum(cpu_stats["ffmpeg"]) / len(cpu_stats["ffmpeg"])
return system_avg, ffmpeg_avg
def should_increase_instances():
"""判断是否应该增加实例数"""
system_avg, ffmpeg_avg = get_cpu_usage_stats()
if system_avg is None or ffmpeg_avg is None:
return False
# 条件: 系统CPU - FFmpeg CPU > FFmpeg CPU * 2 + 0.1
available_cpu = 100 - system_avg
threshold = ffmpeg_avg # 10% = 0.1 * 100
logging.debug(f"CPU统计: 系统平均={system_avg:.1f}%, FFmpeg平均={ffmpeg_avg:.1f}%, 可用={available_cpu:.1f}%, 阈值={threshold:.1f}%")
return available_cpu > threshold
def register_ffmpeg_process(proc_id, process):
"""注册ffmpeg进程用于监控"""
with cpu_monitor_lock:
ffmpeg_processes[proc_id] = {
'process': psutil.Process(process.pid),
'start_time': time()
}
def unregister_ffmpeg_process(proc_id):
"""注销ffmpeg进程"""
with cpu_monitor_lock:
if proc_id in ffmpeg_processes:
del ffmpeg_processes[proc_id]
def func(sz:int,src=False): def func(sz:int,src=False):
if TRAIN: if TRAIN:
try: try:
@ -216,21 +339,15 @@ def func(sz:int,src=False):
logging.debug("esti time exception", exc_info=e) logging.debug("esti time exception", exc_info=e)
return -1 if src else "NaN" return -1 if src else "NaN"
def process_video(video_path: Path, compress_dir:Optional[Path]=None ,update_func=None): def process_video(video_path: Path, compress_dir:Optional[Path]=None, update_func=None, proc_id=None):
global esti_data global esti_data, current_instances
use=None use=None
sz=video_path.stat().st_size//(1024*1024) sz=video_path.stat().st_size//(1024*1024)
# if esti is not None or TRAIN:
# use = func(sz,True)
# logging.info(f"开始处理文件: {video_path.relative_to(root)},大小{sz}M预计{fmt_time(use)}")
# else:
# logging.info(f"开始处理文件: {video_path.relative_to(root)},大小{sz}M")
bgn=time() bgn=time()
if compress_dir is None: if compress_dir is None:
# 在视频文件所在目录下创建 compress 子目录(如果不存在) # 在视频文件所在目录下创建 compress 子目录(如果不存在)
compress_dir = video_path.parent / "compress" compress_dir = video_path.parent / CFG["compress_dir_name"]
else: else:
compress_dir /= video_path.parent.relative_to(root) compress_dir /= video_path.parent.relative_to(root)
@ -246,6 +363,11 @@ def process_video(video_path: Path, compress_dir:Optional[Path]=None ,update_fun
command = get_cmd(video_path_str,output_file) command = get_cmd(video_path_str,output_file)
try: try:
with instance_lock:
current_instances += 1
logging.debug(f"启动FFmpeg进程 {proc_id}: {video_path.name}")
result = subprocess.Popen( result = subprocess.Popen(
command, command,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
@ -254,17 +376,19 @@ def process_video(video_path: Path, compress_dir:Optional[Path]=None ,update_fun
text=True text=True
) )
# 注册进程用于CPU监控
if proc_id:
register_ffmpeg_process(proc_id, result)
while result.poll() is None: while result.poll() is None:
line = " " line = " "
while result.poll() is None and line[-1:] not in "\r\n": while result.poll() is None and line[-1:] not in "\r\n":
line+=result.stderr.read(1) line+=result.stderr.read(1)
# print(line[-1])
if 'warning' in line.lower(): if 'warning' in line.lower():
logging.warning(f"[FFmpeg]({video_path_str}): {line}") logging.warning(f"[FFmpeg {proc_id}]({video_path_str}): {line}")
elif 'error' in line.lower(): elif 'error' in line.lower():
logging.error(f"[FFmpeg]({video_path_str}): {line}") logging.error(f"[FFmpeg {proc_id}]({video_path_str}): {line}")
elif "frame=" in line: elif "frame=" in line:
# print(line,end="")
match = re.search(r"frame=\s*(\d+)",line) match = re.search(r"frame=\s*(\d+)",line)
if match: if match:
frame_number = int(match.group(1)) frame_number = int(match.group(1))
@ -272,9 +396,9 @@ def process_video(video_path: Path, compress_dir:Optional[Path]=None ,update_fun
update_func(frame_number) update_func(frame_number)
if result.returncode != 0: if result.returncode != 0:
logging.error(f"处理文件 {video_path_str} 失败,返回码: {result.returncode}cmd={' '.join(command)}") logging.error(f"处理文件 {video_path_str} 失败,返回码: {result.returncode}cmd={' '.join(map(str,command))}")
logging.error(result.stdout) logging.error(result.stdout.read())
logging.error(result.stderr) logging.error(result.stderr.read())
else: else:
logging.debug(f"文件处理成功: {video_path_str} -> {output_file}") logging.debug(f"文件处理成功: {video_path_str} -> {output_file}")
@ -282,13 +406,23 @@ def process_video(video_path: Path, compress_dir:Optional[Path]=None ,update_fun
if TRAIN: if TRAIN:
esti_data[0].append(sz) esti_data[0].append(sz)
esti_data[1].append(end-bgn) esti_data[1].append(end-bgn)
except KeyboardInterrupt as e:raise e
except Exception as e: except Exception as e:
logging.error(f"执行 ffmpeg 命令时发生异常, 文件:{str(video_path_str)}cmd={' '.join(map(str,command))}",exc_info=e) logging.error(f"执行 ffmpeg 命令时发生异常, 文件:{str(video_path_str)}cmd={' '.join(map(str,command))}",exc_info=e)
finally:
# 注销进程监控
if proc_id:
unregister_ffmpeg_process(proc_id)
with instance_lock:
current_instances -= 1
logging.debug(f"FFmpeg进程 {proc_id} 已结束")
return use return use
def traverse_directory(root_dir: Path): def traverse_directory(root_dir: Path):
global current_instances
video_extensions = set(CFG["video_ext"]) video_extensions = set(CFG["video_ext"])
sm=None sm=None
if esti is not None: if esti is not None:
@ -296,7 +430,7 @@ def traverse_directory(root_dir: Path):
logging.info(f"正在估算时间(当存在大量小文件时,估算值将会很离谱)") logging.info(f"正在估算时间(当存在大量小文件时,估算值将会很离谱)")
sm = 0 sm = 0
for file in root_dir.rglob("*"): for file in root_dir.rglob("*"):
if file.parent.name == "compress":continue if file.parent.name.lower() == CFG["compress_dir_name"].lower():continue
if file.is_file() and file.suffix.lower() in video_extensions: if file.is_file() and file.suffix.lower() in video_extensions:
sz=file.stat().st_size//(1024*1024) sz=file.stat().st_size//(1024*1024)
tmp = func(sz,True) tmp = func(sz,True)
@ -307,84 +441,196 @@ def traverse_directory(root_dir: Path):
sm += tmp sm += tmp
logging.info(f"预估用时:{fmt_time(sm)}") logging.info(f"预估用时:{fmt_time(sm)}")
else: else:
# logging.info("正在估算视频帧数,用于显示进度。") # 获取视频文件列表和帧数信息
with Progress() as prog: video_files = []
task = prog.add_task("正在获取视频信息",total=len(list(root_dir.rglob("*")))) que = list(root_dir.glob("*"))
frames:dict[Path,float] = {} while que:
for file in root_dir.rglob("*"): d = que.pop()
prog.advance(task) for file in d.glob("*"):
if file.parent.name == "Compress":continue if file.parent.name == CFG["compress_dir_name"] or file.name == CFG["compress_dir_name"]:
continue
if file.is_file() and file.suffix.lower() in video_extensions: if file.is_file() and file.suffix.lower() in video_extensions:
video_files.append(file)
elif file.is_dir():
que.append(file)
# exit()
if not video_files:
logging.warning("未找到需要处理的视频文件")
return
# 获取视频信息
with Progress() as prog:
task = prog.add_task("正在获取视频信息", total=len(video_files))
frames: dict[Path, float] = {}
for file in video_files:
prog.advance(task)
cmd = f'ffprobe -v error -select_streams v:0 -show_entries stream=avg_frame_rate,duration -of default=nokey=1:noprint_wrappers=1'.split() cmd = f'ffprobe -v error -select_streams v:0 -show_entries stream=avg_frame_rate,duration -of default=nokey=1:noprint_wrappers=1'.split()
cmd.append(str(file)) cmd.append(str(file))
proc = subprocess.run(cmd, shell=True, capture_output=True, text=True) proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
if proc.returncode != 0: if proc.returncode != 0:
logging.debug(f"无法获取视频信息: {file}, 返回码: {proc.returncode}") logging.debug(f"无法获取视频信息: {file}, 返回码: {proc.returncode}")
logging.debug(proc.stdout)
logging.debug(proc.stderr)
frames[file] = 0 frames[file] = 0
continue continue
if proc.stdout.strip(): if proc.stdout.strip():
try:
avg_frame_rate, duration = proc.stdout.strip().split('\n') avg_frame_rate, duration = proc.stdout.strip().split('\n')
tmp = avg_frame_rate.split('/') tmp = avg_frame_rate.split('/')
avg_frame_rate = float(tmp[0]) / float(tmp[1]) avg_frame_rate = float(tmp[0]) / float(tmp[1])
if duration == "N/A": if duration == "N/A":
duration = 0 duration = 0
logging.debug(f"无法获取视频信息: {file}, 时长为N/A默认使用0s。运行时进度条将出现异常。") logging.debug(f"无法获取视频信息: {file}, 时长为N/A默认使用0s")
duration = float(duration) duration = float(duration)
frames[file] = duration * avg_frame_rate frames[file] = duration * avg_frame_rate
except (ValueError, IndexError) as e:
logging.debug(f"解析视频信息失败: {file}, 错误: {e}")
frames[file] = 0
logging.debug(f"开始遍历目录: {root_dir}, 共{len(frames)}个视频文件")
logging.debug(f"开始遍历目录: {root_dir}") # 启动CPU监控
# 定义需要处理的视频后缀(忽略大小写) start_cpu_monitor()
# 创建进度条
with Progress() as prog: with Progress() as prog:
task = prog.add_task("总进度",total=sm if sm is not None else sum(frames.values())) total_frames = sum(frames.values())
for file in frames.keys(): main_task = prog.add_task("总进度", total=total_frames if total_frames > 0 else len(frames))
# if "Compress" in file.relative_to(root_dir).parents:continue
if file.is_file() and file.suffix.lower() in video_extensions:
filename = file.relative_to(root_dir)
if frames[file] == 0:
logging.warning("当前文件时长获取失败进度条持续为0为正常现象。")
cur = prog.add_task(f"{filename}")
else:
cur = prog.add_task(f"{filename}",total=frames[file])
with prog._lock:
tmp = prog._tasks[task]
completed_start = tmp.completed
def update_progress(x): # 创建文件队列
if frames[file] == 0: file_queue = queue.Queue()
prog.update(cur,description=f"{filename} 已处理{x}") for file in frames.keys():
file_queue.put(file)
# 进度跟踪
progress_trackers = {}
completed_files = 0
total_completed_frames = 0
def create_progress_updater(file_path, task_id):
def update_progress(frame_count):
nonlocal total_completed_frames
if file_path in progress_trackers:
old_frames = progress_trackers[file_path]
diff = frame_count - old_frames
total_completed_frames += diff
else: else:
prog.update(cur,completed=x) total_completed_frames += frame_count
prog.update(task, completed=completed_start+x) progress_trackers[file_path] = frame_count
if frames[file_path] > 0:
prog.update(task_id, completed=frame_count)
else:
prog.update(task_id, description=f"{file_path.relative_to(root_dir)} 已处理{frame_count}")
# 更新总进度
if total_frames > 0:
prog.update(main_task, completed=total_completed_frames)
return update_progress
def process_file_worker():
nonlocal completed_files
while True:
try:
file = file_queue.get(timeout=1)
except queue.Empty:
break
filename = file.relative_to(root_dir)
# 创建文件级进度条
if frames[file] == 0:
file_task = prog.add_task(f"{filename}")
else:
file_task = prog.add_task(f"{filename}", total=frames[file])
progress_updater = create_progress_updater(file, file_task)
# 处理视频
proc_id = f"worker_{threading.current_thread().ident}_{completed_files}"
if CFG["save_to"] == "single": if CFG["save_to"] == "single":
t = process_video(file, root_dir/"Compress", update_progress) process_video(file, root_dir/"Compress", progress_updater, proc_id)
else: else:
t = process_video(file, update_progress) process_video(file, None, progress_updater, proc_id)
# 更新完成计数
with instance_lock:
completed_files += 1
if total_frames == 0: # 如果没有总帧数,按文件数计算
prog.update(main_task, completed=completed_files)
prog.stop_task(cur) # 移除文件级进度条
prog.remove_task(cur) prog.remove_task(file_task)
if t is None: file_queue.task_done()
prog.update(task,completed=completed_start+frames[file])
# 动态管理线程数
active_threads = []
max_workers = CFG["max_concurrent_instances"]
def manage_workers():
nonlocal active_threads
while completed_files < len(frames) or any(t.is_alive() for t in active_threads):
# 清理已完成的线程
active_threads = [t for t in active_threads if t.is_alive()]
# 检查是否需要增加实例
current_worker_count = len(active_threads)
if current_worker_count < max_workers and not file_queue.empty():
# 检查CPU使用率运行5分钟后开始检查
should_add_worker = False
if len(cpu_stats["system"]) >= 10: # 至少有5分钟的数据
if current_worker_count >= 1: # 已有实例运行
should_add_worker = should_increase_instances()
if should_add_worker:
logging.info("CPU资源充足启动第二个压缩实例")
else: else:
prog.advance(task,t) should_add_worker = False
if should_add_worker:
worker_thread = threading.Thread(target=process_file_worker, daemon=True)
worker_thread.start()
active_threads.append(worker_thread)
logging.debug(f"启动新的工作线程,当前活动线程数: {len(active_threads)}")
threading.Event().wait(5) # 每5秒检查一次
# 等待所有线程完成
for thread in active_threads:
thread.join()
# 启动第一个工作线程
if not file_queue.empty():
first_worker = threading.Thread(target=process_file_worker, daemon=True)
first_worker.start()
active_threads.append(first_worker)
logging.info("启动第一个压缩实例")
# 启动线程管理器
manager_thread = threading.Thread(target=manage_workers, daemon=True)
manager_thread.start()
# 等待管理线程完成
manager_thread.join()
logging.info(f"所有视频处理完成,共处理了 {completed_files} 个文件")
def test(): def test():
os.environ["PATH"] = Path(__file__).parent.as_posix() + os.pathsep + os.environ["PATH"] os.environ["PATH"] = Path(__file__).parent.as_posix() + os.pathsep + os.environ["PATH"]
try: try:
subprocess.run([CFG["ffmpeg"],"-version"],stdout=-3,stderr=-3).check_returncode() subprocess.run([CFG["ffmpeg"],"-version"],stdout=-3,stderr=-3).check_returncode()
except KeyboardInterrupt as e:raise e
except Exception as e: except Exception as e:
print(__file__) print(__file__)
logging.critical("无法运行ffmpeg") logging.critical("无法运行ffmpeg")
exit(-1) exit(-1)
try: try:
ret = subprocess.run( ret = subprocess.run(
"ffmpeg -hide_banner -f lavfi -i testsrc=duration=1:size=1920x1080:rate=30 -c:v libx264 -y -pix_fmt yuv420p compress_video_test.mp4".split(), f"ffmpeg -hide_banner -f lavfi -i testsrc=duration=1:size={CFG['test_video_resolution']}:rate={CFG['test_video_fps']} -c:v libx264 -y -pix_fmt yuv420p {CFG['test_video_input']}".split(),
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
text=True text=True
@ -394,7 +640,7 @@ def test():
logging.debug(ret.stdout) logging.debug(ret.stdout)
logging.debug(ret.stderr) logging.debug(ret.stderr)
ret.check_returncode() ret.check_returncode()
cmd = get_cmd("compress_video_test.mp4","compressed_video_test.mp4",) cmd = get_cmd(CFG["test_video_input"],CFG["test_video_output"],)
ret = subprocess.run( ret = subprocess.run(
cmd, cmd,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
@ -409,6 +655,7 @@ def test():
exit(-1) exit(-1)
os.remove("compress_video_test.mp4") os.remove("compress_video_test.mp4")
os.remove("compressed_video_test.mp4") os.remove("compressed_video_test.mp4")
except KeyboardInterrupt as e:raise e
except Exception as e: except Exception as e:
if os.path.exists("compress_video_test.mp4"): if os.path.exists("compress_video_test.mp4"):
os.remove("compress_video_test.mp4") os.remove("compress_video_test.mp4")
@ -425,6 +672,7 @@ def init_train():
# 从文件读取系数 # 从文件读取系数
coeffs_str = ESTI_FILE.read_text().strip().split(',') coeffs_str = ESTI_FILE.read_text().strip().split(',')
esti = [float(coeff) for coeff in coeffs_str] esti = [float(coeff) for coeff in coeffs_str]
except KeyboardInterrupt as e:raise e
except Exception as e: except Exception as e:
logging.warning(f"预测输出文件{str(ESTI_FILE)}存在但无法读取", exc_info=e) logging.warning(f"预测输出文件{str(ESTI_FILE)}存在但无法读取", exc_info=e)
@ -449,6 +697,7 @@ def main(_root = None):
import json import json
cfg:dict = json.loads(CFG_FILE.read_text()) cfg:dict = json.loads(CFG_FILE.read_text())
CFG.update(cfg) CFG.update(cfg)
except KeyboardInterrupt as e:raise e
except Exception as e: except Exception as e:
logging.warning("Invalid config file, ignored.") logging.warning("Invalid config file, ignored.")
logging.debug(e) logging.debug(e)
@ -463,7 +712,7 @@ def main(_root = None):
sys.exit(1) sys.exit(1)
root = Path(sys.argv[1]) root = Path(sys.argv[1])
if root.name == "compress": if root.name.lower() == CFG["compress_dir_name"].lower():
logging.critical("请修改目标目录名为非compress。") logging.critical("请修改目标目录名为非compress。")
logging.error("Error termination via invalid input.") logging.error("Error termination via invalid input.")
sys.exit(1) sys.exit(1)