pdf_unlock: inline outline copying, replacing recursion with a queue

2025-10-29 23:05:14 +08:00
parent abf64d9cd6
commit d454f8c8f4
1 changed file with 27 additions and 87 deletions
--- a/pdf_unlock/main.py
+++ b/pdf_unlock/main.py
@ -1,37 +1,9 @@
 import PyPDF2  # PyMuPDF
+from PyPDF2.generic import IndirectObject
 import sys
 from pathlib import Path
 from typing import Optional
-
-def add_outlines(
-        reader_obj:PyPDF2.PdfReader, 
-        writer_obj:PyPDF2.PdfWriter,
-        outlines:Optional[list] = None, 
-        parent=None
-    ):
-    
-    if outlines is None:
-        outlines = reader_obj.outline
-    last = None
-    for it in outlines:
-        if isinstance(it, list):
-            add_outlines(reader_obj, writer_obj ,it, last)
-            continue
-        
-        title = getattr(it, 'title', None)
-        if title is None:
-            try:
-                title = str(it)
-            except Exception as e:
-                raise e
-                continue
-
-        page_num = reader_obj.get_destination_page_number(it)
-        if page_num is None:
-            continue
-        
-        last = writer_obj.add_outline_item(title, page_num, parent=parent)
-    
+from itertools import repeat

 def copy_pdf_pages(input_path: Path, output_path: Path) -> bool:
    """
@ -48,19 +20,34 @@ def copy_pdf_pages(input_path: Path, output_path: Path) -> bool:
    try:
        with open(input_path, 'rb') as input_file:
            reader = PyPDF2.PdfReader(input_file)
-            
            writer = PyPDF2.PdfWriter()
            
            # 复制所有页面
            for page in reader.pages:
                writer.add_page(page)

-            # 复制书签（如果有）
-            
            try:
-                add_outlines(reader, writer)
+                que = list(zip(repeat(None),reader.outline))
+                last:Optional[IndirectObject] = None
+                for par, it in que:
+                    if isinstance(it, list):
+                        que.extend(zip(repeat(last),it))
+                        continue
+                    
+                    title = getattr(it, 'title', None)
+                    if title is None:
+                        try:
+                            title = str(it)
+                        except Exception:
+                            print(f"警告：无法获取书签标题，跳过该书签.")
+                            continue
+
+                    page_num = reader.get_destination_page_number(it)
+                    if page_num is None:
+                        continue
+                    
+                    last = writer.add_outline_item(title, page_num, parent=par)
            except Exception as e:
-                raise e
                print(f"警告：{input_path.name}书签处理失败.")
            
            # 写入新文件（不设置任何加密或限制）
@ -71,53 +58,8 @@ def copy_pdf_pages(input_path: Path, output_path: Path) -> bool:
            
    except Exception as e:
        print(f"移除PDF限制时发生错误: {e}")
-        raise e
        return False

-# def copy_pdf_pages(input_file, output_file):
-#     """
-#     读取PDF文件并逐页复制到新的PDF文件
-    
-#     Args:
-#         input_file (str): 输入PDF文件路径
-#         output_file (str): 输出PDF文件路径
-#     """
-#     try:
-#         # 检查输入文件是否存在
-#         if not os.path.exists(input_file):
-#             print(f"错误：输入文件 '{input_file}' 不存在")
-#             return False
-        
-#         # 打开输入PDF文件
-#         pdf_document = fitz.open(input_file)
-        
-#         # 创建新的PDF文档
-#         new_pdf = fitz.open()
-#         new_pdf.insert_pdf(pdf_document)
-        
-#         # 保存输出文件
-#         new_pdf.save(output_file)
-        
-#         # 关闭文档
-#         pdf_document.close()
-#         new_pdf.close()
-        
-#         return True
-            
-#     except FileNotFoundError:
-#         print(f"错误：找不到文件 '{input_file}'")
-#         return False
-#     except PermissionError:
-#         print(f"错误：权限不足，无法访问文件")
-#         return False
-#     except Exception as pdf_error:
-#         error_msg = str(pdf_error).lower()
-#         if "damaged" in error_msg or "corrupt" in error_msg:
-#             print(f"错误：PDF文件 '{input_file}' 已损坏")
-#         else:
-#             print(f"发生错误：{str(pdf_error)}")
-#         return False
-
 def main():
    """主函数"""
    if len(sys.argv) < 2:
@ -129,12 +71,12 @@ def main():
    else:
        input_path = Path(sys.argv[1])
        if input_path.is_dir():
-            files = list(input_path.glob("**/*.pdf"))
+            files = list(input_path.rglob("*.pdf"))
        else:
            print("正在处理",input_path.name)
            output_file = input_path.with_name(f"{input_path.stem}_decrypt.pdf")
-            success = copy_pdf_pages(input_path, output_file)
-            print("处理完成" if success else "处理失败")
+            suc = copy_pdf_pages(input_path, output_file)
+            print("处理完成" if suc else "处理失败")
            return
    
    total = len(files)
@ -142,15 +84,13 @@ def main():
    for i, pdf_file in enumerate(files, start=1):
        rate= round(i/total *100)
        print(f"进度: ", "-"* (rate//5)," "*(20-rate//5), f"   {rate}%",sep="",end="\r")
-        import time
-        # time.sleep(1)  # 模拟处理时间
        if not pdf_file.is_file():
            print(f"跳过非PDF文件：{pdf_file}")
            continue
        output_file = pdf_file.with_name(f"{pdf_file.stem}_decrypt.pdf")
-        success = copy_pdf_pages(pdf_file, output_file)
+        suc = copy_pdf_pages(pdf_file, output_file)

-        if not success:
+        if not suc:
            print(f"{pdf_file.name} 处理失败")

 if __name__ == "__main__":