Add shell script to rename PDFs by extracting customer numbers

- rename_pdfs.sh: Main script that renames PDFs based on their content
- extract_customer.py: Helper script that extracts the customer number from a PDF
2026-03-23 14:38:56 +01:00
parent 1cc34b1a5c
commit b27c24f806
2 changed files with 59 additions and 0 deletions
--- a/extract_customer.py
+++ b/extract_customer.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+import sys
+import re
+from pypdf import PdfReader
+
+def extract_customer(pdf_path):
+    try:
+        reader = PdfReader(pdf_path)
+        for page in reader.pages:
+            text = page.extract_text()
+            if text:
+                if 'Fakturaöversikt' in text:
+                    return 'index'
+                match = re.search(r'Kund\s+(\d+)', text)
+                if match:
+                    return f'customer_{match.group(1)}'
+    except Exception as e:
+        pass
+    return None
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        result = extract_customer(sys.argv[1])
+        if result:
+            print(result)