Add shell script to rename PDFs by extracting customer numbers

- rename_pdfs.sh: Main script that renames PDFs based on content
- extract_customer.py: Helper script to extract customer number from PDF
This commit is contained in:
2026-03-23 14:38:56 +01:00
parent 1cc34b1a5c
commit b27c24f806
2 changed files with 59 additions and 0 deletions

25
extract_customer.py Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python3
import sys
import re
from pypdf import PdfReader
def extract_customer(pdf_path):
    """Derive a rename label from the text content of a PDF.

    Pages are scanned in order and the first page that produces a label
    wins. On each page with extractable text:

    * if the text contains ``Fakturaöversikt`` the label is ``'index'``
      (presumably the invoice-overview document; confirm with the caller);
    * otherwise, if the text matches ``Kund`` followed by whitespace and
      digits, the label is ``'customer_<digits>'``.

    Pages for which no text is extracted are skipped.

    Args:
        pdf_path: Path of the PDF, passed unchanged to ``PdfReader``.

    Returns:
        The label string, or ``None`` when no page matches or the PDF
        could not be read.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                continue
            # The overview marker is checked before the customer number, so
            # a page containing both is labelled 'index'.
            if 'Fakturaöversikt' in text:
                return 'index'
            match = re.search(r'Kund\s+(\d+)', text)
            if match:
                return f'customer_{match.group(1)}'
    except Exception as exc:
        # Deliberately best-effort: any failure while opening or parsing the
        # PDF still yields None. The reason is reported on stderr so that
        # stdout carries nothing but the label.
        print(
            f'extract_customer: could not read {pdf_path!r}: '
            f'{type(exc).__name__}: {exc}',
            file=sys.stderr,
        )
    return None
if __name__ == '__main__':
    # Command-line entry point: only the first argument is used as the PDF
    # path. With no argument, or when no label is found, nothing is printed.
    pdf_args = sys.argv[1:2]
    label = extract_customer(pdf_args[0]) if pdf_args else None
    if label:
        print(label)