- rename_pdfs.sh: Main script that renames PDFs based on their content
- extract_customer.py: Helper script that extracts the customer number from a PDF
26 lines
668 B
Python
Executable File
26 lines
668 B
Python
Executable File
#!/usr/bin/env python3
|
|
import sys
|
|
import re
|
|
from pypdf import PdfReader
|
|
|
|
def extract_customer(pdf_path):
    """Derive a rename label for a PDF from the text of its pages.

    Pages are scanned in order, and the first page that yields a label
    decides the result:

    * a page whose text contains 'Fakturaöversikt' gives ``'index'``;
    * otherwise a page matching ``Kund <digits>`` gives
      ``'customer_<digits>'``.

    Pages with no extractable text are skipped.

    Args:
        pdf_path: Path of the PDF to inspect; passed straight to
            ``PdfReader``.

    Returns:
        The label string, or ``None`` when no page yields a label or
        the PDF could not be processed.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                continue
            # The overview marker takes precedence over a customer number
            # found on the same page.
            if 'Fakturaöversikt' in text:
                return 'index'
            match = re.search(r'Kund\s+(\d+)', text)
            if match:
                return f'customer_{match.group(1)}'
    except Exception as e:
        # Stay best-effort (the caller still gets None), but say why on
        # stderr so stdout carries nothing except the label.
        print(
            f'extract_customer: could not process {pdf_path!r}: {e}',
            file=sys.stderr,
        )
    return None
|
|
|
|
# Command-line entry point: print the label for the PDF given as the first
# argument. Nothing is printed when no argument is supplied or no label is
# found.
if __name__ == '__main__' and len(sys.argv) > 1:
    customer_label = extract_customer(sys.argv[1])
    if customer_label:
        print(customer_label)
|