Add shell script to rename PDFs by extracting customer numbers
- rename_pdfs.sh: Main script that renames PDFs based on their content
- extract_customer.py: Helper script that extracts the customer number from a PDF
This commit is contained in:
25
extract_customer.py
Executable file
25
extract_customer.py
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import re
|
||||
from pypdf import PdfReader
|
||||
|
||||
def extract_customer(pdf_path):
    """Return a rename label for the PDF at *pdf_path*, or None.

    Pages are scanned in order and the first page that yields a label wins.
    For each page with extractable text:

    * 'index' is returned if the text contains 'Fakturaöversikt';
    * otherwise 'customer_<digits>' is returned if the text contains
      'Kund' followed by whitespace and a run of digits.

    Returns None when no page matches or when the PDF cannot be read.
    Read failures are reported on stderr rather than raised, so stdout
    stays reserved for the label printed by the command-line entry point.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page; try the next one.
                continue
            # The overview marker takes precedence over a customer number
            # found on the same page.
            if 'Fakturaöversikt' in text:
                return 'index'
            match = re.search(r'Kund\s+(\d+)', text)
            if match:
                return f'customer_{match.group(1)}'
    except Exception as exc:
        # Deliberately best-effort: any failure still yields None, but it is
        # reported instead of being silently discarded.
        print(
            f'{pdf_path}: could not extract customer number: {exc}',
            file=sys.stderr,
        )
    return None
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: the first argument is the PDF path.
    # With no argument, or when no label is found, nothing is printed.
    cli_args = sys.argv[1:]
    if cli_args:
        label = extract_customer(cli_args[0])
        if label:
            print(label)
|
||||
Reference in New Issue
Block a user