Add shell script to rename PDFs by extracting customer numbers

- rename_pdfs.sh: Main script that renames PDFs based on content
- extract_customer.py: Helper script that extracts the customer number from a PDF's text (prints 'index' for a PDF containing 'Fakturaöversikt')
This commit is contained in:
2026-03-23 14:38:56 +01:00
parent 1cc34b1a5c
commit b27c24f806
2 changed files with 59 additions and 0 deletions

25
extract_customer.py Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env python3
import sys
import re
from pypdf import PdfReader
def extract_customer(pdf_path):
    """Derive the new base name for a PDF from its text content.

    Pages are scanned in order and the first page that yields a result wins.
    On each page with extractable text, the literal 'Fakturaöversikt' is
    checked first and gives 'index'; otherwise 'Kund' followed by whitespace
    and digits gives 'customer_<digits>'.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        'index', 'customer_<digits>', or None when no page matches or the
        file could not be processed.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page; try the next one.
                continue
            if 'Fakturaöversikt' in text:
                return 'index'
            match = re.search(r'Kund\s+(\d+)', text)
            if match:
                return f'customer_{match.group(1)}'
    except Exception as exc:
        # Still best effort (the caller treats None as "no name found"), but
        # say why on stderr instead of discarding the error; stdout stays
        # reserved for the result.
        print(f'{pdf_path}: {exc}', file=sys.stderr)
    return None
if __name__ == '__main__':
    # Command-line entry point: print the name derived from the PDF given as
    # the first argument. Nothing is printed when no name could be derived,
    # so callers can test for empty output.
    if len(sys.argv) > 1:
        result = extract_customer(sys.argv[1])
        if result:
            print(result)
    else:
        # A missing argument used to be a silent no-op; report it instead.
        print(f'usage: {sys.argv[0]} PDF_FILE', file=sys.stderr)
        sys.exit(2)

34
rename_pdfs.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Rename PDF files after the customer number found in their content.
#
# Usage: ./rename_pdfs.sh [directory]
#   directory  Folder holding the PDFs (default: output). A relative path is
#              resolved against the directory containing this script; an
#              absolute path is used as given.
#
# Each PDF is passed to extract_customer.py (looked up next to this script),
# which prints the new base name on stdout, or nothing when it finds none.
# An existing file is never overwritten: if the target name is already taken,
# the PDF keeps its current name and a notice is written to stderr.
#
# Exit status: 1 when the directory or python3 is missing, 0 otherwise.

set -u

#######################################
# Rename every *.pdf located directly inside the chosen directory.
# Arguments: $1 - directory (optional, default "output")
# Outputs:   one status line per renamed or unreadable file on stdout;
#            errors and skipped renames on stderr
# Returns:   1 if the directory or python3 is missing, 0 otherwise
#######################################
main() {
  local output_dir="${1:-output}"
  local script_dir full_dir pdf filename result new_name target

  script_dir="$(cd "$(dirname "$0")" && pwd)"

  # Only relative paths are anchored at the script directory; prefixing an
  # absolute path would point at a directory the user never named.
  case "$output_dir" in
    /*) full_dir="$output_dir" ;;
    *) full_dir="$script_dir/$output_dir" ;;
  esac

  if [[ ! -d "$full_dir" ]]; then
    printf "Error: Directory '%s' not found\n" "$full_dir" >&2
    return 1
  fi

  # Without this check a missing interpreter would look like "nothing could
  # be extracted" for every single file.
  if ! command -v python3 >/dev/null 2>&1; then
    printf 'Error: python3 not found in PATH\n' >&2
    return 1
  fi

  for pdf in "$full_dir"/*.pdf; do
    # An unmatched glob stays literal; the -f test filters that case out.
    [[ -f "$pdf" ]] || continue
    filename="${pdf##*/}"

    # The helper's stderr is discarded on purpose: only its stdout carries
    # the result, and an empty result is reported just below.
    result="$(python3 "$script_dir/extract_customer.py" "$pdf" 2>/dev/null)"
    if [[ -z "$result" ]]; then
      printf 'Could not extract customer number from: %s\n' "$filename"
      continue
    fi

    new_name="${result}.pdf"
    # Already carries the right name; nothing to do.
    [[ "$filename" != "$new_name" ]] || continue

    target="$full_dir/$new_name"
    # Two PDFs can resolve to the same name; never clobber the first one.
    if [[ -e "$target" ]]; then
      printf 'Skipped: %s -> %s (target already exists)\n' \
        "$filename" "$new_name" >&2
      continue
    fi

    # Report success only when the rename actually happened.
    if mv -- "$pdf" "$target"; then
      printf 'Renamed: %s -> %s\n' "$filename" "$new_name"
    else
      printf 'Error: could not rename %s to %s\n' "$filename" "$new_name" >&2
    fi
  done

  return 0
}

main "$@"