Add shell script to rename PDFs by extracting customer numbers
- rename_pdfs.sh: Main script that renames PDFs based on their content
- extract_customer.py: Helper script that extracts the customer number from a PDF
This commit is contained in:
25
extract_customer.py
Executable file
25
extract_customer.py
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
|
def extract_customer(pdf_path):
    """Derive a file-name stem for a PDF from the text on its pages.

    Pages are scanned in order and the first page that yields a decision
    wins. Within a page the overview check comes before the customer check:

    - page text containing 'Fakturaöversikt'      -> 'index'
    - page text matching 'Kund' + whitespace + digits -> 'customer_<digits>'

    Args:
        pdf_path: Path of the PDF; handed unchanged to ``PdfReader``.

    Returns:
        ``'index'``, ``'customer_<digits>'``, or ``None`` when no page
        matched or reading the PDF raised an exception.
    """
    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            text = page.extract_text()
            if not text:
                # Nothing extractable on this page; try the next one.
                continue
            if 'Fakturaöversikt' in text:
                return 'index'
            match = re.search(r'Kund\s+(\d+)', text)
            if match:
                return f'customer_{match.group(1)}'
    except Exception as exc:
        # Best effort: a failure still yields None, but the cause is reported
        # on stderr instead of being dropped. stdout is left untouched because
        # the command-line entry point prints the result there.
        print(f'{pdf_path}: {exc}', file=sys.stderr)
    return None
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: the first argument is the PDF path. The
    # derived name is printed to stdout; nothing is printed when no argument
    # was given or no name could be derived.
    cli_args = sys.argv[1:]
    derived_name = extract_customer(cli_args[0]) if cli_args else None
    if derived_name:
        print(derived_name)
|
||||||
34
rename_pdfs.sh
Executable file
34
rename_pdfs.sh
Executable file
@@ -0,0 +1,34 @@
|
|||||||
|
#!/bin/bash

# Rename PDF files based on the customer number extracted from their content.
#
# Usage: ./rename_pdfs.sh [directory]
#   directory  Folder holding the PDFs. A relative path is resolved against
#              the directory that contains this script; an absolute path is
#              used as given. Default: output
#
# Every *.pdf in the directory is handed to extract_customer.py (expected next
# to this script). Whatever that helper prints on stdout becomes the new base
# name. Files for which it prints nothing are reported and left untouched.

OUTPUT_DIR="${1:-output}"

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" || {
  echo "Error: cannot determine script directory" >&2
  exit 1
}

# An absolute argument must not be prefixed with the script directory,
# otherwise it would point at a path that does not exist.
case "$OUTPUT_DIR" in
  /*) FULL_DIR="$OUTPUT_DIR" ;;
  *) FULL_DIR="$SCRIPT_DIR/$OUTPUT_DIR" ;;
esac

if [ ! -d "$FULL_DIR" ]; then
  echo "Error: Directory '$FULL_DIR' not found" >&2
  exit 1
fi

for pdf in "$FULL_DIR"/*.pdf; do
  # When nothing matches, the glob stays literal; the -f test filters that
  # out along with anything that is not a regular file.
  [ -f "$pdf" ] || continue

  filename=$(basename "$pdf")

  # Only the helper's stdout is captured. Its stderr is discarded, so a
  # failing helper shows up here simply as an empty result.
  result=$(python3 "$SCRIPT_DIR/extract_customer.py" "$pdf" 2>/dev/null)

  if [ -z "$result" ]; then
    echo "Could not extract customer number from: $filename"
    continue
  fi

  new_name="${result}.pdf"

  # Already carries the right name: nothing to do.
  [ "$filename" != "$new_name" ] || continue

  target="$FULL_DIR/$new_name"

  # Two PDFs can resolve to the same name; never overwrite an existing file.
  if [ -e "$target" ]; then
    echo "Skipped: $filename -> $new_name (target already exists)" >&2
    continue
  fi

  # Report success only when the rename actually happened.
  if mv -- "$pdf" "$target"; then
    echo "Renamed: $filename -> $new_name"
  else
    echo "Error: failed to rename $filename to $new_name" >&2
  fi
done
|
||||||
Reference in New Issue
Block a user