diff --git a/extract_customer.py b/extract_customer.py deleted file mode 100755 index 0209f18..0000000 --- a/extract_customer.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -import sys -import re -from pypdf import PdfReader - -def extract_customer(pdf_path): - try: - reader = PdfReader(pdf_path) - for page in reader.pages: - text = page.extract_text() - if text: - if 'Fakturaöversikt' in text: - return 'index' - match = re.search(r'Kund\s+(\d+)', text) - if match: - return f'customer_{match.group(1)}' - except Exception as e: - pass - return None - -if __name__ == '__main__': - if len(sys.argv) > 1: - result = extract_customer(sys.argv[1]) - if result: - print(result) diff --git a/rename_pdfs.sh b/rename_pdfs.sh deleted file mode 100755 index 64eed11..0000000 --- a/rename_pdfs.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# Script to rename PDF files based on customer number extracted from their content -# Usage: ./rename_pdfs.sh [directory] -# Default directory: output - -OUTPUT_DIR="${1:-output}" - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -FULL_DIR="$SCRIPT_DIR/$OUTPUT_DIR" - -if [ ! -d "$FULL_DIR" ]; then - echo "Error: Directory '$FULL_DIR' not found" - exit 1 -fi - -for pdf in "$FULL_DIR"/*.pdf; do - if [ -f "$pdf" ]; then - filename=$(basename "$pdf") - - # Extract text from PDF using Python and pypdf - result=$(python3 "$SCRIPT_DIR/extract_customer.py" "$pdf" 2>/dev/null) - - if [ -n "$result" ]; then - new_name="${result}.pdf" - if [ "$filename" != "$new_name" ]; then - mv "$pdf" "$FULL_DIR/$new_name" - echo "Renamed: $filename -> $new_name" - fi - else - echo "Could not extract customer number from: $filename" - fi - fi -done