From b27c24f806f9ea8646dc0630e2033b6eec8a41a9 Mon Sep 17 00:00:00 2001
From: Jakob
Date: Mon, 23 Mar 2026 14:38:56 +0100
Subject: [PATCH] Add shell script to rename PDFs by extracting customer numbers

- rename_pdfs.sh: Main script that renames PDFs based on content
- extract_customer.py: Helper script to extract customer number from PDF
---
Review notes: extraction errors are now reported on stderr, an existing
target file is never overwritten, and a missing pypdf is detected up
front. The "index" lines below are carried over from the original patch
and no longer match the revised blobs.

 extract_customer.py | 57 +++++++++++++++++++++++++++++++++++++++++++++
 rename_pdfs.sh      | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100755 extract_customer.py
 create mode 100755 rename_pdfs.sh

diff --git a/extract_customer.py b/extract_customer.py
new file mode 100755
index 0000000..0209f18
--- /dev/null
+++ b/extract_customer.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""Print the new base name for a PDF, derived from its text content.
+
+Usage: extract_customer.py FILE.pdf
+
+Prints "index" or "customer_NUMBER" on stdout, or nothing when the PDF
+matches neither rule. Read errors are reported on stderr.
+"""
+import re
+import sys
+
+from pypdf import PdfReader
+
+# "Kund" followed by whitespace and the customer number (captured).
+CUSTOMER_RE = re.compile(r'Kund\s+(\d+)')
+
+
+def extract_customer(pdf_path):
+    """Return the new base name for *pdf_path*, or None if there is none.
+
+    Pages are scanned in order and the first page that matches decides:
+    a page containing 'Fakturaöversikt' gives 'index', otherwise a
+    'Kund NUMBER' match gives 'customer_NUMBER'. Pages without
+    extractable text are skipped. A file that cannot be read is reported
+    on stderr and gives None.
+    """
+    try:
+        reader = PdfReader(pdf_path)
+        for page in reader.pages:
+            text = page.extract_text()
+            if not text:
+                continue
+            if 'Fakturaöversikt' in text:
+                return 'index'
+            match = CUSTOMER_RE.search(text)
+            if match:
+                return f'customer_{match.group(1)}'
+    except Exception as exc:
+        # Best effort: an unreadable PDF simply yields no name, but the
+        # reason is reported instead of being dropped silently.
+        print(f'{pdf_path}: {exc}', file=sys.stderr)
+    return None
+
+
+def main(argv):
+    """Print the derived name for argv[1]; return the process exit status."""
+    if len(argv) < 2:
+        print(f'Usage: {argv[0]} FILE.pdf', file=sys.stderr)
+        return 2
+    result = extract_customer(argv[1])
+    if result:
+        print(result)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/rename_pdfs.sh b/rename_pdfs.sh
new file mode 100755
index 0000000..64eed11
--- /dev/null
+++ b/rename_pdfs.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Script to rename PDF files based on customer number extracted from their content
+# Usage: ./rename_pdfs.sh [directory]
+# Default directory: output
+#
+# The directory is resolved relative to this script's own location.
+# extract_customer.py supplies the new base name ("index" or
+# "customer_NUMBER"). An existing file is never overwritten.
+
+OUTPUT_DIR="${1:-output}"
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+FULL_DIR="$SCRIPT_DIR/$OUTPUT_DIR"
+
+if [ ! -d "$FULL_DIR" ]; then
+    echo "Error: Directory '$FULL_DIR' not found"
+    exit 1
+fi
+
+# Without pypdf the helper prints nothing, which would look like an
+# extraction failure for every single file; stop early instead.
+if ! python3 -c 'import pypdf' 2>/dev/null; then
+    echo "Error: python3 with the pypdf package is required"
+    exit 1
+fi
+
+for pdf in "$FULL_DIR"/*.pdf; do
+    # An unmatched glob is passed through literally; -f filters it out.
+    [ -f "$pdf" ] || continue
+
+    filename=$(basename "$pdf")
+
+    # Extract text from PDF using Python and pypdf
+    result=$(python3 "$SCRIPT_DIR/extract_customer.py" "$pdf" 2>/dev/null)
+
+    if [ -z "$result" ]; then
+        echo "Could not extract customer number from: $filename"
+        continue
+    fi
+
+    new_name="${result}.pdf"
+
+    # Already carries the right name.
+    [ "$filename" != "$new_name" ] || continue
+
+    # Several PDFs can map to the same name (e.g. two documents for one
+    # customer); a plain mv would silently replace the earlier file.
+    if [ -e "$FULL_DIR/$new_name" ]; then
+        echo "Skipped: $filename -> $new_name already exists"
+        continue
+    fi
+
+    # Report success only when the rename actually happened.
+    if mv "$pdf" "$FULL_DIR/$new_name"; then
+        echo "Renamed: $filename -> $new_name"
+    fi
+done