- rename_pdfs.sh: Main script that renames PDFs based on content
- extract_customer.py: Helper script to extract customer number from PDF
35 lines
951 B
Bash
Executable File
35 lines
951 B
Bash
Executable File
#!/bin/bash
#
# Rename PDF files after the customer number extracted from their content.
#
# Usage: ./rename_pdfs.sh [directory]
#   directory  Folder containing the PDFs, resolved relative to the directory
#              this script lives in. Default: output
#
# For each *.pdf in that folder, extract_customer.py (located next to this
# script) is run with the PDF path as its only argument. Whatever it prints on
# stdout becomes the new base name, i.e. the file is moved to "<output>.pdf"
# inside the same folder.
#
# Safety rules applied before renaming:
#   - an existing file is never overwritten (e.g. two PDFs yielding the same
#     customer number); the later file is skipped with a warning
#   - an extracted value containing "/" or a newline is rejected, because it
#     would not be a plain file name inside the target folder
#
# Output: "Renamed: old -> new" lines on stdout; all diagnostics on stderr.
# Exit status: 1 if the directory, python3 or the helper script is missing;
#              0 otherwise (per-file problems are reported but not fatal).

set -euo pipefail

# Print an error message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

OUTPUT_DIR="${1:-output}"

SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)" \
  || die "Error: cannot determine script directory"
FULL_DIR="$SCRIPT_DIR/$OUTPUT_DIR"
EXTRACTOR="$SCRIPT_DIR/extract_customer.py"

[[ -d "$FULL_DIR" ]] || die "Error: Directory '$FULL_DIR' not found"

# Fail early on a broken setup. The helper's stderr is silenced below, so
# without these checks a missing interpreter or helper would only show up as
# "Could not extract ..." for every single file.
command -v python3 >/dev/null 2>&1 || die "Error: python3 not found in PATH"
[[ -f "$EXTRACTOR" ]] || die "Error: Helper script '$EXTRACTOR' not found"

for pdf in "$FULL_DIR"/*.pdf; do
  # With no match the glob stays literal; this also skips non-regular files.
  [[ -f "$pdf" ]] || continue

  filename=${pdf##*/}

  # Extract text from PDF using Python and pypdf.
  # stderr is discarded on purpose to keep the helper's noise out of the
  # report, and its exit status is ignored: empty output is the failure signal.
  result=$(python3 "$EXTRACTOR" "$pdf" 2>/dev/null) || true

  if [[ -z "$result" ]]; then
    echo "Could not extract customer number from: $filename" >&2
    continue
  fi

  # The value originates from PDF content; refuse anything that is not a
  # plain single-line file name so the file cannot leave FULL_DIR.
  case "$result" in
    */* | *$'\n'*)
      echo "Skipped: $filename (extracted value is not a valid file name)" >&2
      continue
      ;;
  esac

  new_name="${result}.pdf"
  target="$FULL_DIR/$new_name"

  # Already carries the right name: nothing to do.
  [[ "$filename" != "$new_name" ]] || continue

  # Never clobber another file that already has this name.
  if [[ -e "$target" ]]; then
    echo "Skipped: $filename ('$new_name' already exists)" >&2
    continue
  fi

  if mv -- "$pdf" "$target"; then
    echo "Renamed: $filename -> $new_name"
  else
    echo "Error: failed to rename $filename -> $new_name" >&2
  fi
done