Add preprocessing step to convert real input to cleaned format

- Read quoted tab-separated input with US date format (M/D/YYYY H:MM:SS AM/PM)
- Convert dates to ISO format (YYYY-MM-DD HH:MM:SS)
- Save cleaned version to .cleaned.csv alongside the HTML output
- Continue processing with existing workflow on cleaned file
This commit is contained in:
2026-03-23 13:58:36 +01:00
parent fd2e776181
commit 1cc34b1a5c

View File

@@ -1,5 +1,6 @@
use askama::Template;
use chrono::Utc;
use chrono::{NaiveDateTime, Utc};
use csv::ReaderBuilder;
use std::collections::HashMap;
use std::env;
use std::fs;
@@ -13,6 +14,65 @@ fn fmt(v: f64) -> String {
format!("{:.2}", v)
}
/// Rewrites the tab-separated file at `input_path` into `output_path`,
/// normalising the first column (a timestamp) to `YYYY-MM-DD HH:MM:SS`.
///
/// Every output row has exactly 16 fields: the reformatted timestamp followed
/// by input columns 1 through 15. Columns missing from a short input row are
/// written as empty strings; columns beyond index 15 are dropped.
///
/// The timestamp is first parsed as US 12-hour (`M/D/YYYY H:MM:SS AM/PM`),
/// then as ISO (`YYYY-MM-DD HH:MM:SS`). If both attempts fail, the value of
/// `NaiveDateTime::default()` is written instead.
///
/// Returns the first non-empty value seen in column 1 (the batch column), or
/// an empty string when no row carries one.
///
/// # Errors
///
/// Propagates failures from opening the input, creating the output, reading a
/// record, writing a record, or flushing the writer.
///
/// NOTE(review): the reader is built with `has_headers(true)`, so the first
/// input row is not yielded by `records()` and no header is ever written to
/// the output — confirm that the consumer of the cleaned file expects that.
/// NOTE(review): the default-timestamp fallback hides unparseable dates —
/// confirm this best-effort behaviour is intended.
fn clean_csv_file(
    input_path: &Path,
    output_path: &Path,
) -> Result<String, Box<dyn std::error::Error>> {
    const US_FORMAT: &str = "%m/%d/%Y %I:%M:%S %p";
    const ISO_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
    const COLUMN_COUNT: usize = 16;

    // The input is opened before the output is created, so a missing input
    // never leaves an empty output file behind.
    let mut reader = ReaderBuilder::new()
        .delimiter(b'\t')
        .has_headers(true)
        .flexible(true)
        .from_reader(fs::File::open(input_path)?);
    let mut writer = csv::WriterBuilder::new()
        .delimiter(b'\t')
        .from_writer(fs::File::create(output_path)?);

    let mut batch_number = String::new();

    for entry in reader.records() {
        let record = entry?;
        // Missing columns read as empty strings rather than failing the row.
        let field = |idx: usize| record.get(idx).unwrap_or("");

        // Remember the first non-empty batch value encountered.
        if batch_number.is_empty() {
            batch_number = field(1).to_string();
        }

        let raw_date = field(0);
        let timestamp = NaiveDateTime::parse_from_str(raw_date, US_FORMAT)
            .or_else(|_| NaiveDateTime::parse_from_str(raw_date, ISO_FORMAT))
            .unwrap_or_default();

        let mut cleaned: Vec<String> = Vec::with_capacity(COLUMN_COUNT);
        cleaned.push(timestamp.format(ISO_FORMAT).to_string());
        cleaned.extend((1..COLUMN_COUNT).map(|idx| field(idx).to_string()));
        writer.write_record(&cleaned)?;
    }

    writer.flush()?;
    Ok(batch_number)
}
#[derive(Clone)]
struct ProductSummary {
name: String,
@@ -171,22 +231,29 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
std::process::exit(1);
}
let batch = read_csv_file(input_path)?;
let filename = input_path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown")
.to_string();
println!("Konverterar {} till rensat format...", filename);
let cleaned_path =
base_output_dir.join(format!("{}.cleaned.csv", filename.trim_end_matches(".txt")));
let batch_number = clean_csv_file(input_path, &cleaned_path)?;
println!(
"Laddade {} transaktioner från {}",
batch.transactions.len(),
batch.filename
"Konverterade {} transaktioner",
fs::read_to_string(&cleaned_path)?.lines().count() - 1
);
let batch_number = batch
.transactions
.first()
.map(|t| t.batch_number.clone())
.unwrap_or_else(|| "unknown".to_string());
let output_dir = base_output_dir.join(&batch_number);
fs::create_dir_all(&output_dir)?;
let batch = read_csv_file(&cleaned_path)?;
println!("Laddade {} transaktioner", batch.transactions.len());
let first_date = batch.transactions.first().map(|t| t.date).unwrap();
let last_date = batch.transactions.last().map(|t| t.date).unwrap();
let period = format!(