Add preprocessing step to convert real input to cleaned format
- Read quoted tab-separated input with US date format (M/D/YYYY H:MM:SS AM/PM)
- Convert dates to ISO format (YYYY-MM-DD HH:MM:SS)
- Save cleaned version to .cleaned.csv alongside the HTML output
- Continue processing with existing workflow on cleaned file
This commit is contained in:
89
src/main.rs
89
src/main.rs
@@ -1,5 +1,6 @@
|
||||
use askama::Template;
|
||||
use chrono::Utc;
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use csv::ReaderBuilder;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
@@ -13,6 +14,65 @@ fn fmt(v: f64) -> String {
|
||||
format!("{:.2}", v)
|
||||
}
|
||||
|
||||
fn clean_csv_file(
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let file = fs::File::open(input_path)?;
|
||||
let mut rdr = ReaderBuilder::new()
|
||||
.delimiter(b'\t')
|
||||
.has_headers(true)
|
||||
.flexible(true)
|
||||
.from_reader(file);
|
||||
|
||||
let output = fs::File::create(output_path)?;
|
||||
let mut writer = csv::WriterBuilder::new()
|
||||
.delimiter(b'\t')
|
||||
.from_writer(output);
|
||||
|
||||
let mut batch_number = String::new();
|
||||
|
||||
for result in rdr.records() {
|
||||
let record = result?;
|
||||
|
||||
let date_str = record.get(0).unwrap_or("");
|
||||
let batch = record.get(1).unwrap_or("").to_string();
|
||||
|
||||
if batch_number.is_empty() {
|
||||
batch_number = batch.clone();
|
||||
}
|
||||
|
||||
let date =
|
||||
NaiveDateTime::parse_from_str(date_str, "%m/%d/%Y %I:%M:%S %p").unwrap_or_else(|_| {
|
||||
NaiveDateTime::parse_from_str(date_str, "%Y-%m-%d %H:%M:%S").unwrap_or_default()
|
||||
});
|
||||
|
||||
let row = vec![
|
||||
date.format("%Y-%m-%d %H:%M:%S").to_string(),
|
||||
batch,
|
||||
record.get(2).unwrap_or("").to_string(),
|
||||
record.get(3).unwrap_or("").to_string(),
|
||||
record.get(4).unwrap_or("").to_string(),
|
||||
record.get(5).unwrap_or("").to_string(),
|
||||
record.get(6).unwrap_or("").to_string(),
|
||||
record.get(7).unwrap_or("").to_string(),
|
||||
record.get(8).unwrap_or("").to_string(),
|
||||
record.get(9).unwrap_or("").to_string(),
|
||||
record.get(10).unwrap_or("").to_string(),
|
||||
record.get(11).unwrap_or("").to_string(),
|
||||
record.get(12).unwrap_or("").to_string(),
|
||||
record.get(13).unwrap_or("").to_string(),
|
||||
record.get(14).unwrap_or("").to_string(),
|
||||
record.get(15).unwrap_or("").to_string(),
|
||||
];
|
||||
|
||||
writer.write_record(&row)?;
|
||||
}
|
||||
|
||||
writer.flush()?;
|
||||
Ok(batch_number)
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ProductSummary {
|
||||
name: String,
|
||||
@@ -171,22 +231,29 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let batch = read_csv_file(input_path)?;
|
||||
let filename = input_path
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
|
||||
println!("Konverterar {} till rensat format...", filename);
|
||||
|
||||
let cleaned_path =
|
||||
base_output_dir.join(format!("{}.cleaned.csv", filename.trim_end_matches(".txt")));
|
||||
let batch_number = clean_csv_file(input_path, &cleaned_path)?;
|
||||
|
||||
println!(
|
||||
"Laddade {} transaktioner från {}",
|
||||
batch.transactions.len(),
|
||||
batch.filename
|
||||
"Konverterade {} transaktioner",
|
||||
fs::read_to_string(&cleaned_path)?.lines().count() - 1
|
||||
);
|
||||
|
||||
let batch_number = batch
|
||||
.transactions
|
||||
.first()
|
||||
.map(|t| t.batch_number.clone())
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
let output_dir = base_output_dir.join(&batch_number);
|
||||
|
||||
fs::create_dir_all(&output_dir)?;
|
||||
|
||||
let batch = read_csv_file(&cleaned_path)?;
|
||||
println!("Laddade {} transaktioner", batch.transactions.len());
|
||||
|
||||
let first_date = batch.transactions.first().map(|t| t.date).unwrap();
|
||||
let last_date = batch.transactions.last().map(|t| t.date).unwrap();
|
||||
let period = format!(
|
||||
|
||||
Reference in New Issue
Block a user