Add preprocessing step to convert real input to cleaned format
- Read quoted tab-separated input with US date format (M/D/YYYY H:MM:SS AM/PM)
- Convert dates to ISO format (YYYY-MM-DD HH:MM:SS)
- Save cleaned version to .cleaned.csv alongside the HTML output
- Continue processing with existing workflow on cleaned file
This commit is contained in:
89
src/main.rs
89
src/main.rs
@@ -1,5 +1,6 @@
|
|||||||
use askama::Template;
|
use askama::Template;
|
||||||
use chrono::Utc;
|
use chrono::{NaiveDateTime, Utc};
|
||||||
|
use csv::ReaderBuilder;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
@@ -13,6 +14,65 @@ fn fmt(v: f64) -> String {
|
|||||||
format!("{:.2}", v)
|
format!("{:.2}", v)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn clean_csv_file(
|
||||||
|
input_path: &Path,
|
||||||
|
output_path: &Path,
|
||||||
|
) -> Result<String, Box<dyn std::error::Error>> {
|
||||||
|
let file = fs::File::open(input_path)?;
|
||||||
|
let mut rdr = ReaderBuilder::new()
|
||||||
|
.delimiter(b'\t')
|
||||||
|
.has_headers(true)
|
||||||
|
.flexible(true)
|
||||||
|
.from_reader(file);
|
||||||
|
|
||||||
|
let output = fs::File::create(output_path)?;
|
||||||
|
let mut writer = csv::WriterBuilder::new()
|
||||||
|
.delimiter(b'\t')
|
||||||
|
.from_writer(output);
|
||||||
|
|
||||||
|
let mut batch_number = String::new();
|
||||||
|
|
||||||
|
for result in rdr.records() {
|
||||||
|
let record = result?;
|
||||||
|
|
||||||
|
let date_str = record.get(0).unwrap_or("");
|
||||||
|
let batch = record.get(1).unwrap_or("").to_string();
|
||||||
|
|
||||||
|
if batch_number.is_empty() {
|
||||||
|
batch_number = batch.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
let date =
|
||||||
|
NaiveDateTime::parse_from_str(date_str, "%m/%d/%Y %I:%M:%S %p").unwrap_or_else(|_| {
|
||||||
|
NaiveDateTime::parse_from_str(date_str, "%Y-%m-%d %H:%M:%S").unwrap_or_default()
|
||||||
|
});
|
||||||
|
|
||||||
|
let row = vec![
|
||||||
|
date.format("%Y-%m-%d %H:%M:%S").to_string(),
|
||||||
|
batch,
|
||||||
|
record.get(2).unwrap_or("").to_string(),
|
||||||
|
record.get(3).unwrap_or("").to_string(),
|
||||||
|
record.get(4).unwrap_or("").to_string(),
|
||||||
|
record.get(5).unwrap_or("").to_string(),
|
||||||
|
record.get(6).unwrap_or("").to_string(),
|
||||||
|
record.get(7).unwrap_or("").to_string(),
|
||||||
|
record.get(8).unwrap_or("").to_string(),
|
||||||
|
record.get(9).unwrap_or("").to_string(),
|
||||||
|
record.get(10).unwrap_or("").to_string(),
|
||||||
|
record.get(11).unwrap_or("").to_string(),
|
||||||
|
record.get(12).unwrap_or("").to_string(),
|
||||||
|
record.get(13).unwrap_or("").to_string(),
|
||||||
|
record.get(14).unwrap_or("").to_string(),
|
||||||
|
record.get(15).unwrap_or("").to_string(),
|
||||||
|
];
|
||||||
|
|
||||||
|
writer.write_record(&row)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.flush()?;
|
||||||
|
Ok(batch_number)
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct ProductSummary {
|
struct ProductSummary {
|
||||||
name: String,
|
name: String,
|
||||||
@@ -171,22 +231,29 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let batch = read_csv_file(input_path)?;
|
let filename = input_path
|
||||||
|
.file_name()
|
||||||
|
.and_then(|n| n.to_str())
|
||||||
|
.unwrap_or("unknown")
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
println!("Konverterar {} till rensat format...", filename);
|
||||||
|
|
||||||
|
let cleaned_path =
|
||||||
|
base_output_dir.join(format!("{}.cleaned.csv", filename.trim_end_matches(".txt")));
|
||||||
|
let batch_number = clean_csv_file(input_path, &cleaned_path)?;
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"Laddade {} transaktioner från {}",
|
"Konverterade {} transaktioner",
|
||||||
batch.transactions.len(),
|
fs::read_to_string(&cleaned_path)?.lines().count() - 1
|
||||||
batch.filename
|
|
||||||
);
|
);
|
||||||
|
|
||||||
let batch_number = batch
|
|
||||||
.transactions
|
|
||||||
.first()
|
|
||||||
.map(|t| t.batch_number.clone())
|
|
||||||
.unwrap_or_else(|| "unknown".to_string());
|
|
||||||
let output_dir = base_output_dir.join(&batch_number);
|
let output_dir = base_output_dir.join(&batch_number);
|
||||||
|
|
||||||
fs::create_dir_all(&output_dir)?;
|
fs::create_dir_all(&output_dir)?;
|
||||||
|
|
||||||
|
let batch = read_csv_file(&cleaned_path)?;
|
||||||
|
println!("Laddade {} transaktioner", batch.transactions.len());
|
||||||
|
|
||||||
let first_date = batch.transactions.first().map(|t| t.date).unwrap();
|
let first_date = batch.transactions.first().map(|t| t.date).unwrap();
|
||||||
let last_date = batch.transactions.last().map(|t| t.date).unwrap();
|
let last_date = batch.transactions.last().map(|t| t.date).unwrap();
|
||||||
let period = format!(
|
let period = format!(
|
||||||
|
|||||||
Reference in New Issue
Block a user