diff --git a/src/commands/db.rs b/src/commands/db.rs index 61c8d61..20be49b 100644 --- a/src/commands/db.rs +++ b/src/commands/db.rs @@ -2,12 +2,25 @@ use crate::config::Config; use crate::db::Repository; use sqlx::mysql::MySqlPoolOptions; +/// Sets up the database for the specified environment. +/// +/// AI AGENT NOTE: This creates: +/// 1. The database (if not exists) +/// 2. customers table - stores fleet customers +/// 3. cards table - stores known cards linked to customers +/// 4. transactions table - stores all transactions +/// +/// Uses CREATE TABLE IF NOT EXISTS, so it's idempotent. +/// Note: We connect to the server without specifying a database first, +/// then create the database, then create tables in that database. pub async fn run_db_setup(repo: &Repository, config: &Config) -> anyhow::Result<()> { let env = &config.env; println!("Setting up database for environment: {}", env.as_str()); println!("Database: {}", env.database_name()); let database_url = &config.database.connection_url(); + // Strip database name to connect to server without selecting a database + // AI AGENT NOTE: MariaDB requires connecting without a database to create one let base_url = database_url.trim_end_matches(env.database_name()); let setup_pool = MySqlPoolOptions::new() @@ -26,6 +39,7 @@ pub async fn run_db_setup(repo: &Repository, config: &Config) -> anyhow::Result< drop(setup_pool); + // Now connect to the created database and create tables println!("Creating tables..."); sqlx::query( r#" @@ -95,6 +109,14 @@ pub async fn run_db_setup(repo: &Repository, config: &Config) -> anyhow::Result< Ok(()) } +/// Resets the database by dropping and recreating it. +/// +/// AI AGENT NOTE: This is a destructive operation that: +/// 1. Drops the database if it exists (loses all data!) +/// 2. Creates a fresh database +/// 3. Does NOT create tables (run db setup afterwards) +/// +/// Use this when schema changes require a fresh database. 
pub async fn run_db_reset(config: &Config) -> anyhow::Result<()> { let env = &config.env; println!("Resetting database for environment: {}", env.as_str()); diff --git a/src/commands/import.rs b/src/commands/import.rs index 7c96dab..0bae729 100644 --- a/src/commands/import.rs +++ b/src/commands/import.rs @@ -6,6 +6,22 @@ use std::collections::HashMap; use std::fs::File; use std::path::Path; +/// Imports transactions from a CSV file into the database. +/// +/// AI AGENT NOTE: This is the main data import function. It handles: +/// +/// 1. PARSING: Reads tab-separated CSV and extracts transaction data +/// 2. FILTERING: Only includes transactions where: +/// - amount > 0 (excludes authorizations/cancellations) +/// - (rows with an empty customer_number are NOT filtered out; see Business Rules) +/// 3. COLLECTION: Gathers unique customers and known cards first +/// 4. UPSERT: Creates/updates customer and card records +/// 5. BATCH INSERT: Inserts transactions in batches of 500 +/// +/// Business Rules: +/// - Transactions with empty customer_number are stored but not linked to customers +/// - Only "known" cards (with full card numbers) are stored in the cards table +/// - Anonymized cards (with asterisks) are stored only in transactions.card_number pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<()> { println!("Reading CSV file: {:?}", csv_path); @@ -17,21 +33,30 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() .from_reader(file); let mut transactions = Vec::new(); + // Tracks unique customers with their card_report_group + // Key: customer_number, Value: card_report_group let mut seen_customers: HashMap = HashMap::new(); + // Tracks unique known cards and their customer + // Key: card_number, Value: customer_number + // AI AGENT NOTE: Only full card numbers are stored here, not anonymized ones let mut seen_cards: HashMap = HashMap::new(); for result in rdr.records() { let record = result?; if let Some(tx) = 
parse_record(&record)? { + // Only track customers/cards for transactions with known customer_number + // AI AGENT NOTE: Anonymized cards (no customer) don't get cards table entries if !tx.customer_number.is_empty() { let card_report_group: u8 = tx.card_report_group_number.parse().unwrap_or(0); if !seen_customers.contains_key(&tx.customer_number) { seen_customers.insert(tx.customer_number.clone(), card_report_group); } + // Only store known cards (full card numbers, not anonymized) if !seen_cards.contains_key(&tx.card_number) { seen_cards.insert(tx.card_number.clone(), tx.customer_number.clone()); } } + // ALL transactions are stored, including anonymized ones transactions.push(tx); } } @@ -40,6 +65,7 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() println!("Unique customers: {}", seen_customers.len()); println!("Unique known cards: {}", seen_cards.len()); + // Phase 1: Import customers println!("\nImporting customers..."); let mut customer_ids: HashMap = HashMap::new(); for (customer_number, card_report_group) in &seen_customers { @@ -52,6 +78,8 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() println!(" Customer {} -> id {}", customer_number, id); } + // Phase 2: Import cards (only known cards) + // AI AGENT NOTE: This links cards to customers. Anonymized cards are NOT inserted. println!("\nImporting cards..."); let mut card_ids: HashMap = HashMap::new(); for (card_number, customer_number) in &seen_cards { @@ -66,12 +94,16 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() } } + // Phase 3: Import transactions + // AI AGENT NOTE: All transactions are imported, but only those with known customers + // have a customer_id. Anonymized transactions have customer_id = NULL. 
println!("\nImporting transactions..."); let batch_size = 500; let mut total_inserted = 0u64; let mut batch: Vec = Vec::with_capacity(batch_size); for tx in transactions { + // customer_id is None if customer_number was empty (anonymized transaction) let customer_id = customer_ids.get(&tx.customer_number).copied(); let new_tx = NewTransaction { @@ -82,13 +114,13 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() price: tx.price, quality_code: tx.quality, quality_name: tx.quality_name, - card_number: tx.card_number, + card_number: tx.card_number, // Always stored, even for anonymized cards station: tx.station, terminal: tx.terminal, pump: tx.pump, receipt: tx.receipt, control_number: if tx.control_number.is_empty() { None } else { Some(tx.control_number) }, - customer_id, + customer_id, // NULL for anonymized transactions }; batch.push(new_tx); @@ -101,6 +133,7 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() } } + // Insert remaining batch if !batch.is_empty() { let inserted = repo.insert_transactions_batch(&batch).await?; total_inserted += inserted; @@ -112,6 +145,11 @@ pub async fn run_import(csv_path: &Path, repo: &Repository) -> anyhow::Result<() Ok(()) } +/// Represents a parsed transaction from CSV. +/// +/// AI AGENT NOTE: This is an intermediate struct for CSV parsing. +/// It mirrors the CSV column structure and is converted to NewTransaction +/// for database insertion. struct CsvTransaction { date: NaiveDateTime, batch_number: String, @@ -134,14 +172,39 @@ fn get_field(record: &csv::StringRecord, index: usize) -> &str { record.get(index).unwrap_or("") } +/// Parses a single record from the CSV file. 
+/// +/// AI AGENT NOTE: Returns None if: +/// - amount <= 0 (excludes authorizations/cancellations) +/// Returns an error (not None) if date parsing fails. +/// +/// CSV Column Mapping (0-indexed): +/// 0: Date (multiple formats supported) +/// 1: Batch number +/// 2: Amount +/// 3: Volume +/// 4: Price +/// 5: Quality code +/// 6: Quality name +/// 7: Card number +/// 8: Card type (ignored - redundant) +/// 9: Customer number +/// 10: Station +/// 11: Terminal +/// 12: Pump +/// 13: Receipt +/// 14: Card report group number +/// 15: Control number fn parse_record(record: &csv::StringRecord) -> anyhow::Result> { let date_str = get_field(record, 0); + // Try multiple date formats since source data may vary let date = NaiveDateTime::parse_from_str(date_str, "%Y-%m-%d %H:%M:%S") .or_else(|_| NaiveDateTime::parse_from_str(date_str, "%m/%d/%Y %I:%M:%S %p")) .map_err(|e| anyhow::anyhow!("Failed to parse date '{}': {}", date_str, e))?; let amount: f64 = get_field(record, 2).parse().unwrap_or(0.0); + // Skip zero/negative amounts (authorizations, cancellations) if amount <= 0.0 { return Ok(None); } diff --git a/src/config.rs b/src/config.rs index cf1c94f..daea13e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,15 +1,28 @@ use std::fs; use std::path::Path; +/// Environment selection for multi-database setup. +/// +/// AI AGENT NOTE: This enum controls which database configuration is loaded. +/// Each environment maps to a different database name: +/// - Prod: rusty_petroleum (production data) +/// - Dev: rusty_petroleum_dev (development) +/// - Test: rusty_petroleum_test (testing) +/// +/// The environment is set via the --env CLI flag and defaults to Prod. 
#[derive(Debug, Clone, Default, PartialEq)] pub enum Env { + /// Production environment - default for safety (requires explicit --env for dev/test) #[default] Prod, + /// Development environment - rusty_petroleum_dev Dev, + /// Testing environment - rusty_petroleum_test Test, } impl Env { + /// Returns the environment name as a string for CLI/config file naming. pub fn as_str(&self) -> &str { match self { Env::Prod => "prod", @@ -18,6 +31,12 @@ impl Env { } } + /// Returns the database name for this environment. + /// + /// AI AGENT NOTE: Database naming convention: + /// - Production: rusty_petroleum (no suffix) + /// - Development: rusty_petroleum_dev + /// - Testing: rusty_petroleum_test pub fn database_name(&self) -> &str { match self { Env::Prod => "rusty_petroleum", @@ -30,6 +49,8 @@ impl Env { impl std::str::FromStr for Env { type Err = String; + /// Parses environment from CLI argument. + /// Accepts both short and long forms for flexibility. fn from_str(s: &str) -> Result { match s.to_lowercase().as_str() { "prod" | "production" => Ok(Env::Prod), @@ -40,12 +61,14 @@ impl std::str::FromStr for Env { } } +/// Root configuration struct containing environment and database settings. #[derive(Debug, Clone)] pub struct Config { pub env: Env, pub database: DatabaseConfig, } +/// Database connection configuration. #[derive(Debug, Clone)] pub struct DatabaseConfig { pub host: String, @@ -56,6 +79,10 @@ pub struct DatabaseConfig { } impl DatabaseConfig { + /// Builds a MySQL connection URL from configuration. + /// + /// AI AGENT NOTE: Handles empty password by omitting it from URL. + /// This allows connections without passwords (e.g., local development). pub fn connection_url(&self) -> String { if self.password.is_empty() { format!( @@ -72,6 +99,17 @@ impl DatabaseConfig { } impl Config { + /// Loads configuration for the specified environment. + /// + /// AI AGENT NOTE: Config file loading order (first existing file wins): + /// 1. 
config.toml - local override (gitignored, for personal overrides) + /// 2. config.<env>.toml - environment-specific (gitignored) + /// 3. config.example.toml - fallback template (tracked in git) + /// + /// This allows: + /// - Committed example config as reference + /// - Environment-specific configs for different developers + /// - Local overrides without modifying tracked files pub fn load(env: Env) -> anyhow::Result { let config_path = Path::new("config.toml"); let example_path = Path::new("config.example.toml"); @@ -94,6 +132,7 @@ impl Config { Self::load_from_path(path, env) } + /// Loads configuration from a specific file path. pub fn load_from_path(path: &Path, env: Env) -> anyhow::Result { let contents = fs::read_to_string(path) .map_err(|e| anyhow::anyhow!("Failed to read config file {:?}: {}", path, e))?; @@ -107,6 +146,8 @@ impl Config { } } +/// Intermediate struct for TOML deserialization. +/// AI AGENT NOTE: This mirrors the [database] section of config.toml. #[derive(serde::Deserialize)] struct TomlConfig { database: TomlDatabaseConfig, diff --git a/src/db/models.rs b/src/db/models.rs index 0466bfc..5cf8cdb 100644 --- a/src/db/models.rs +++ b/src/db/models.rs @@ -3,6 +3,13 @@ use chrono::{DateTime, NaiveDateTime, Utc}; use serde::{Deserialize, Serialize}; use sqlx::FromRow; +/// Represents a fleet/corporate customer in the system. +/// +/// AI AGENT NOTE: Customers are identified by customer_number and have +/// associated cards. Not all transactions have a customer (retail/anonymous). +/// The card_report_group indicates customer classification: +/// - 1: Fleet customers (have customer_number) +/// - 3, 4: Retail customers (no customer_number) #[derive(Debug, Clone, Serialize, Deserialize, FromRow)] pub struct Customer { pub id: u32, @@ -12,12 +19,22 @@ pub struct Customer { pub updated_at: DateTime, } +/// Input struct for creating a new customer during import. 
#[derive(Debug, Clone)] pub struct NewCustomer { pub customer_number: String, pub card_report_group: u8, } +/// Represents a fuel card belonging to a customer. +/// +/// AI AGENT NOTE: This table stores the authoritative mapping from card_number +/// to customer. Only "known" cards (cards belonging to fleet customers) are +/// stored here. Anonymized cards (with asterisks like "554477******9952") are +/// NOT stored in this table - they appear directly in transactions.card_number. +/// +/// Design rationale: Cards table contains ONLY known cards. This keeps the +/// cards table small and ensures every card has a valid customer relationship. #[derive(Debug, Clone, Serialize, Deserialize, FromRow)] pub struct Card { pub id: u32, @@ -27,12 +44,24 @@ pub struct Card { pub updated_at: DateTime, } +/// Input struct for creating a new card during import. #[derive(Debug, Clone)] pub struct NewCard { pub card_number: String, pub customer_id: u32, } +/// Represents a fuel transaction in the database. +/// +/// AI AGENT NOTE: This table stores ALL transactions, both anonymous and known: +/// - card_number: Always populated (even for anonymized cards) +/// - customer_id: NULL for anonymous transactions, FK to customers for fleet +/// +/// To find a customer's transactions, use: +/// SELECT * FROM transactions WHERE customer_id = <customer_id> +/// +/// To find all transactions for a card: +/// SELECT * FROM transactions WHERE card_number = '<card_number>' #[derive(Debug, Clone, Serialize, Deserialize, FromRow)] pub struct Transaction { pub id: u64, @@ -49,10 +78,14 @@ pub struct Transaction { pub pump: String, pub receipt: String, pub control_number: Option, - pub customer_id: Option, + pub customer_id: Option, // NULL for anonymized transactions pub created_at: DateTime, } +/// Input struct for inserting a new transaction. +/// +/// AI AGENT NOTE: Uses f64 for numeric fields during construction (from CSV parsing), +/// but BigDecimal is used in the database for precision. 
#[derive(Debug, Clone)] pub struct NewTransaction { pub transaction_date: NaiveDateTime, diff --git a/src/db/repository.rs b/src/db/repository.rs index b6060dd..2ce21a1 100644 --- a/src/db/repository.rs +++ b/src/db/repository.rs @@ -2,6 +2,11 @@ use crate::db::models::{Card, Customer, NewCard, NewCustomer, NewTransaction, Tr use bigdecimal::BigDecimal; use sqlx::MySqlPool; +/// Repository for database operations. +/// +/// AI AGENT NOTE: This is the main data access layer. All database operations +/// should go through this struct. It wraps a MySQL connection pool and provides +/// methods for CRUD operations on customers, cards, and transactions. pub struct Repository { pool: MySqlPool, } @@ -15,6 +20,11 @@ impl Repository { &self.pool } + /// Upserts a customer by customer_number. + /// + /// AI AGENT NOTE: Uses ON DUPLICATE KEY UPDATE to handle re-imports. + /// If customer exists, only card_report_group is updated (it's derived from + /// transaction data and may differ between batches). pub async fn upsert_customer(&self, customer: &NewCustomer) -> anyhow::Result { sqlx::query( r#" @@ -40,6 +50,7 @@ impl Repository { Ok(row.0) } + /// Finds a customer by their customer_number. pub async fn find_customer_by_number( &self, customer_number: &str, @@ -56,6 +67,13 @@ impl Repository { Ok(result) } + /// Upserts a card by card_number. + /// + /// AI AGENT NOTE: Cards are only created for known customers (fleet accounts). + /// Anonymized cards are NOT inserted here - they only appear in transactions. + /// + /// Design: This ensures cards.customer_id is always NOT NULL, enforcing + /// the business rule that every card must belong to a customer. pub async fn upsert_card(&self, card: &NewCard) -> anyhow::Result { sqlx::query( r#" @@ -81,6 +99,10 @@ impl Repository { Ok(row.0) } + /// Finds a card by card_number. + /// + /// AI AGENT NOTE: Returns None for anonymized cards (e.g., "554477******9952") + /// since these are not stored in the cards table. 
pub async fn find_card_by_number(&self, card_number: &str) -> anyhow::Result> { let result = sqlx::query_as( "SELECT id, card_number, customer_id, created_at, updated_at @@ -94,6 +116,14 @@ impl Repository { Ok(result) } + /// Inserts multiple transactions in a single batch for performance. + /// + /// AI AGENT NOTE: Uses bulk INSERT for efficiency. The batch size is + /// controlled by the caller (typically 500 rows per batch). + /// + /// IMPORTANT: This constructs raw SQL with escaped values. While sqlx doesn't + /// support parameterized bulk insert, we escape single quotes to prevent SQL + /// injection in string fields. pub async fn insert_transactions_batch( &self, transactions: &[NewTransaction], @@ -134,6 +164,10 @@ impl Repository { Ok(result.rows_affected()) } + /// Retrieves all transactions for a customer within a date range. + /// + /// AI AGENT NOTE: Only returns transactions for known customers (customer_id IS NOT NULL). + /// Anonymous transactions are excluded from invoices. pub async fn get_customer_invoice( &self, customer_number: &str, @@ -162,6 +196,10 @@ impl Repository { Ok(result) } + /// Gets sales summary grouped by product (quality_name). + /// + /// AI AGENT NOTE: Includes ALL transactions (both anonymous and known). + /// Useful for overall sales reporting. pub async fn get_sales_summary_by_product( &self, start_date: &str, @@ -183,6 +221,10 @@ impl Repository { Ok(result) } + /// Gets sales summary grouped by customer. + /// + /// AI AGENT NOTE: Only includes known customers (JOIN with customers table). + /// Anonymous transactions are excluded since they have no customer_id. pub async fn get_sales_summary_by_customer( &self, start_date: &str, @@ -207,6 +249,9 @@ impl Repository { } } +/// Summary of sales by product (quality_name). +/// +/// AI AGENT NOTE: Used for reporting total sales per product type. 
#[derive(Debug, sqlx::FromRow)] pub struct ProductSummary { pub quality_name: String, @@ -215,6 +260,9 @@ pub struct ProductSummary { pub total_volume: BigDecimal, } +/// Summary of sales by customer. +/// +/// AI AGENT NOTE: Used for reporting total sales per fleet customer. #[derive(Debug, sqlx::FromRow)] pub struct CustomerSummary { pub customer_number: String, diff --git a/src/main.rs b/src/main.rs index bee48bd..2e164f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,10 @@ fn fmt(v: f64) -> String { format!("{:.2}", v) } +/// Normalizes CSV date format and cleans the data. +/// +/// AI AGENT NOTE: Input CSV may have dates in different formats (MM/DD/YYYY or YYYY-MM-DD). +/// This function standardizes to YYYY-MM-DD HH:MM:SS format for consistent parsing. fn clean_csv_file( input_path: &Path, output_path: &Path, @@ -233,6 +237,11 @@ struct CustomerTemplate { generated_date: String, } +/// Parses the --env flag from CLI arguments. +/// +/// AI AGENT NOTE: The --env flag can appear anywhere in the argument list. +/// Returns the environment and the index of the "--env" flag (for removal). +/// Defaults to Prod if not specified. fn parse_env_flag(args: &[String]) -> (Env, usize) { for (i, arg) in args.iter().enumerate() { if arg == "--env" && i + 1 < args.len() { @@ -248,6 +257,10 @@ fn parse_env_flag(args: &[String]) -> (Env, usize) { (Env::default(), 0) } +/// Removes --env and its value from argument list. +/// +/// AI AGENT NOTE: This allows the --env flag to appear anywhere in the +/// command without affecting positional argument parsing. fn remove_env_flags(args: &[String]) -> Vec { let (_, env_idx) = parse_env_flag(args); let mut result = Vec::with_capacity(args.len());