import csv

# Input and output paths.
export_file = "Item Price.csv"  # ERPNext export to read
output_file = "Item Price-import-ready.csv"  # cleaned CSV to write

# Column Name values written as the output header, in output order.
# Each name is looked up in the export's Column Name row; a name that is
# not found there is written as an empty cell on every data row.
template_columns = [
    "name",
    "item_code",
    "uom",
    "price_list",
    "price_list_rate",
    "packing_unit",
    "item_name",
    "brand",
    "item_description",
    "customer",
    "supplier",
    "batch_no",
    "buying",
    "selling",
    "currency",
    "valid_from",
    "lead_time_days",
    "valid_upto",
    "note",
    "reference",
]

# 0-based index of the export row that holds the Column Name values
# (usually the 20th row of the file).
COLUMN_NAME_ROW = 19
# 1-based row number of the first data row: the reading code slices from
# index DATA_START_ROW - 1, i.e. the row right after the Column Name row.
# NOTE(review): confirm this matches the layout of the actual export.
DATA_START_ROW = 21

def clean_cell(cell: str) -> str:
    """Strip one layer of wrapping double quotes from a CSV cell.

    A cell wrapped in triple quotes (``\"\"\"value\"\"\"``) loses all three
    quote characters on each side; otherwise a cell wrapped in single
    double quotes (``"value"``) loses one on each side. Only one layer is
    removed per call, and cells that are not wrapped are returned as-is.

    Args:
        cell: The cell text as produced by ``csv.reader``.

    Returns:
        The cell text without its surrounding quote wrapper.
    """
    for wrapper in ('"""', '"'):
        # The cell must be long enough to hold a distinct opening and
        # closing wrapper. Without this check a lone '"' would match as
        # both prefix and suffix and be erased to an empty string.
        if (
            len(cell) >= 2 * len(wrapper)
            and cell.startswith(wrapper)
            and cell.endswith(wrapper)
        ):
            return cell[len(wrapper):-len(wrapper)]
    return cell

# Read the whole export into memory. The header and data rows are picked
# out by position, so the file must contain at least COLUMN_NAME_ROW + 1 rows.
with open(export_file, newline='', encoding='utf-8') as f:
    export_rows = list(csv.reader(f))

if len(export_rows) <= COLUMN_NAME_ROW:
    # Fail with an actionable message rather than a bare
    # "list index out of range" when the export is shorter than expected.
    raise IndexError(
        f"{export_file} has {len(export_rows)} row(s); expected the "
        f"Column Name row at 0-based index {COLUMN_NAME_ROW}"
    )

# Header names, with any leftover wrapping quotes removed.
export_columns = [clean_cell(c) for c in export_rows[COLUMN_NAME_ROW]]
# Data rows begin at 0-based index DATA_START_ROW - 1 and run to end of file.
data_rows = export_rows[DATA_START_ROW - 1:]

# Resolve each template column to its position in the export header.
# Only the first occurrence of a header name counts; template columns that
# are absent from the export map to None so the writer can emit an empty cell.
first_seen = {}
for position, header in enumerate(export_columns):
    first_seen.setdefault(header, position)
col_indexes = [first_seen.get(wanted) for wanted in template_columns]

# Emit the import-ready file: the template header followed by one output
# row per export data row, with every field quoted.
with open(output_file, "w", newline='', encoding='utf-8') as f_out:
    writer = csv.writer(f_out, quoting=csv.QUOTE_ALL)
    writer.writerow(template_columns)

    for source_row in data_rows:
        width = len(source_row)
        # Columns missing from the export (None) and cells beyond the end
        # of a short row are written as empty strings.
        writer.writerow([
            clean_cell(source_row[pos]) if pos is not None and pos < width else ""
            for pos in col_indexes
        ])

print(f"Clean Item Price CSV written to {output_file}")