import math lookup_dict = { "balmag04": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 427.0 }, "utlmag01": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 446.0 }, "utlmag02": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 486.0 }, "utlmag03": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 492.0 }, "utlmag04": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 414.0 }, "mnpmag03": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 494.0 }, "duemag01": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 488.0 }, "trv01": { "data_type": "int", "valid_min": 1.0, "valid_max": 24.0, "default_treatment_type": "unk", "observed_cap_min_value": 1.0, "observed_cap_max_value": 24.0 }, "trv02": { "data_type": "int", "valid_min": 0.0, "valid_max": 12.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12.0 }, "index01": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0 }, "index02": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0 }, "rvlr75": { "data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0 }, "rvlr77": { "data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0 }, "rev12": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 109.0 }, "rev13": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 82.0 }, "rev14": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 56.0 }, "rev54": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0 }, "rev84": { "data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0 }, "bkc14": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0 }, "bkc84": { "data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0 }, "ret12": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 42.0 }, "evtg04": { "data_type": "int", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 845.0 }, "rev201": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0 }, "rev202": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0 }, "rev203": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0 }, "rev223": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 49.0 }, "rev224": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0 }, "rev225": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0 }, "walshr02": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 2.0 }, "rev231": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 18101.0 }, "rev232": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38963.0 }, "rev233": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 68709.0 }, "rev252": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13988.0 }, "rev253": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12347.0 }, "all231": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38949.0 }, "at28a": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1330166.0 }, "at28b": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 351273.0 }, "at36s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0 }, "bc20s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 510.0 }, "bc21s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 190.0 }, "bc28s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 205768.0 }, "bc36s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0 }, "bc97a": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 184747.0 }, "bc98a": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 186395.0 }, "bc102s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 31000.0 }, "bc104s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 99.0 }, "bc107s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 15.0 }, "br20s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 495.0 }, "fi21s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162.0 }, "fi34s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0 }, "fi35s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 70016.0 }, "g051s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0 }, "g102s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0 }, "g105s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0 }, "g201a": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 255142.0 }, "g221d": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1.0 }, "g232s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 21.0 }, "g250a": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13.0 }, "g960s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 17.0 }, "g990s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 10.0 }, "mt20s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 261.0 }, "mt34s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0 }, "pb28s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 192100.0 }, "pb34s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0 }, "re28s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 311518.0 }, "re36s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0 }, "re102s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 41750.0 }, "s004s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 179.0 }, "s114s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0 }, "st32s": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 221686.0 }, "g106s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 590.0 }, "g242b": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0 }, "us21s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 153.0 }, "us34s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0 }, "g403s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0 }, "g405s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0 }, "g408s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0 }, "g411s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 8.0 }, "g416s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0 }, "g417s": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0 }, "agg901": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0 }, "agg902": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0 }, "agg908": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 69376.0 }, "agg911": { "data_type": "float", "valid_min": 0.0, "valid_max": 10000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 107.0 }, "rle904": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 309634.0 }, "p02d": { "data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0 }, "p02h": { "data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 7.0 }, "balmag01": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 474.0 }, "balmag02": { "data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 516.0 }, "cv13": { "data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0 }, "cv17": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0 }, "cv21": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171153.0 }, "cv25": { "data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 33.0 }, "ct319": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 98.0 }, "ct320": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 52.0 }, "cta11": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 71.0 }, "cta20": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0 }, "cta21": { "data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 148.0 }, "paymnt08": { "data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0 }, "rev321": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 234408.0 }, "rev322": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 219698.0 }, "bkc321": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171024.0 }, "bkc322": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162590.0 }, "bkc323": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 152068.0 }, "bkc324": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 136763.0 }, "bkc327": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171143.0 }, "bkc328": { "data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 164352.0 }, "eads66": { "data_type": "float", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 820.0 } } # 1. Pre-processing: type casting and variable treatment def pre_processing(input_dict): processed = {} for var, cfg in lookup_dict.items(): val = input_dict.get(var) # Cast to required type try: if cfg.get("data_type") == "int": val = int(val) elif cfg.get("data_type") == "float": val = float(val) except (ValueError, TypeError): val = None # Variable treatment vmin = cfg.get("valid_min") vmax = cfg.get("valid_max") if val is not None and not (math.isnan(vmin) or math.isnan(vmax)): if cfg.get("default_treatment_type") == "unk": if val < vmin or val > vmax: val = float("nan") cmin = cfg.get("observed_cap_min_value") cmax = cfg.get("observed_cap_max_value") if not math.isnan(val): if cmin is not None and vmin <= val < cmin: val = cmin if cmax is not None and cmax < val <= vmax: val = cmax processed[var] = val return processed