Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fe9e388f36 | |||
| 8f7a795573 | |||
| 848a80fb09 | |||
| 13c5f21f58 | |||
| 295b9d22a7 | |||
| 46e6bc30ba | |||
| 7c6cc57dd1 | |||
| f40b9e685f | |||
| 95034efa4e | |||
| e2c2560e0a | |||
| f2e9d599b9 | |||
| 9c0d95f9e3 |
78
block.py
78
block.py
@ -1,21 +1,65 @@
|
||||
@flowx_block
|
||||
def example_function(request: dict) -> dict:
|
||||
import logging
|
||||
from typing import Dict
|
||||
from pre_processing import pre_processing
|
||||
from processing import processing
|
||||
from post_processing import post_processing
|
||||
from parse_report import extract_model_variables
|
||||
|
||||
# Processing logic here...
|
||||
# Configure the root logger when this module is imported: INFO and above,
# each record rendered as "<time> [<LEVEL>] <logger name> - <message>".
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
    level=logging.INFO,
)

# Module-scoped logger used below to record the final scoring result.
logger = logging.getLogger(__name__)
|
||||
|
||||
return {
|
||||
"meta_info": [
|
||||
{
|
||||
"name": "created_date",
|
||||
"type": "string",
|
||||
"value": "2024-11-05"
|
||||
|
||||
def __main__(application_id: str, creditBureau: dict) -> Dict:
    """
    Score one application from its credit-bureau payload.

    The payload is reduced to model variables, which are then run through
    pre-processing, processing and post-processing. When nothing can be
    extracted (empty mapping, or every extracted value is None) the pipeline
    is skipped and a fixed fallback result is returned instead.

    Returns:
        A dict containing:
        - application_id: the identifier passed in
        - prediction: taken from the processing output (0.99 on the fallback path)
        - grade: taken from post-processing ('M14' on the fallback path)
        - reason_codes: taken from post-processing (a single
          "Lack of account information" entry on the fallback path)
    """
    record = extract_model_variables(creditBureau)

    # Guard clause: proceed only when at least one variable carries a value.
    has_values = bool(record) and any(v is not None for v in record.values())
    if not has_values:
        fallback_result = {
            'application_id': application_id,
            'prediction': 0.99,
            'grade': 'M14',
            "reason_codes": [{
                "code": None,
                "rank": None,
                "description": "Lack of account information"
            }]
        }
        logger.info(
            f"final_result (early exit due to missing or empty extracted variables): {fallback_result}")
        return fallback_result

    # Full pipeline: pre-process -> model -> grade / reason codes.
    model_output = processing(pre_processing(record))
    graded = post_processing(model_output, record)

    scored_result = {
        'application_id': application_id,
        'prediction': model_output['prediction'],
        'grade': graded['grade'],
        'reason_codes': graded['reason_codes'],
    }
    logger.info(f"final_result: {scored_result}")
    return scored_result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry: score the single application stored in the JSON
    # file named by the first argument. The whole document is passed as the
    # bureau payload and its "application_id" field as the identifier.
    import json
    import sys

    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <application.json>")

    # JSON interchange is UTF-8; do not depend on the platform's default
    # text encoding when reading it.
    with open(sys.argv[1], encoding="utf-8") as f:
        data = json.load(f)
    __main__(application_id=data["application_id"], creditBureau=data)
|
||||
|
||||
210
parse_report.py
Normal file
210
parse_report.py
Normal file
@ -0,0 +1,210 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
# Maps each model variable name to the code of the add-on product whose
# characteristics carry it. extract_model_variables() looks every variable up
# as (product code, lower-cased characteristic id) and emits the variables in
# this dict's order, so the order of entries is part of the output shape.
VARIABLE_TO_CODE_MAP = {
    "agg901": "00H86",
    "agg902": "00H86",
    "agg908": "00H86",
    "agg911": "00H86",
    "all231": "00ASH",
    "at28a": "00WR3",
    "at28b": "00WR3",
    "at36s": "00WR3",
    "balmag01": "00H88",
    "balmag02": "00H88",
    "balmag04": "00AI5",
    "bc102s": "00WR3",
    "bc104s": "00WR3",
    "bc107s": "00WR3",
    "bc20s": "00WR3",
    "bc21s": "00WR3",
    "bc28s": "00WR3",
    "bc36s": "00WR3",
    "bc97a": "00WR3",
    "bc98a": "00WR3",
    "bkc14": "00ASF",
    "bkc321": "00ASI",
    "bkc322": "00ASI",
    "bkc323": "00ASI",
    "bkc324": "00ASI",
    "bkc327": "00ASI",
    "bkc328": "00ASI",
    "bkc84": "00ASF",
    "br20s": "00WR3",
    "ct319": "00AI9",
    "ct320": "00AI9",
    "cta11": "00AI9",
    "cta20": "00AI9",
    "cta21": "00AI9",
    "cv13": "00V26",
    "cv17": "00V26",
    "cv21": "00V26",
    "cv25": "00V26",
    "duemag01": "00AI5",
    "fi21s": "00WR3",
    "fi34s": "00WR3",
    "fi35s": "00WR3",
    "g051s": "00WR3",
    "g102s": "00WR3",
    "g105s": "00WR3",
    "g106s": "00WR3",
    "g201a": "00WR3",
    "g221d": "00WR3",
    "g232s": "00WR3",
    "g242b": "00WR3",
    "g250a": "00WR3",
    "g403s": "00WR3",
    "g405s": "00WR3",
    "g408s": "00WR3",
    "g411s": "00WR3",
    "g416s": "00WR3",
    "g417s": "00WR3",
    "g960s": "00WR3",
    "g990s": "00WR3",
    "index01": "00V53",
    "index02": "00V53",
    "mnpmag03": "00AI5",
    "mt20s": "00WR3",
    "mt34s": "00WR3",
    "p02d": "00WBO",
    "p02h": "00WBO",
    "paymnt08": "00H91",
    "pb28s": "00WR3",
    "pb34s": "00WR3",
    "re102s": "00WR3",
    "re28s": "00WR3",
    "re36s": "00WR3",
    "ret12": "00ASF",
    "rev12": "00ASF",
    "rev13": "00ASF",
    "rev14": "00ASF",
    "rev201": "00ASG",
    "rev202": "00ASG",
    "rev203": "00ASG",
    "rev223": "00ASG",
    "rev224": "00ASG",
    "rev225": "00ASG",
    "rev231": "00ASH",
    "rev232": "00ASH",
    "rev233": "00ASH",
    "rev252": "00ASH",
    "rev253": "00ASH",
    "rev321": "00ASI",
    "rev322": "00ASI",
    "rev54": "00ASF",
    "rev84": "00ASF",
    "rle904": "00ASJ",
    "rvlr75": "00WP4",
    "rvlr77": "00WP4",
    "s004s": "00WR3",
    "s114s": "00WR3",
    "st32s": "00WR3",
    "trv01": "00H87",
    "trv02": "00H87",
    "us21s": "00WR3",
    "us34s": "00WR3",
    "utlmag01": "00AI5",
    "utlmag02": "00AI5",
    "utlmag03": "00AI5",
    "utlmag04": "00AI5",
    "walshr02": "00H90",
}
|
||||
|
||||
# Maps each score variable name to the code of the add-on product whose
# score result and score factors extract_model_variables() reports under
# "<name>" and "<name>_factors".
SCORE_TO_CODE_MAP = {
    "evtg04": "001NN",
    "eads66": "00WDC"
}
|
||||
|
||||
|
||||
def safe_get(d, *keys):
    """Return the first usable value stored in ``d`` under any of ``keys``.

    Meant for payloads whose field names may arrive in either snake_case or
    camelCase: pass both spellings and the first one that holds a value wins.

    Args:
        d: The mapping to read from. Anything that is not a dict is treated
            as having no keys at all.
        *keys: Candidate key names, tried in the order given.

    Returns:
        The value of the first key that is present with a non-None value, or
        an empty dict when ``d`` is not a dict or no key qualifies. Returning
        an empty dict lets callers chain lookups without checking each level.
    """
    if not isinstance(d, dict):
        return {}
    for key in keys:
        value = d.get(key)
        # An explicit null is treated like a missing key, so callers never
        # receive None and can keep chaining .get() / safe_get() calls.
        if value is not None:
            return value
    return {}
|
||||
|
||||
|
||||
def extract_model_variables(creditBureau: dict) -> dict:
    """Flatten a credit-bureau payload into the variables the model consumes.

    The add-on products are read from
    creditBureau -> product -> subject -> subjectRecord -> addOnProduct
    (snake_case spellings are accepted where the code tries both). For each
    product, its score-model characteristics, score result and score factors
    are collected and then projected through VARIABLE_TO_CODE_MAP and
    SCORE_TO_CODE_MAP.

    Malformed parts of the payload (null or wrongly typed nodes, non-dict list
    entries, non-string characteristic ids) are skipped rather than raising.

    Args:
        creditBureau: The parsed payload. A falsy value yields an empty dict.

    Returns:
        A dict with one entry per variable in VARIABLE_TO_CODE_MAP (None when
        the payload does not carry it) and, for each score in
        SCORE_TO_CODE_MAP that is present, the score string with any leading
        "+" removed under "<score>" and up to four {"code", "rank"} dicts
        under "<score>_factors".
    """
    if not creditBureau:
        return {}

    # Step 1: locate the add-on products, accepting either key casing.
    credit_bureau = safe_get(creditBureau, "creditBureau", "credit_bureau")
    subject_node = safe_get(
        safe_get(safe_get(credit_bureau, "product"), "subject"),
        "subject_record", "subjectRecord",
    )
    add_ons = safe_get(subject_node, "add_on_product", "addOnProduct")
    if not isinstance(add_ons, list):
        add_ons = []

    # Step 2: flatten everything into {(product code, item name): value}.
    value_map = {}
    for product in add_ons:
        if not isinstance(product, dict):
            continue
        code = product.get("code")
        score_model = safe_get(product, "score_model", "scoreModel")
        # The key may be present with a null or non-dict value.
        if not isinstance(score_model, dict):
            continue

        # 2a. Characteristics, keyed by lower-cased id.
        characteristics = score_model.get("characteristic")
        if not isinstance(characteristics, list):
            characteristics = []
        for char in characteristics:
            if not isinstance(char, dict) or "value" not in char:
                continue
            char_id = char.get("id")
            if isinstance(char_id, str):
                value_map[(code, char_id.lower())] = char["value"]

        # 2b. Score result and factors, when a score node is present.
        score = safe_get(score_model, "score")
        if not isinstance(score, dict) or not score:
            continue

        results = score.get("results")
        if isinstance(results, str):
            value_map[(code, "score")] = results

        # "factors" may be missing or null; "factor" may be a non-list.
        factors_node = score.get("factors")
        raw_factors = (
            factors_node.get("factor") if isinstance(factors_node, dict) else None
        )
        if not isinstance(raw_factors, list):
            raw_factors = []

        # Only the first four entries are considered; malformed ones among
        # them are dropped, not replaced by later entries.
        top_factors = [
            {"code": f.get("code"), "rank": f.get("rank")}
            for f in raw_factors[:4]
            if isinstance(f, dict) and "code" in f and "rank" in f
        ]
        if top_factors:
            value_map[(code, "factors")] = top_factors

    # Step 3a: every mapped variable gets an entry, None when absent.
    extracted = {
        var: value_map.get((code, var))
        for var, code in VARIABLE_TO_CODE_MAP.items()
    }

    # Step 3b: score values and their factor lists, only when present.
    for key, code in SCORE_TO_CODE_MAP.items():
        value = value_map.get((code, "score"))
        if value:
            extracted[key] = value.lstrip("+")

        factor_list = value_map.get((code, "factors"))
        if factor_list:
            extracted[f"{key}_factors"] = factor_list

    return extracted
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry: for every JSON file named on the command line,
    # print the variables extracted from it, one "name: value" per line.
    for filename in sys.argv[1:]:
        # JSON interchange is UTF-8; do not depend on the platform's default
        # text encoding when reading it.
        with open(filename, encoding="utf-8") as f:
            data = json.load(f)
        print(f"\n--- Extracting from: {filename} ---")
        result = extract_model_variables(data)
        for k, v in result.items():
            print(f"{k}: {v}")
|
||||
253
post_processing.py
Normal file
253
post_processing.py
Normal file
@ -0,0 +1,253 @@
|
||||
import math
|
||||
|
||||
# Factor code -> description, used by generate_reason_codes() when the score
# key is "eads66". Keys are three-character, zero-padded strings; the lookup
# uses str(code) as-is, so a code must arrive in exactly this form to match.
EPD_REASON_MAP = {
    "000": "No Adverse Factors",
    "001": "Available Credit On Bankcard Accounts Is Too Low",
    "006": "Bankcard Account Balances Are Too High In Proportion To Credit Limits",
    "010": "Too Many Delinquencies",
    "016": "Too Few Satisfactory Accounts",
    "020": "Length Of Time Revolving Accounts Have Been Established Is Too Short",
    "022": "Too many inquiries",
    "023": "Months Since Most Recent Delinquency Is Too Short",
    "024": "Too Many Serious Delinquencies",
    "026": "Number Of Delinquent Accounts Is Too High In Proportion To Total Number Of Accounts",
    "029": "Retail Account Balances Are Too High In Proportion To Credit Limits",
    "030": "Not Enough Retail Debt Experience",
    "031": "Revolving Account Balances Are Too High In Proportion To Credit Limits",
    "035": "Length Of Time Accounts Have Been Established Is Too Short",
    "037": "Too Few Bankcard Accounts",
    "043": "Too Few Open Revolving Accounts",
    "061": "Too Many Recently Opened Accounts",
    "066": "Too Many Serious Derogatory Items",
    "069": "Not Enough Debt Experience",
    "070": "Length Of Time Since Most Recent Bankcard Account Has Been Established Is Too Short",
    "074": "Too Few Satisfactory Revolving Accounts",
    "076": "Total Amount Past Due Is Too High",
    "103": "Not Enough Available Credit",
    "105": "Too Few Revolving Accounts",
    "117": "Length Of Time Since Most Seriously Delinquent Account Has Been Established Is Too Short",
    "132": "Too Few Open Accounts",
    "142": "Not Enough Balance Decreases On Active Non-Mortgage Accounts",
    "146": "Recency Of A Balance Overlimit On A Bankcard Account",
    "154": "Insufficient Payment Activity Over The Last Year",
    "155": "Recency Of Max Aggregate Bankcard Balance Over The Last Year",
    "158": "Too Few Open Retail Accounts",
    "174": "Too Few Open Bankcard Accounts",
    "181": "High Recent Balance Range Relative To Previous Balance Range",
    "192": "Not Enough Available Credit On Revolving Accounts",
    "201": "Length Of Time Since Oldest Auto Account Has Been Established Is Too Short"
}
|
||||
|
||||
# Factor code -> description, used by generate_reason_codes() when the score
# key is "evtg04". Single-digit codes 4-9 are listed both bare ("4") and
# zero-padded ("04") because the lookup uses str(code) without normalizing.
VANTAGE_REASON_MAP = {
    "10": "Too few accounts paid as agreed",
    "11": "Oldest account was opened too recently",
    "12": "Delinquent or derogatory status on accounts is too recent",
    "13": "Balances on delinquent or derogatory accounts are too high",
    "14": "Too high proportion of accounts recently opened",
    "15": "Lack of recently reported accounts",
    "16": "Total of credit limits and loan amounts is too low",
    "17": "No open accounts in your credit file",
    "18": "Lack of account information",
    "19": "No negative reason code",
    "20": "Delinquent or derogatory bankcard",
    "21": "Too many bankcards with a high balance",
    "22": "Too few bankcards with high credit limit",
    "23": "Too high proportion of bankcards recently opened",
    "24": "Too many bankcards with high balance compared to credit limit",
    "25": "Too high proportion of balances from bankcards",
    "26": "Balances on bankcards are too high",
    "27": "Delinquent or derogatory status on revolving accounts is too recent",
    "28": "Average credit limit on open bankcards is too low",
    "29": "Balances on bankcards are too high compared with credit limits",
    "30": "Too few open revolving accounts",
    "31": "Not enough available credit on revolving accounts",
    "32": "Oldest bankcard was opened too recently",
    "33": "Not enough balance paid down over time on bankcards",
    "34": "Most recently opened revolving account is too new",
    "35": "Lack of revolving account information",
    "36": "Lack of recently reported revolving accounts",
    "37": "No open bankcards in your credit file",
    "38": "Lack of bankcard account information",
    "39": "Balances on delinquent or derogatory bankcards are too high",
    "4": "Balances on accts too high compared to credit limits and loan amounts",
    "04": "Balances on accts too high compared to credit limits and loan amounts",
    "40": "Too many delinquent or derogatory revolving accounts",
    "41": "Average time since revolving accounts opened is too recent",
    "42": "Total credit limits on open revolving accounts are too low",
    "43": "Too many revolving accounts with high balance compared to credit limit",
    "44": "Balances on revolving accts are too high compared with credit limits",
    "45": "Not enough balance paid down over time on retail accounts",
    "46": "Oldest revolving account was opened too recently",
    "47": "No open retail accounts in your credit file",
    "48": "Lack of retail account information",
    "49": "Not enough balance paid down over time on revolving accounts",
    "5": "Too many recent delinquencies",
    "05": "Too many recent delinquencies",
    "50": "Balances on personal installment accts too high compared to loan amts",
    "51": "Too few installment accounts recently paid as agreed",
    "52": "Delinquent or derogatory installment account",
    "53": "Not enough balance paid down over time on installment accounts",
    "54": "Delinquent or derogatory status on installment accounts is too recent",
    "55": "Lack of recently reported auto accounts",
    "56": "Lack of recently reported installment accounts",
    "57": "No open installment accounts in your credit file",
    "58": "Lack of installment account information",
    "59": "Balances on retail cards are too high compared with credit limits",
    "6": "Too many accounts recently opened",
    "06": "Too many accounts recently opened",
    "60": "Total delinquent or derogatory balances on real estate loans too high",
    "61": "No open first mortgage accounts in your credit file",
    "62": "Lack of first mortgage account information",
    "63": "Delinquent or derogatory real estate secured loan",
    "64": "Not enough balance paid down over time on real estate secured loans",
    "65": "Oldest real estate secured loan was opened too recently",
    "66": "Delinquent or derogatory status on real estate loans is too recent",
    "67": "No open real estate secured loans in your credit file",
    "68": "Lack of real estate secured loan information",
    "69": "Too high proportion of balances from loans not secured by real estate",
    "7": "You have too many delinquent or derogatory accounts",
    "07": "You have too many delinquent or derogatory accounts",
    "70": "Too high proportion of auto accounts are delinquent or derogatory",
    "71": "Not enough balance paid down over time on auto accounts",
    "72": "Too few auto accounts paid as agreed",
    "73": "Delinquent or derogatory auto account",
    "74": "Balances on auto accounts are too high compared with loan amounts",
    "75": "Payments on auto accounts less than scheduled amount",
    "76": "Delinquent or derogatory status on auto accounts is too recent",
    "77": "No open auto accounts in your credit file",
    "78": "Lack of auto account information",
    "79": "No negative reason code",
    "8": "Too few accounts recently paid as agreed",
    "08": "Too few accounts recently paid as agreed",
    "80": "Delinquent or derogatory student loan",
    "81": "Not enough balance paid down over time on student loans",
    "82": "Lack of recently reported student loans",
    "83": "No negative reason code",
    "84": "Number of inquiries was a factor in determining the score",
    "85": "Too many inquiries",
    "86": "Derogatory public records",
    "87": "Unpaid collections",
    "88": "Bankruptcy",
    "89": "No negative reason code",
    "9": "Delinquent or derogatory account",
    "09": "Delinquent or derogatory account",
    "90": "No open revolving accounts in your credit file",
    "91": "Balances on delinquent or derogatory revolving accounts are too high",
    "92": "Delinquent or derogatory first mortgage",
    "93": "Not enough balance paid down over time on first mortgage accounts",
    "94": "No negative reason code",
    "95": "No negative reason code",
    "96": "Too few open accounts",
    "97": "Too few accounts"
}
|
||||
|
||||
# Feature name -> description. post_processing() uses it for the single
# reason code it returns when a non-score feature triggers its condition;
# the two score features (evtg04, eads66) take the factor-based path instead.
REASON_MAP = {
    'evtg04': "System Generated",
    'eads66': "System Generated",
    's004s': "Length of time on file is too short",
    'mt34s': "Not enough balance decreases on mortgage trades in the past 12 months",
    'ct320': "Insufficient payment activity",
    'us21s': "Length of time since most recent installment account has been established is too short",
    'utlmag02': "Revolving account balances are too high in proportion to credit limits over the last 24 months",
    'trv01': "Recency of a balance overlimit on a bankcard account",
    'us34s': "Not enough balance decreases on installment trades in the past 12 months"
}
|
||||
|
||||
|
||||
def generate_reason_codes(score_key, factors):
    """Turn up to four score factors into exactly four reason-code rows.

    Args:
        score_key: "evtg04" selects VANTAGE_REASON_MAP, "eads66" selects
            EPD_REASON_MAP; any other key means no descriptions are known.
        factors: List of dicts carrying "code" and "rank". Anything that is
            not a non-empty list yields four empty rows.

    Returns:
        A list of four {"code", "rank", "description"} dicts. Rows built from
        a factor get "" as description when the code is not in the selected
        map; padding rows have None in all three fields.
    """
    def _empty_row():
        return {"code": None, "rank": None, "description": None}

    # Nothing usable to translate: four empty rows.
    if not (isinstance(factors, list) and factors):
        return [_empty_row() for _ in range(4)]

    if score_key == "evtg04":
        descriptions = VANTAGE_REASON_MAP
    elif score_key == "eads66":
        descriptions = EPD_REASON_MAP
    else:
        descriptions = {}

    rows = [
        {
            "code": factor.get("code"),
            "rank": factor.get("rank"),
            "description": descriptions.get(str(factor.get("code")), ""),
        }
        for factor in factors[:4]
    ]

    # Pad with empty rows so callers always receive four entries.
    rows.extend(_empty_row() for _ in range(4 - len(rows)))
    return rows
|
||||
|
||||
|
||||
def post_processing(processing_output, record):
    """Derive the grade and reason codes for one scored application.

    Args:
        processing_output: Mapping with "prediction" (a number) and
            "shape_reasoncode" (an iterable of dicts with "feature" and
            "value"), scanned in the order given.
        record: The extracted variables; "<score>_factors" is read from it
            when a score feature (evtg04 / eads66) is the triggering one.

    Returns:
        {"grade": "M1".."M14", "reason_codes": [...]} where reason_codes is
        - one all-None entry when prediction <= 0.04,
        - four factor-based entries when the first triggering feature is a
          score feature,
        - one REASON_MAP-based entry when it is any other feature,
        - otherwise one "No suitable Product Offerings found" entry whose
          code is the last feature scanned (None when there were none).

    Raises:
        KeyError: if "prediction" or "shape_reasoncode" is missing.
    """
    prediction = processing_output["prediction"]
    shape_reasoncode = processing_output["shape_reasoncode"]

    # Grade mapping: one grade per 0.01 of prediction, clamped to M1..M14;
    # negative predictions map to M14.
    if prediction < 0:
        grade = "M14"
    else:
        # NOTE(review): float division can land just above an integer for a
        # prediction that is an exact multiple of 0.01 (e.g. 0.07 / 0.01),
        # which moves it up one grade - confirm whether that matters.
        bucket = math.ceil(prediction / 0.01)
        grade = f"M{min(max(bucket, 1), 14)}"

    # if prediction <= 0.04, not declined: no reason codes to report.
    if prediction <= 0.04:
        return {
            "grade": grade,
            "reason_codes": [{
                "code": None,
                "rank": None,
                "description": None
            }]
        }

    # Per-feature test deciding whether that feature is reported as the
    # reason.
    conditions = {
        'evtg04': lambda x: x < 700,
        'eads66': lambda x: x < 700,
        's004s': lambda x: x < 12,
        'mt34s': lambda x: x > 95,
        'ct320': lambda x: x <= 3,
        'us21s': lambda x: x <= 3,
        'utlmag02': lambda x: x > 300,
        'trv01': lambda x: x <= 3,
        'us34s': lambda x: x > 90
    }

    # Pre-bind so the fallback below is well-defined when the list is empty.
    feat = None
    for item in shape_reasoncode:
        feat = item.get("feature")
        val = item.get("value")
        cond = conditions.get(feat)
        if cond is None:
            continue

        # A missing or non-comparable value cannot trigger its condition;
        # only the comparison itself is guarded so other errors surface.
        try:
            triggered = cond(val)
        except (TypeError, ValueError):
            continue
        if not triggered:
            continue

        if feat in ("evtg04", "eads66"):
            # Score-type feature -> full factors-based reasons.
            factors = record.get(f"{feat}_factors", []) if isinstance(record, dict) else []
            return {
                "grade": grade,
                "reason_codes": generate_reason_codes(feat, factors)
            }

        # Other features -> only 1 reason code based on REASON_MAP.
        return {
            "grade": grade,
            "reason_codes": [{
                "code": feat,
                "rank": "1",
                "description": REASON_MAP.get(feat, "Reason not mapped")
            }]
        }

    # Default fallback: no feature triggered.
    return {
        "grade": grade,
        "reason_codes": [{
            "code": feat,
            "rank": "1",
            "description": "No suitable Product Offerings found"
        }]
    }
|
||||
360
pre_processing.py
Normal file
360
pre_processing.py
Normal file
@ -0,0 +1,360 @@
|
||||
import math
|
||||
|
||||
|
||||
lookup_dict = {
|
||||
"balmag04": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 427.0
|
||||
},
|
||||
"utlmag01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 446.0
|
||||
},
|
||||
"utlmag02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 486.0
|
||||
},
|
||||
"utlmag03": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 492.0
|
||||
},
|
||||
"utlmag04": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 414.0
|
||||
},
|
||||
"mnpmag03": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 494.0
|
||||
},
|
||||
"duemag01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 488.0
|
||||
},
|
||||
"trv01": {
|
||||
"data_type": "int", "valid_min": 1.0, "valid_max": 24.0, "default_treatment_type": "unk", "observed_cap_min_value": 1.0, "observed_cap_max_value": 24.0
|
||||
},
|
||||
"trv02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 12.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12.0
|
||||
},
|
||||
"index01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"index02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"rvlr75": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"rvlr77": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"rev12": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 109.0
|
||||
},
|
||||
"rev13": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 82.0
|
||||
},
|
||||
"rev14": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 56.0
|
||||
},
|
||||
"rev54": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
|
||||
},
|
||||
"rev84": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"bkc14": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
|
||||
},
|
||||
"bkc84": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"ret12": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 42.0
|
||||
},
|
||||
"evtg04": {
|
||||
"data_type": "int", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 845.0
|
||||
},
|
||||
"rev201": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev202": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev203": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev223": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 49.0
|
||||
},
|
||||
"rev224": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
|
||||
},
|
||||
"rev225": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
|
||||
},
|
||||
"walshr02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 2.0
|
||||
},
|
||||
"rev231": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 18101.0
|
||||
},
|
||||
"rev232": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38963.0
|
||||
},
|
||||
"rev233": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 68709.0
|
||||
},
|
||||
"rev252": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13988.0
|
||||
},
|
||||
"rev253": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12347.0
|
||||
},
|
||||
"all231": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38949.0
|
||||
},
|
||||
"at28a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1330166.0
|
||||
},
|
||||
"at28b": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 351273.0
|
||||
},
|
||||
"at36s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"bc20s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 510.0
|
||||
},
|
||||
"bc21s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 190.0
|
||||
},
|
||||
"bc28s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 205768.0
|
||||
},
|
||||
"bc36s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"bc97a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 184747.0
|
||||
},
|
||||
"bc98a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 186395.0
|
||||
},
|
||||
"bc102s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 31000.0
|
||||
},
|
||||
"bc104s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 99.0
|
||||
},
|
||||
"bc107s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 15.0
|
||||
},
|
||||
"br20s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 495.0
|
||||
},
|
||||
"fi21s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162.0
|
||||
},
|
||||
"fi34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
|
||||
},
|
||||
"fi35s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 70016.0
|
||||
},
|
||||
"g051s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"g102s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"g105s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"g201a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 255142.0
|
||||
},
|
||||
"g221d": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1.0
|
||||
},
|
||||
"g232s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 21.0
|
||||
},
|
||||
"g250a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13.0
|
||||
},
|
||||
"g960s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 17.0
|
||||
},
|
||||
"g990s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 10.0
|
||||
},
|
||||
"mt20s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 261.0
|
||||
},
|
||||
"mt34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
|
||||
},
|
||||
"pb28s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 192100.0
|
||||
},
|
||||
"pb34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
|
||||
},
|
||||
"re28s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 311518.0
|
||||
},
|
||||
"re36s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"re102s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 41750.0
|
||||
},
|
||||
"s004s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 179.0
|
||||
},
|
||||
"s114s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"st32s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 221686.0
|
||||
},
|
||||
"g106s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 590.0
|
||||
},
|
||||
"g242b": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"us21s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 153.0
|
||||
},
|
||||
"us34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
|
||||
},
|
||||
"g403s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"g405s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"g408s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"g411s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 8.0
|
||||
},
|
||||
"g416s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"g417s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"agg901": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"agg902": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"agg908": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 69376.0
|
||||
},
|
||||
"agg911": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 10000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 107.0
|
||||
},
|
||||
"rle904": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 309634.0
|
||||
},
|
||||
"p02d": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"p02h": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 7.0
|
||||
},
|
||||
"balmag01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 474.0
|
||||
},
|
||||
"balmag02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 516.0
|
||||
},
|
||||
"cv13": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"cv17": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
|
||||
},
|
||||
"cv21": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171153.0
|
||||
},
|
||||
"cv25": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 33.0
|
||||
},
|
||||
"ct319": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 98.0
|
||||
},
|
||||
"ct320": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 52.0
|
||||
},
|
||||
"cta11": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 71.0
|
||||
},
|
||||
"cta20": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
|
||||
},
|
||||
"cta21": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 148.0
|
||||
},
|
||||
"paymnt08": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev321": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 234408.0
|
||||
},
|
||||
"rev322": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 219698.0
|
||||
},
|
||||
"bkc321": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171024.0
|
||||
},
|
||||
"bkc322": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162590.0
|
||||
},
|
||||
"bkc323": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 152068.0
|
||||
},
|
||||
"bkc324": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 136763.0
|
||||
},
|
||||
"bkc327": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171143.0
|
||||
},
|
||||
"bkc328": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 164352.0
|
||||
},
|
||||
"eads66": {
|
||||
"data_type": "float", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 820.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# 1. Pre-processing: type casting and variable treatment
|
||||
def pre_processing(input_dict, lookup=None):
    """Cast and range-treat the raw model variables of one record.

    For every variable configured in the lookup table, the raw value is read
    from ``input_dict``, cast to the configured ``data_type`` and treated:

    * a value that is missing or cannot be cast (non-numeric text, or NaN /
      infinity for an ``int`` variable) becomes ``None``;
    * when ``default_treatment_type`` is ``"unk"``, a value outside
      ``[valid_min, valid_max]`` becomes ``float("nan")``;
    * a value inside the valid range but beyond an observed cap is clamped
      to that cap (the cap's own type is kept, so an ``int`` may become a
      ``float``).

    Treatment is skipped when the valid bounds are absent or NaN.

    Args:
        input_dict: Mapping of variable name to raw value.
        lookup: Optional mapping of variable name to its config dict
            (``data_type``, ``valid_min``, ``valid_max``,
            ``default_treatment_type``, ``observed_cap_min_value``,
            ``observed_cap_max_value``). Defaults to the module-level
            ``lookup_dict``.

    Returns:
        A dict with exactly one entry per configured variable.
    """
    if lookup is None:
        lookup = lookup_dict

    processed = {}
    for var, cfg in lookup.items():
        val = input_dict.get(var)
        data_type = cfg.get("data_type")

        # Cast to required type.
        try:
            if data_type == "int":
                val = int(val)
            elif data_type == "float":
                val = float(val)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")) -- treat it like any other
            # uncastable value instead of aborting the whole record.
            val = None

        # Variable treatment.
        vmin = cfg.get("valid_min")
        vmax = cfg.get("valid_max")
        bounds_known = (
            vmin is not None
            and vmax is not None
            and not (math.isnan(vmin) or math.isnan(vmax))
        )
        if val is not None and bounds_known:
            if cfg.get("default_treatment_type") == "unk":
                if val < vmin or val > vmax:
                    val = float("nan")

            # Clamp only real (non-NaN) values to the observed caps.
            if not math.isnan(val):
                cmin = cfg.get("observed_cap_min_value")
                cmax = cfg.get("observed_cap_max_value")
                if cmin is not None and vmin <= val < cmin:
                    val = cmin
                if cmax is not None and cmax < val <= vmax:
                    val = cmax

        processed[var] = val

    return processed
|
||||
39
processing.py
Normal file
39
processing.py
Normal file
@ -0,0 +1,39 @@
|
||||
import joblib
|
||||
import xgboost as xgb
|
||||
import shap
|
||||
|
||||
def processing(processed_dict):
    """Score one pre-processed record and rank its features by SHAP value.

    The model is loaded with joblib from ``./xgboost_model.joblib`` (a path
    relative to the current working directory) on every call.

    Args:
        processed_dict: Mapping of feature name to treated value; it must
            contain every name listed in the model's ``feature_names``.

    Returns:
        A dict with:
        - ``"prediction"``: the model output for the record, as a float.
        - ``"shape_reasoncode"``: one ``{"feature", "value", "shap_rank"}``
          dict per feature, ordered by descending SHAP value, with
          ``shap_rank`` starting at 1.
    """
    loaded_model = joblib.load("./xgboost_model.joblib")
    tree_explainer = shap.TreeExplainer(loaded_model)
    names = loaded_model.feature_names

    # Build a single-row matrix with columns in the model's feature order.
    row = [processed_dict[name] for name in names]
    data = xgb.DMatrix([row], feature_names=names)
    score = float(loaded_model.predict(data)[0])

    # The explainer may return a 2-D (rows x features) array; keep row 0.
    contributions = tree_explainer.shap_values(data)
    if getattr(contributions, "ndim", 1) > 1:
        contributions = contributions[0]

    # Highest signed SHAP value first.
    ordered = sorted(
        zip(names, contributions),
        key=lambda pair: pair[1],
        reverse=True,
    )

    reason_codes = []
    for rank, (name, _) in enumerate(ordered, start=1):
        reason_codes.append(
            {
                "feature": name,
                "value": processed_dict[name],
                "shap_rank": rank,
            }
        )

    return {
        "prediction": score,
        "shape_reasoncode": reason_codes,
    }
|
||||
@ -1 +1,14 @@
|
||||
{}
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"application_id": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Unique identifier for the application."
|
||||
},
|
||||
"creditBureau": {
|
||||
"type": ["object", "null"],
|
||||
"description": "Credit bureau details"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1 +1,3 @@
|
||||
{}
|
||||
joblib == 1.3.2
|
||||
xgboost == 2.1.4
|
||||
shap == 0.46.0
|
||||
|
||||
@ -1 +1,48 @@
|
||||
{}
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"application_id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the application"
|
||||
},
|
||||
"prediction": {
|
||||
"type": "number",
|
||||
"description": "Model's predicted probability score"
|
||||
},
|
||||
"grade": {
|
||||
"type": "string",
|
||||
"description": "HD Model Grade"
|
||||
},
|
||||
"reason_codes": {
|
||||
"type": "array",
|
||||
"description": "List of reason codes explaining the model decision",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Feature or score reason code"
|
||||
},
|
||||
"rank": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Rank of importance (1 to 4)"
|
||||
},
|
||||
"description": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Human-readable explanation for the reason"
|
||||
}
|
||||
},
|
||||
"required": ["code", "rank", "description"]
|
||||
},
|
||||
"minItems": 1,
|
||||
"maxItems": 4
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"application_id",
|
||||
"prediction",
|
||||
"grade",
|
||||
"reason_codes"
|
||||
]
|
||||
}
|
||||
|
||||
12611
test_block.py
Normal file
12611
test_block.py
Normal file
File diff suppressed because it is too large
Load Diff
BIN
xgboost_model.joblib
Normal file
BIN
xgboost_model.joblib
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user