Compare commits
12 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | fe9e388f36 | | |
| | 8f7a795573 | | |
| | 848a80fb09 | | |
| | 13c5f21f58 | | |
| | 295b9d22a7 | | |
| | 46e6bc30ba | | |
| | 7c6cc57dd1 | | |
| | f40b9e685f | | |
| | 95034efa4e | | |
| | e2c2560e0a | | |
| | f2e9d599b9 | | |
| | 9c0d95f9e3 | | |
82
block.py
82
block.py
@ -1,21 +1,65 @@
|
|||||||
@flowx_block
|
import logging
|
||||||
def example_function(request: dict) -> dict:
|
from typing import Dict
|
||||||
|
from pre_processing import pre_processing
|
||||||
|
from processing import processing
|
||||||
|
from post_processing import post_processing
|
||||||
|
from parse_report import extract_model_variables
|
||||||
|
|
||||||
# Processing logic here...
|
# Configure logging
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
return {
|
|
||||||
"meta_info": [
|
def __main__(application_id: str, creditBureau: dict) -> Dict:
    """
    Score one application from its credit-bureau payload.

    The payload is flattened with ``extract_model_variables`` and then passed
    through ``pre_processing`` -> ``processing`` -> ``post_processing``.
    When extraction yields nothing usable (an empty record, or a record whose
    values are all ``None``) the pipeline is skipped and a fixed result is
    returned instead.

    Returns:
        A dict containing:
        - application_id: the id passed in, echoed back
        - prediction: float
        - grade: str
        - reason_codes: list or None
    """
    record = extract_model_variables(creditBureau)

    # Guard clause: nothing to score when no variable carries a value.
    has_values = bool(record) and any(v is not None for v in record.values())
    if not has_values:
        final_result = {
            'application_id': application_id,
            'prediction': 0.99,
            'grade': 'M14',
            "reason_codes": [{
                "code": None,
                "rank": None,
                "description": "Lack of account information"
            }]
        }
        logger.info(
            f"final_result (early exit due to missing or empty extracted variables): {final_result}")
        return final_result

    # Normal path: run the three pipeline stages in order.
    model_output = processing(pre_processing(record))
    graded = post_processing(model_output, record)

    final_result = {
        'application_id': application_id,
        'prediction': model_output['prediction'],
        'grade': graded['grade'],
        'reason_codes': graded['reason_codes'],
    }
    logger.info(f"final_result: {final_result}")
    return final_result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Local-run helper: score the JSON payload whose path is the first
    # command-line argument. The same document supplies both the
    # application id and the bureau payload.
    import json
    import sys

    payload_path = sys.argv[1]
    with open(payload_path) as handle:
        payload = json.load(handle)
    __main__(application_id=payload["application_id"], creditBureau=payload)
|
||||||
|
|||||||
210
parse_report.py
Normal file
210
parse_report.py
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
VARIABLE_TO_CODE_MAP = {
|
||||||
|
"agg901": "00H86",
|
||||||
|
"agg902": "00H86",
|
||||||
|
"agg908": "00H86",
|
||||||
|
"agg911": "00H86",
|
||||||
|
"all231": "00ASH",
|
||||||
|
"at28a": "00WR3",
|
||||||
|
"at28b": "00WR3",
|
||||||
|
"at36s": "00WR3",
|
||||||
|
"balmag01": "00H88",
|
||||||
|
"balmag02": "00H88",
|
||||||
|
"balmag04": "00AI5",
|
||||||
|
"bc102s": "00WR3",
|
||||||
|
"bc104s": "00WR3",
|
||||||
|
"bc107s": "00WR3",
|
||||||
|
"bc20s": "00WR3",
|
||||||
|
"bc21s": "00WR3",
|
||||||
|
"bc28s": "00WR3",
|
||||||
|
"bc36s": "00WR3",
|
||||||
|
"bc97a": "00WR3",
|
||||||
|
"bc98a": "00WR3",
|
||||||
|
"bkc14": "00ASF",
|
||||||
|
"bkc321": "00ASI",
|
||||||
|
"bkc322": "00ASI",
|
||||||
|
"bkc323": "00ASI",
|
||||||
|
"bkc324": "00ASI",
|
||||||
|
"bkc327": "00ASI",
|
||||||
|
"bkc328": "00ASI",
|
||||||
|
"bkc84": "00ASF",
|
||||||
|
"br20s": "00WR3",
|
||||||
|
"ct319": "00AI9",
|
||||||
|
"ct320": "00AI9",
|
||||||
|
"cta11": "00AI9",
|
||||||
|
"cta20": "00AI9",
|
||||||
|
"cta21": "00AI9",
|
||||||
|
"cv13": "00V26",
|
||||||
|
"cv17": "00V26",
|
||||||
|
"cv21": "00V26",
|
||||||
|
"cv25": "00V26",
|
||||||
|
"duemag01": "00AI5",
|
||||||
|
"fi21s": "00WR3",
|
||||||
|
"fi34s": "00WR3",
|
||||||
|
"fi35s": "00WR3",
|
||||||
|
"g051s": "00WR3",
|
||||||
|
"g102s": "00WR3",
|
||||||
|
"g105s": "00WR3",
|
||||||
|
"g106s": "00WR3",
|
||||||
|
"g201a": "00WR3",
|
||||||
|
"g221d": "00WR3",
|
||||||
|
"g232s": "00WR3",
|
||||||
|
"g242b": "00WR3",
|
||||||
|
"g250a": "00WR3",
|
||||||
|
"g403s": "00WR3",
|
||||||
|
"g405s": "00WR3",
|
||||||
|
"g408s": "00WR3",
|
||||||
|
"g411s": "00WR3",
|
||||||
|
"g416s": "00WR3",
|
||||||
|
"g417s": "00WR3",
|
||||||
|
"g960s": "00WR3",
|
||||||
|
"g990s": "00WR3",
|
||||||
|
"index01": "00V53",
|
||||||
|
"index02": "00V53",
|
||||||
|
"mnpmag03": "00AI5",
|
||||||
|
"mt20s": "00WR3",
|
||||||
|
"mt34s": "00WR3",
|
||||||
|
"p02d": "00WBO",
|
||||||
|
"p02h": "00WBO",
|
||||||
|
"paymnt08": "00H91",
|
||||||
|
"pb28s": "00WR3",
|
||||||
|
"pb34s": "00WR3",
|
||||||
|
"re102s": "00WR3",
|
||||||
|
"re28s": "00WR3",
|
||||||
|
"re36s": "00WR3",
|
||||||
|
"ret12": "00ASF",
|
||||||
|
"rev12": "00ASF",
|
||||||
|
"rev13": "00ASF",
|
||||||
|
"rev14": "00ASF",
|
||||||
|
"rev201": "00ASG",
|
||||||
|
"rev202": "00ASG",
|
||||||
|
"rev203": "00ASG",
|
||||||
|
"rev223": "00ASG",
|
||||||
|
"rev224": "00ASG",
|
||||||
|
"rev225": "00ASG",
|
||||||
|
"rev231": "00ASH",
|
||||||
|
"rev232": "00ASH",
|
||||||
|
"rev233": "00ASH",
|
||||||
|
"rev252": "00ASH",
|
||||||
|
"rev253": "00ASH",
|
||||||
|
"rev321": "00ASI",
|
||||||
|
"rev322": "00ASI",
|
||||||
|
"rev54": "00ASF",
|
||||||
|
"rev84": "00ASF",
|
||||||
|
"rle904": "00ASJ",
|
||||||
|
"rvlr75": "00WP4",
|
||||||
|
"rvlr77": "00WP4",
|
||||||
|
"s004s": "00WR3",
|
||||||
|
"s114s": "00WR3",
|
||||||
|
"st32s": "00WR3",
|
||||||
|
"trv01": "00H87",
|
||||||
|
"trv02": "00H87",
|
||||||
|
"us21s": "00WR3",
|
||||||
|
"us34s": "00WR3",
|
||||||
|
"utlmag01": "00AI5",
|
||||||
|
"utlmag02": "00AI5",
|
||||||
|
"utlmag03": "00AI5",
|
||||||
|
"utlmag04": "00AI5",
|
||||||
|
"walshr02": "00H90",
|
||||||
|
}
|
||||||
|
|
||||||
|
SCORE_TO_CODE_MAP = {
|
||||||
|
"evtg04": "001NN",
|
||||||
|
"eads66": "00WDC"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def safe_get(d, *keys):
    """Return the first usable value stored in ``d`` under any of ``keys``.

    Callers pass the snake_case and camelCase spellings of one field so that
    either payload convention is accepted.

    Args:
        d: Mapping to read from. Anything that is not a ``dict`` (including
            ``None``) is treated as an empty mapping.
        *keys: Candidate key names, tried in the order given.

    Returns:
        The value under the first key that is present with a non-``None``
        value. An empty ``dict`` is returned when ``d`` is not a dict or no
        candidate key holds a value, so lookups can be chained without
        ``None`` checks.
    """
    if not isinstance(d, dict):
        return {}
    for key in keys:
        value = d.get(key)
        # An explicit null is treated like a missing key: returning None here
        # would break chained lookups, and the alternate spelling may still
        # carry the real value.
        if value is not None:
            return value
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_model_variables(creditBureau: dict) -> dict:
    """Flatten a credit-bureau payload into the variables the model reads.

    The add-on products are looked up under
    ``creditBureau -> product -> subject -> subjectRecord -> addOnProduct``
    (snake_case spellings are accepted where the code lists them). For each
    product the function collects the ``characteristic`` entries of its score
    model, the score ``results`` string, and up to four score factors.

    Malformed nodes (a non-dict product or score model, a null or non-list
    characteristic/factor container, a non-string characteristic id) are
    skipped instead of raising.

    Args:
        creditBureau: Parsed bureau payload. A falsy value yields ``{}``.

    Returns:
        ``{}`` for a falsy payload. Otherwise a dict holding:
        - every key of ``VARIABLE_TO_CODE_MAP``, mapped to the characteristic
          value found for its product code, or ``None`` when absent;
        - each key of ``SCORE_TO_CODE_MAP`` whose product carried a non-empty
          score string, with leading ``+`` characters stripped;
        - ``"<score key>_factors"`` with a list of ``{"code", "rank"}`` dicts
          when the product carried factors.
    """
    if not creditBureau:
        return {}

    # Step 1: locate the add-on products, accepting either key casing.
    credit_bureau = safe_get(creditBureau, "creditBureau", "credit_bureau")
    subject = safe_get(safe_get(credit_bureau, "product"), "subject")
    subject_node = safe_get(subject, "subject_record", "subjectRecord")
    add_ons = safe_get(subject_node, "add_on_product", "addOnProduct")
    if not isinstance(add_ons, list):
        add_ons = []

    # Step 2: index everything by (product code, name) so that the same
    # characteristic id under different products cannot collide.
    value_map = {}
    for product in add_ons:
        if not isinstance(product, dict):
            continue
        code = product.get("code")
        score_model = safe_get(product, "score_model", "scoreModel")
        if not isinstance(score_model, dict):
            continue

        # 2a. Characteristics; ids are lower-cased to match the map keys.
        characteristics = score_model.get("characteristic")
        if not isinstance(characteristics, list):
            characteristics = []
        for char in characteristics:
            if not isinstance(char, dict) or "id" not in char or "value" not in char:
                continue
            char_id = char["id"]
            if isinstance(char_id, str):
                value_map[(code, char_id.lower())] = char["value"]

        # 2b. Score result and its factors, when the product has a score node.
        score = safe_get(score_model, "score")
        if not score or not isinstance(score, dict):
            continue

        results = score.get("results")
        if isinstance(results, str):
            value_map[(code, "score")] = results

        factors_node = score.get("factors")
        raw_factors = factors_node.get("factor") if isinstance(factors_node, dict) else None
        if not isinstance(raw_factors, list):
            raw_factors = []

        # Only the first four entries are considered; malformed ones among
        # them are dropped rather than replaced by later entries.
        top_factors = [
            {"code": f.get("code"), "rank": f.get("rank")}
            for f in raw_factors[:4]
            if isinstance(f, dict) and "code" in f and "rank" in f
        ]
        if top_factors:
            value_map[(code, "factors")] = top_factors

    # Step 3a: every model variable is always present in the output, as None
    # when the payload did not provide it.
    extracted = {
        var: value_map.get((code, var))
        for var, code in VARIABLE_TO_CODE_MAP.items()
    }

    # Step 3b: score values and factor lists are added only when found.
    for key, code in SCORE_TO_CODE_MAP.items():
        value = value_map.get((code, "score"))
        if value:
            extracted[key] = value.lstrip("+")

        factor_list = value_map.get((code, "factors"))
        if factor_list:
            extracted[f"{key}_factors"] = factor_list

    return extracted
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI helper: for each JSON file named on the command line, print a
    # header followed by one "name: value" line per extracted variable.
    for filename in sys.argv[1:]:
        with open(filename) as handle:
            payload = json.load(handle)
        print(f"\n--- Extracting from: {filename} ---")
        extracted_vars = extract_model_variables(payload)
        for k, v in extracted_vars.items():
            print(f"{k}: {v}")
|
||||||
253
post_processing.py
Normal file
253
post_processing.py
Normal file
@ -0,0 +1,253 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
EPD_REASON_MAP = {
|
||||||
|
"000": "No Adverse Factors",
|
||||||
|
"001": "Available Credit On Bankcard Accounts Is Too Low",
|
||||||
|
"006": "Bankcard Account Balances Are Too High In Proportion To Credit Limits",
|
||||||
|
"010": "Too Many Delinquencies",
|
||||||
|
"016": "Too Few Satisfactory Accounts",
|
||||||
|
"020": "Length Of Time Revolving Accounts Have Been Established Is Too Short",
|
||||||
|
"022": "Too many inquiries",
|
||||||
|
"023": "Months Since Most Recent Delinquency Is Too Short",
|
||||||
|
"024": "Too Many Serious Delinquencies",
|
||||||
|
"026": "Number Of Delinquent Accounts Is Too High In Proportion To Total Number Of Accounts",
|
||||||
|
"029": "Retail Account Balances Are Too High In Proportion To Credit Limits",
|
||||||
|
"030": "Not Enough Retail Debt Experience",
|
||||||
|
"031": "Revolving Account Balances Are Too High In Proportion To Credit Limits",
|
||||||
|
"035": "Length Of Time Accounts Have Been Established Is Too Short",
|
||||||
|
"037": "Too Few Bankcard Accounts",
|
||||||
|
"043": "Too Few Open Revolving Accounts",
|
||||||
|
"061": "Too Many Recently Opened Accounts",
|
||||||
|
"066": "Too Many Serious Derogatory Items",
|
||||||
|
"069": "Not Enough Debt Experience",
|
||||||
|
"070": "Length Of Time Since Most Recent Bankcard Account Has Been Established Is Too Short",
|
||||||
|
"074": "Too Few Satisfactory Revolving Accounts",
|
||||||
|
"076": "Total Amount Past Due Is Too High",
|
||||||
|
"103": "Not Enough Available Credit",
|
||||||
|
"105": "Too Few Revolving Accounts",
|
||||||
|
"117": "Length Of Time Since Most Seriously Delinquent Account Has Been Established Is Too Short",
|
||||||
|
"132": "Too Few Open Accounts",
|
||||||
|
"142": "Not Enough Balance Decreases On Active Non-Mortgage Accounts",
|
||||||
|
"146": "Recency Of A Balance Overlimit On A Bankcard Account",
|
||||||
|
"154": "Insufficient Payment Activity Over The Last Year",
|
||||||
|
"155": "Recency Of Max Aggregate Bankcard Balance Over The Last Year",
|
||||||
|
"158": "Too Few Open Retail Accounts",
|
||||||
|
"174": "Too Few Open Bankcard Accounts",
|
||||||
|
"181": "High Recent Balance Range Relative To Previous Balance Range",
|
||||||
|
"192": "Not Enough Available Credit On Revolving Accounts",
|
||||||
|
"201": "Length Of Time Since Oldest Auto Account Has Been Established Is Too Short"
|
||||||
|
}
|
||||||
|
|
||||||
|
VANTAGE_REASON_MAP = {
|
||||||
|
"10": "Too few accounts paid as agreed",
|
||||||
|
"11": "Oldest account was opened too recently",
|
||||||
|
"12": "Delinquent or derogatory status on accounts is too recent",
|
||||||
|
"13": "Balances on delinquent or derogatory accounts are too high",
|
||||||
|
"14": "Too high proportion of accounts recently opened",
|
||||||
|
"15": "Lack of recently reported accounts",
|
||||||
|
"16": "Total of credit limits and loan amounts is too low",
|
||||||
|
"17": "No open accounts in your credit file",
|
||||||
|
"18": "Lack of account information",
|
||||||
|
"19": "No negative reason code",
|
||||||
|
"20": "Delinquent or derogatory bankcard",
|
||||||
|
"21": "Too many bankcards with a high balance",
|
||||||
|
"22": "Too few bankcards with high credit limit",
|
||||||
|
"23": "Too high proportion of bankcards recently opened",
|
||||||
|
"24": "Too many bankcards with high balance compared to credit limit",
|
||||||
|
"25": "Too high proportion of balances from bankcards",
|
||||||
|
"26": "Balances on bankcards are too high",
|
||||||
|
"27": "Delinquent or derogatory status on revolving accounts is too recent",
|
||||||
|
"28": "Average credit limit on open bankcards is too low",
|
||||||
|
"29": "Balances on bankcards are too high compared with credit limits",
|
||||||
|
"30": "Too few open revolving accounts",
|
||||||
|
"31": "Not enough available credit on revolving accounts",
|
||||||
|
"32": "Oldest bankcard was opened too recently",
|
||||||
|
"33": "Not enough balance paid down over time on bankcards",
|
||||||
|
"34": "Most recently opened revolving account is too new",
|
||||||
|
"35": "Lack of revolving account information",
|
||||||
|
"36": "Lack of recently reported revolving accounts",
|
||||||
|
"37": "No open bankcards in your credit file",
|
||||||
|
"38": "Lack of bankcard account information",
|
||||||
|
"39": "Balances on delinquent or derogatory bankcards are too high",
|
||||||
|
"4": "Balances on accts too high compared to credit limits and loan amounts",
|
||||||
|
"04": "Balances on accts too high compared to credit limits and loan amounts",
|
||||||
|
"40": "Too many delinquent or derogatory revolving accounts",
|
||||||
|
"41": "Average time since revolving accounts opened is too recent",
|
||||||
|
"42": "Total credit limits on open revolving accounts are too low",
|
||||||
|
"43": "Too many revolving accounts with high balance compared to credit limit",
|
||||||
|
"44": "Balances on revolving accts are too high compared with credit limits",
|
||||||
|
"45": "Not enough balance paid down over time on retail accounts",
|
||||||
|
"46": "Oldest revolving account was opened too recently",
|
||||||
|
"47": "No open retail accounts in your credit file",
|
||||||
|
"48": "Lack of retail account information",
|
||||||
|
"49": "Not enough balance paid down over time on revolving accounts",
|
||||||
|
"5": "Too many recent delinquencies",
|
||||||
|
"05": "Too many recent delinquencies",
|
||||||
|
"50": "Balances on personal installment accts too high compared to loan amts",
|
||||||
|
"51": "Too few installment accounts recently paid as agreed",
|
||||||
|
"52": "Delinquent or derogatory installment account",
|
||||||
|
"53": "Not enough balance paid down over time on installment accounts",
|
||||||
|
"54": "Delinquent or derogatory status on installment accounts is too recent",
|
||||||
|
"55": "Lack of recently reported auto accounts",
|
||||||
|
"56": "Lack of recently reported installment accounts",
|
||||||
|
"57": "No open installment accounts in your credit file",
|
||||||
|
"58": "Lack of installment account information",
|
||||||
|
"59": "Balances on retail cards are too high compared with credit limits",
|
||||||
|
"6": "Too many accounts recently opened",
|
||||||
|
"06": "Too many accounts recently opened",
|
||||||
|
"60": "Total delinquent or derogatory balances on real estate loans too high",
|
||||||
|
"61": "No open first mortgage accounts in your credit file",
|
||||||
|
"62": "Lack of first mortgage account information",
|
||||||
|
"63": "Delinquent or derogatory real estate secured loan",
|
||||||
|
"64": "Not enough balance paid down over time on real estate secured loans",
|
||||||
|
"65": "Oldest real estate secured loan was opened too recently",
|
||||||
|
"66": "Delinquent or derogatory status on real estate loans is too recent",
|
||||||
|
"67": "No open real estate secured loans in your credit file",
|
||||||
|
"68": "Lack of real estate secured loan information",
|
||||||
|
"69": "Too high proportion of balances from loans not secured by real estate",
|
||||||
|
"7": "You have too many delinquent or derogatory accounts",
|
||||||
|
"07": "You have too many delinquent or derogatory accounts",
|
||||||
|
"70": "Too high proportion of auto accounts are delinquent or derogatory",
|
||||||
|
"71": "Not enough balance paid down over time on auto accounts",
|
||||||
|
"72": "Too few auto accounts paid as agreed",
|
||||||
|
"73": "Delinquent or derogatory auto account",
|
||||||
|
"74": "Balances on auto accounts are too high compared with loan amounts",
|
||||||
|
"75": "Payments on auto accounts less than scheduled amount",
|
||||||
|
"76": "Delinquent or derogatory status on auto accounts is too recent",
|
||||||
|
"77": "No open auto accounts in your credit file",
|
||||||
|
"78": "Lack of auto account information",
|
||||||
|
"79": "No negative reason code",
|
||||||
|
"8": "Too few accounts recently paid as agreed",
|
||||||
|
"08": "Too few accounts recently paid as agreed",
|
||||||
|
"80": "Delinquent or derogatory student loan",
|
||||||
|
"81": "Not enough balance paid down over time on student loans",
|
||||||
|
"82": "Lack of recently reported student loans",
|
||||||
|
"83": "No negative reason code",
|
||||||
|
"84": "Number of inquiries was a factor in determining the score",
|
||||||
|
"85": "Too many inquiries",
|
||||||
|
"86": "Derogatory public records",
|
||||||
|
"87": "Unpaid collections",
|
||||||
|
"88": "Bankruptcy",
|
||||||
|
"89": "No negative reason code",
|
||||||
|
"9": "Delinquent or derogatory account",
|
||||||
|
"09": "Delinquent or derogatory account",
|
||||||
|
"90": "No open revolving accounts in your credit file",
|
||||||
|
"91": "Balances on delinquent or derogatory revolving accounts are too high",
|
||||||
|
"92": "Delinquent or derogatory first mortgage",
|
||||||
|
"93": "Not enough balance paid down over time on first mortgage accounts",
|
||||||
|
"94": "No negative reason code",
|
||||||
|
"95": "No negative reason code",
|
||||||
|
"96": "Too few open accounts",
|
||||||
|
"97": "Too few accounts"
|
||||||
|
}
|
||||||
|
|
||||||
|
REASON_MAP = {
|
||||||
|
'evtg04': "System Generated",
|
||||||
|
'eads66': "System Generated",
|
||||||
|
's004s': "Length of time on file is too short",
|
||||||
|
'mt34s': "Not enough balance decreases on mortgage trades in the past 12 months",
|
||||||
|
'ct320': "Insufficient payment activity",
|
||||||
|
'us21s': "Length of time since most recent installment account has been established is too short",
|
||||||
|
'utlmag02': "Revolving account balances are too high in proportion to credit limits over the last 24 months",
|
||||||
|
'trv01': "Recency of a balance overlimit on a bankcard account",
|
||||||
|
'us34s': "Not enough balance decreases on installment trades in the past 12 months"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def generate_reason_codes(score_key, factors):
    """Build exactly four reason-code rows from a score's factor list.

    Args:
        score_key: ``"evtg04"`` selects ``VANTAGE_REASON_MAP`` and
            ``"eads66"`` selects ``EPD_REASON_MAP`` for descriptions; any
            other key uses no lookup table.
        factors: List of dicts carrying ``"code"`` and ``"rank"``. Only the
            first four entries are used.

    Returns:
        A list of four ``{"code", "rank", "description"}`` dicts. A code with
        no entry in the selected table gets ``""`` as its description. Unused
        slots, and all four slots when ``factors`` is empty or not a list,
        hold ``None`` in every field.
    """
    def _blank_row():
        return {"code": None, "rank": None, "description": None}

    # No usable factors: four empty rows.
    if not isinstance(factors, list) or not factors:
        return [_blank_row() for _ in range(4)]

    if score_key == "evtg04":
        descriptions = VANTAGE_REASON_MAP
    elif score_key == "eads66":
        descriptions = EPD_REASON_MAP
    else:
        descriptions = {}

    rows = [
        {
            "code": factor.get("code"),
            "rank": factor.get("rank"),
            # Table keys are strings, so the code is stringified for lookup.
            "description": descriptions.get(str(factor.get("code")), ""),
        }
        for factor in factors[:4]
    ]

    # Pad with empty rows up to four.
    rows.extend(_blank_row() for _ in range(4 - len(rows)))
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def post_processing(processing_output, record):
    """Convert the model output into a grade and a list of reason codes.

    Args:
        processing_output: Dict with ``"prediction"`` (a number) and
            ``"shape_reasoncode"`` (an iterable of dicts carrying
            ``"feature"`` and ``"value"``), examined in the order given.
        record: Extracted variables; read for ``"<feature>_factors"`` when a
            score feature (``evtg04`` / ``eads66``) triggers.

    Returns:
        ``{"grade": "M1".."M14", "reason_codes": [...]}``:
        - prediction <= 0.04: one row with every field ``None``;
        - otherwise the first item whose feature has a threshold rule that
          its value satisfies decides the reasons: four factor-based rows
          for a score feature, else a single row from ``REASON_MAP``;
        - if no item triggers: a single "No suitable Product Offerings
          found" row whose code is the last feature examined, or ``None``
          when ``shape_reasoncode`` is empty.

    Raises:
        KeyError: If ``processing_output`` lacks a required key.
    """
    prediction = processing_output["prediction"]
    shape_reasoncode = processing_output["shape_reasoncode"]

    # Grade mapping: one grade per 0.01 of prediction, clamped to M1..M14.
    # NOTE(review): float division may land just above an exact multiple of
    # 0.01, which would move a boundary value up one grade - confirm whether
    # exact bucket edges matter.
    if prediction < 0:
        grade = "M14"
    else:
        bucket = math.ceil(prediction / 0.01)
        grade = f"M{min(max(bucket, 1), 14)}"

    # Predictions at or below 0.04 are not declined, so no reasons are given.
    # NOTE(review): a negative prediction is graded M14 above yet also takes
    # this branch - confirm that combination is intended.
    if prediction <= 0.04:
        return {
            "grade": grade,
            "reason_codes": [{
                "code": None,
                "rank": None,
                "description": None
            }]
        }

    # Per-feature threshold rules; a rule returning True selects the reason.
    conditions = {
        'evtg04': lambda x: x < 700,
        'eads66': lambda x: x < 700,
        's004s': lambda x: x < 12,
        'mt34s': lambda x: x > 95,
        'ct320': lambda x: x <= 3,
        'us21s': lambda x: x <= 3,
        'utlmag02': lambda x: x > 300,
        'trv01': lambda x: x <= 3,
        'us34s': lambda x: x > 90
    }

    # Bound before the loop so the fallback below also works when
    # shape_reasoncode is empty (it used to raise UnboundLocalError).
    feat = None
    for item in shape_reasoncode:
        feat = item.get("feature")
        val = item.get("value")
        cond = conditions.get(feat)
        if cond is None:
            continue

        # Only the comparison is guarded: a missing or non-numeric value
        # means the rule cannot be evaluated, so the item is skipped.
        try:
            triggered = cond(val)
        except (TypeError, ValueError):
            continue
        if not triggered:
            continue

        if feat in ("evtg04", "eads66"):
            # Score-type feature: full factors-based reasons.
            return {
                "grade": grade,
                "reason_codes": generate_reason_codes(feat, record.get(f"{feat}_factors", []))
            }

        # Any other feature: a single reason taken from REASON_MAP.
        return {
            "grade": grade,
            "reason_codes": [{
                "code": feat,
                "rank": "1",
                "description": REASON_MAP.get(feat, "Reason not mapped")
            }]
        }

    # Default fallback: nothing triggered.
    return {
        "grade": grade,
        "reason_codes": [{
            "code": feat,
            "rank": "1",
            "description": "No suitable Product Offerings found"
        }]
    }
|
||||||
360
pre_processing.py
Normal file
360
pre_processing.py
Normal file
@ -0,0 +1,360 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
|
||||||
|
lookup_dict = {
|
||||||
|
"balmag04": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 427.0
|
||||||
|
},
|
||||||
|
"utlmag01": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 446.0
|
||||||
|
},
|
||||||
|
"utlmag02": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 486.0
|
||||||
|
},
|
||||||
|
"utlmag03": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 492.0
|
||||||
|
},
|
||||||
|
"utlmag04": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 414.0
|
||||||
|
},
|
||||||
|
"mnpmag03": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 494.0
|
||||||
|
},
|
||||||
|
"duemag01": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 488.0
|
||||||
|
},
|
||||||
|
"trv01": {
|
||||||
|
"data_type": "int", "valid_min": 1.0, "valid_max": 24.0, "default_treatment_type": "unk", "observed_cap_min_value": 1.0, "observed_cap_max_value": 24.0
|
||||||
|
},
|
||||||
|
"trv02": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 12.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12.0
|
||||||
|
},
|
||||||
|
"index01": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||||
|
},
|
||||||
|
"index02": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||||
|
},
|
||||||
|
"rvlr75": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||||
|
},
|
||||||
|
"rvlr77": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||||
|
},
|
||||||
|
"rev12": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 109.0
|
||||||
|
},
|
||||||
|
"rev13": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 82.0
|
||||||
|
},
|
||||||
|
"rev14": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 56.0
|
||||||
|
},
|
||||||
|
"rev54": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
|
||||||
|
},
|
||||||
|
"rev84": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||||
|
},
|
||||||
|
"bkc14": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
|
||||||
|
},
|
||||||
|
"bkc84": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||||
|
},
|
||||||
|
"ret12": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 42.0
|
||||||
|
},
|
||||||
|
"evtg04": {
|
||||||
|
"data_type": "int", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 845.0
|
||||||
|
},
|
||||||
|
"rev201": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||||
|
},
|
||||||
|
"rev202": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||||
|
},
|
||||||
|
"rev203": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||||
|
},
|
||||||
|
"rev223": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 49.0
|
||||||
|
},
|
||||||
|
"rev224": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
|
||||||
|
},
|
||||||
|
"rev225": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
|
||||||
|
},
|
||||||
|
"walshr02": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 2.0
|
||||||
|
},
|
||||||
|
"rev231": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 18101.0
|
||||||
|
},
|
||||||
|
"rev232": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38963.0
|
||||||
|
},
|
||||||
|
"rev233": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 68709.0
|
||||||
|
},
|
||||||
|
"rev252": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13988.0
|
||||||
|
},
|
||||||
|
"rev253": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12347.0
|
||||||
|
},
|
||||||
|
"all231": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38949.0
|
||||||
|
},
|
||||||
|
"at28a": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1330166.0
|
||||||
|
},
|
||||||
|
"at28b": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 351273.0
|
||||||
|
},
|
||||||
|
"at36s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||||
|
},
|
||||||
|
"bc20s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 510.0
|
||||||
|
},
|
||||||
|
"bc21s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 190.0
|
||||||
|
},
|
||||||
|
"bc28s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 205768.0
|
||||||
|
},
|
||||||
|
"bc36s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||||
|
},
|
||||||
|
"bc97a": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 184747.0
|
||||||
|
},
|
||||||
|
"bc98a": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 186395.0
|
||||||
|
},
|
||||||
|
"bc102s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 31000.0
|
||||||
|
},
|
||||||
|
"bc104s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 99.0
|
||||||
|
},
|
||||||
|
"bc107s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 15.0
|
||||||
|
},
|
||||||
|
"br20s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 495.0
|
||||||
|
},
|
||||||
|
"fi21s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162.0
|
||||||
|
},
|
||||||
|
"fi34s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
|
||||||
|
},
|
||||||
|
"fi35s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 70016.0
|
||||||
|
},
|
||||||
|
"g051s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||||
|
},
|
||||||
|
"g102s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||||
|
},
|
||||||
|
"g105s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||||
|
},
|
||||||
|
"g201a": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 255142.0
|
||||||
|
},
|
||||||
|
"g221d": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1.0
|
||||||
|
},
|
||||||
|
"g232s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 21.0
|
||||||
|
},
|
||||||
|
"g250a": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13.0
|
||||||
|
},
|
||||||
|
"g960s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 17.0
|
||||||
|
},
|
||||||
|
"g990s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 10.0
|
||||||
|
},
|
||||||
|
"mt20s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 261.0
|
||||||
|
},
|
||||||
|
"mt34s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
|
||||||
|
},
|
||||||
|
"pb28s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 192100.0
|
||||||
|
},
|
||||||
|
"pb34s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
|
||||||
|
},
|
||||||
|
"re28s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 311518.0
|
||||||
|
},
|
||||||
|
"re36s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||||
|
},
|
||||||
|
"re102s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 41750.0
|
||||||
|
},
|
||||||
|
"s004s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 179.0
|
||||||
|
},
|
||||||
|
"s114s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||||
|
},
|
||||||
|
"st32s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 221686.0
|
||||||
|
},
|
||||||
|
"g106s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 590.0
|
||||||
|
},
|
||||||
|
"g242b": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||||
|
},
|
||||||
|
"us21s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 153.0
|
||||||
|
},
|
||||||
|
"us34s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
|
||||||
|
},
|
||||||
|
"g403s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||||
|
},
|
||||||
|
"g405s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||||
|
},
|
||||||
|
"g408s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||||
|
},
|
||||||
|
"g411s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 8.0
|
||||||
|
},
|
||||||
|
"g416s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||||
|
},
|
||||||
|
"g417s": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||||
|
},
|
||||||
|
"agg901": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||||
|
},
|
||||||
|
"agg902": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||||
|
},
|
||||||
|
"agg908": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 69376.0
|
||||||
|
},
|
||||||
|
"agg911": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 10000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 107.0
|
||||||
|
},
|
||||||
|
"rle904": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 309634.0
|
||||||
|
},
|
||||||
|
"p02d": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||||
|
},
|
||||||
|
"p02h": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 7.0
|
||||||
|
},
|
||||||
|
"balmag01": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 474.0
|
||||||
|
},
|
||||||
|
"balmag02": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 516.0
|
||||||
|
},
|
||||||
|
"cv13": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||||
|
},
|
||||||
|
"cv17": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
|
||||||
|
},
|
||||||
|
"cv21": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171153.0
|
||||||
|
},
|
||||||
|
"cv25": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 33.0
|
||||||
|
},
|
||||||
|
"ct319": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 98.0
|
||||||
|
},
|
||||||
|
"ct320": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 52.0
|
||||||
|
},
|
||||||
|
"cta11": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 71.0
|
||||||
|
},
|
||||||
|
"cta20": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
|
||||||
|
},
|
||||||
|
"cta21": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 148.0
|
||||||
|
},
|
||||||
|
"paymnt08": {
|
||||||
|
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||||
|
},
|
||||||
|
"rev321": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 234408.0
|
||||||
|
},
|
||||||
|
"rev322": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 219698.0
|
||||||
|
},
|
||||||
|
"bkc321": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171024.0
|
||||||
|
},
|
||||||
|
"bkc322": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162590.0
|
||||||
|
},
|
||||||
|
"bkc323": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 152068.0
|
||||||
|
},
|
||||||
|
"bkc324": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 136763.0
|
||||||
|
},
|
||||||
|
"bkc327": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171143.0
|
||||||
|
},
|
||||||
|
"bkc328": {
|
||||||
|
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 164352.0
|
||||||
|
},
|
||||||
|
"eads66": {
|
||||||
|
"data_type": "float", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 820.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 1. Pre-processing: type casting and variable treatment
|
||||||
|
def pre_processing(input_dict, lookup=None):
    """Cast and range-treat raw variables according to a per-variable config.

    For every variable named in the lookup table the raw value is:

    1. cast to the configured ``data_type`` (``"int"`` or ``"float"``);
       a value that cannot be cast becomes ``None``;
    2. replaced with NaN when it lies outside ``[valid_min, valid_max]``
       and ``default_treatment_type`` is ``"unk"``;
    3. clamped to ``observed_cap_min_value`` / ``observed_cap_max_value``
       when it is inside the valid range but beyond an observed cap.

    Steps 2 and 3 are skipped when the value is ``None`` or when either
    validity bound is missing (``None``) or NaN.

    Args:
        input_dict: Mapping of variable name to raw value. Variables absent
            from the mapping are treated as ``None``.
        lookup: Optional mapping of variable name to its config dict.
            Defaults to the module-level ``lookup_dict``.

    Returns:
        A dict with one entry per variable in the lookup table, holding the
        treated value (a number, NaN, or ``None``).
    """
    if lookup is None:
        lookup = lookup_dict

    processed = {}
    for var, cfg in lookup.items():
        val = input_dict.get(var)

        # Cast to the required type.
        data_type = cfg.get("data_type")
        try:
            if data_type == "int":
                val = int(val)
            elif data_type == "float":
                val = float(val)
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers int(float("inf")), which is neither a
            # ValueError nor a TypeError.
            val = None

        # Variable treatment only applies when both validity bounds are
        # usable numbers; a missing (None) bound is handled like a NaN bound
        # instead of crashing inside math.isnan.
        vmin = cfg.get("valid_min")
        vmax = cfg.get("valid_max")
        has_bounds = (
            vmin is not None
            and vmax is not None
            and not (math.isnan(vmin) or math.isnan(vmax))
        )

        if val is not None and has_bounds:
            if cfg.get("default_treatment_type") == "unk":
                # Out-of-range values are marked as unknown (NaN).
                if val < vmin or val > vmax:
                    val = float("nan")

            cmin = cfg.get("observed_cap_min_value")
            cmax = cfg.get("observed_cap_max_value")
            if not math.isnan(val):
                # Clamp in-range values to the observed caps.
                if cmin is not None and vmin <= val < cmin:
                    val = cmin
                if cmax is not None and cmax < val <= vmax:
                    val = cmax

        processed[var] = val

    return processed
|
||||||
39
processing.py
Normal file
39
processing.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import joblib
|
||||||
|
import xgboost as xgb
|
||||||
|
import shap
|
||||||
|
|
||||||
|
def processing(processed_dict):
    """Score one pre-processed record and rank its features by SHAP value.

    The model is loaded with joblib from ``./xgboost_model.joblib`` on every
    call. NOTE(review): the object is used through ``feature_names`` and
    ``predict(DMatrix)``, so it is presumably an xgboost Booster -- confirm.

    Args:
        processed_dict: Mapping of feature name to treated value; it must
            contain every name listed in the model's ``feature_names``.

    Returns:
        A dict with:
          - "prediction": the first model output for the record, as a float.
          - "shape_reasoncode": one entry per feature, ordered by SHAP value
            from highest to lowest (signed, not absolute), each holding the
            feature name, its input value and its 1-based rank.
    """
    booster = joblib.load("./xgboost_model.joblib")
    tree_explainer = shap.TreeExplainer(booster)
    names = booster.feature_names

    # Assemble the single-row input in the model's own feature order.
    row = []
    for name in names:
        row.append(processed_dict[name])
    data = xgb.DMatrix([row], feature_names=names)

    scores = booster.predict(data)
    prediction = float(scores[0])

    # Reduce a 2-D SHAP result to the row for this record.
    contributions = tree_explainer.shap_values(data)
    if getattr(contributions, "ndim", 1) > 1:
        contributions = contributions[0]

    # Order features from the largest SHAP value to the smallest.
    pairs = list(zip(names, contributions))
    pairs.sort(key=lambda pair: pair[1], reverse=True)

    reason_codes = []
    for rank, (name, _) in enumerate(pairs, start=1):
        reason_codes.append(
            {
                "feature": name,
                "value": processed_dict[name],
                "shap_rank": rank,
            }
        )

    return {
        "prediction": prediction,
        "shape_reasoncode": reason_codes,
    }
|
||||||
@ -1 +1,14 @@
|
|||||||
{}
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"application_id": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Unique identifier for the application."
|
||||||
|
},
|
||||||
|
"creditBureau": {
|
||||||
|
"type": ["object", "null"],
|
||||||
|
"description": "Credit bureau details"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@ -1 +1,3 @@
|
|||||||
{}
|
joblib == 1.3.2
|
||||||
|
xgboost == 2.1.4
|
||||||
|
shap == 0.46.0
|
||||||
|
|||||||
@ -1 +1,48 @@
|
|||||||
{}
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"application_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Unique identifier for the application"
|
||||||
|
},
|
||||||
|
"prediction": {
|
||||||
|
"type": "number",
|
||||||
|
"description": "Model's predicted probability score"
|
||||||
|
},
|
||||||
|
"grade": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "HD Model Grade"
|
||||||
|
},
|
||||||
|
"reason_codes": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "List of reason codes explaining the model decision",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Feature or score reason code"
|
||||||
|
},
|
||||||
|
"rank": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Rank of importance (1 to 4)"
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"description": "Human-readable explanation for the reason"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["code", "rank", "description"]
|
||||||
|
},
|
||||||
|
"minItems": 1,
|
||||||
|
"maxItems": 4
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"application_id",
|
||||||
|
"prediction",
|
||||||
|
"grade",
|
||||||
|
"reason_codes"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|||||||
12611
test_block.py
Normal file
12611
test_block.py
Normal file
File diff suppressed because it is too large
Load Diff
BIN
xgboost_model.joblib
Normal file
BIN
xgboost_model.joblib
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user