Series M v1 model
This commit is contained in:
parent
e2c2560e0a
commit
95034efa4e
67
block.py
67
block.py
@ -1,5 +1,9 @@
|
||||
import logging
|
||||
import random # <-- import random to pick a number
|
||||
from typing import Dict
|
||||
from pre_processing import pre_processing
|
||||
from processing import processing
|
||||
from post_processing import post_processing
|
||||
from parse_report import extract_model_variables
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
@ -8,11 +12,58 @@ logging.basicConfig(
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def __main__(application_id: str, tu_credit_report: dict) -> dict:
|
||||
# Randomly pick an integer between 800 and 1400
|
||||
hd_score_m1 = random.randint(800, 1400)
|
||||
|
||||
return {
|
||||
"application_id": application_id,
|
||||
"hd_score_m1": hd_score_m1
|
||||
|
||||
def __main__(application_id: str, creditBureau: dict) -> Dict:
    """
    Process a single application record via the scoring pipeline.

    The payload is passed, in order, through ``extract_model_variables``,
    ``pre_processing``, ``processing`` and ``post_processing``. Extra keys in
    the payload are accepted; those stages decide which values are read.

    Args:
        application_id: Identifier echoed back unchanged in the result.
        creditBureau: Raw credit-bureau payload handed to
            ``extract_model_variables``.

    Returns:
        A dict containing:
            - application_id: the ``application_id`` argument
            - prediction: ``processing(...)["prediction"]``
            - grade: ``post_processing(...)["grade"]``
            - reason_description: ``post_processing(...)["reason_description"]``
    """
    record = extract_model_variables(creditBureau)
    processed = pre_processing(record)
    out = processing(processed)
    final = post_processing(out)

    final_result = {
        'application_id': application_id,
        'prediction': out['prediction'],
        'grade': final['grade'],
        'reason_description': final['reason_description'],
    }

    # Lazy %-formatting: the message text is unchanged, but the dict is only
    # rendered when the INFO level is actually enabled.
    logger.info("final_result: %s", final_result)

    return final_result
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: score the application stored in the JSON file given as the
    # first command-line argument.
    import json
    import sys

    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <application.json>")

    # Read as UTF-8 explicitly; the platform default encoding (e.g. cp1252 on
    # Windows) can mis-decode non-ASCII characters in the report.
    with open(sys.argv[1], encoding="utf-8") as f:
        data = json.load(f)

    # The whole document is passed as the bureau payload; the application id
    # is read from its top-level "application_id" key.
    __main__(application_id=data["application_id"], creditBureau=data)
|
||||
184
parse_report.py
Normal file
184
parse_report.py
Normal file
@ -0,0 +1,184 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
VARIABLE_TO_CODE_MAP = {
|
||||
"agg901": "00H86",
|
||||
"agg902": "00H86",
|
||||
"agg908": "00H86",
|
||||
"agg911": "00H86",
|
||||
"all231": "00ASH",
|
||||
"at28a": "00WR3",
|
||||
"at28b": "00WR3",
|
||||
"at36s": "00WR3",
|
||||
"balmag01": "00H88",
|
||||
"balmag02": "00H88",
|
||||
"balmag04": "00AI5",
|
||||
"bc102s": "00WR3",
|
||||
"bc104s": "00WR3",
|
||||
"bc107s": "00WR3",
|
||||
"bc20s": "00WR3",
|
||||
"bc21s": "00WR3",
|
||||
"bc28s": "00WR3",
|
||||
"bc36s": "00WR3",
|
||||
"bc97a": "00WR3",
|
||||
"bc98a": "00WR3",
|
||||
"bkc14": "00ASF",
|
||||
"bkc321": "00ASI",
|
||||
"bkc322": "00ASI",
|
||||
"bkc323": "00ASI",
|
||||
"bkc324": "00ASI",
|
||||
"bkc327": "00ASI",
|
||||
"bkc328": "00ASI",
|
||||
"bkc84": "00ASF",
|
||||
"br20s": "00WR3",
|
||||
"ct319": "00AI9",
|
||||
"ct320": "00AI9",
|
||||
"cta11": "00AI9",
|
||||
"cta20": "00AI9",
|
||||
"cta21": "00AI9",
|
||||
"cv13": "00V26",
|
||||
"cv17": "00V26",
|
||||
"cv21": "00V26",
|
||||
"cv25": "00V26",
|
||||
"duemag01": "00AI5",
|
||||
"fi21s": "00WR3",
|
||||
"fi34s": "00WR3",
|
||||
"fi35s": "00WR3",
|
||||
"g051s": "00WR3",
|
||||
"g102s": "00WR3",
|
||||
"g105s": "00WR3",
|
||||
"g106s": "00WR3",
|
||||
"g201a": "00WR3",
|
||||
"g221d": "00WR3",
|
||||
"g232s": "00WR3",
|
||||
"g242b": "00WR3",
|
||||
"g250a": "00WR3",
|
||||
"g403s": "00WR3",
|
||||
"g405s": "00WR3",
|
||||
"g408s": "00WR3",
|
||||
"g411s": "00WR3",
|
||||
"g416s": "00WR3",
|
||||
"g417s": "00WR3",
|
||||
"g960s": "00WR3",
|
||||
"g990s": "00WR3",
|
||||
"index01": "00V53",
|
||||
"index02": "00V53",
|
||||
"mnpmag03": "00AI5",
|
||||
"mt20s": "00WR3",
|
||||
"mt34s": "00WR3",
|
||||
"p02d": "00WBO",
|
||||
"p02h": "00WBO",
|
||||
"paymnt08": "00H91",
|
||||
"pb28s": "00WR3",
|
||||
"pb34s": "00WR3",
|
||||
"re102s": "00WR3",
|
||||
"re28s": "00WR3",
|
||||
"re36s": "00WR3",
|
||||
"ret12": "00ASF",
|
||||
"rev12": "00ASF",
|
||||
"rev13": "00ASF",
|
||||
"rev14": "00ASF",
|
||||
"rev201": "00ASG",
|
||||
"rev202": "00ASG",
|
||||
"rev203": "00ASG",
|
||||
"rev223": "00ASG",
|
||||
"rev224": "00ASG",
|
||||
"rev225": "00ASG",
|
||||
"rev231": "00ASH",
|
||||
"rev232": "00ASH",
|
||||
"rev233": "00ASH",
|
||||
"rev252": "00ASH",
|
||||
"rev253": "00ASH",
|
||||
"rev321": "00ASI",
|
||||
"rev322": "00ASI",
|
||||
"rev54": "00ASF",
|
||||
"rev84": "00ASF",
|
||||
"rle904": "00ASJ",
|
||||
"rvlr75": "00WP4",
|
||||
"rvlr77": "00WP4",
|
||||
"s004s": "00WR3",
|
||||
"s114s": "00WR3",
|
||||
"st32s": "00WR3",
|
||||
"trv01": "00H87",
|
||||
"trv02": "00H87",
|
||||
"us21s": "00WR3",
|
||||
"us34s": "00WR3",
|
||||
"utlmag01": "00AI5",
|
||||
"utlmag02": "00AI5",
|
||||
"utlmag03": "00AI5",
|
||||
"utlmag04": "00AI5",
|
||||
"walshr02": "00H90",
|
||||
}
|
||||
|
||||
SCORE_TO_CODE_MAP = {
|
||||
"evtg04": "001NN",
|
||||
"eads66": "00WDC"
|
||||
}
|
||||
|
||||
|
||||
def safe_get(d, *keys):
    """Return the value stored under the first of ``keys`` found in ``d``.

    Callers pass the snake_case and camelCase spellings of one field so that
    either payload casing is accepted.

    Args:
        d: Mapping to search. Anything that is not a mapping (for example the
            ``None`` left behind by a null JSON node) is treated as having
            none of the keys instead of raising.
        *keys: Candidate key names, tried in the order given.

    Returns:
        ``d[key]`` for the first candidate present, otherwise an empty dict so
        lookups can be chained without intermediate checks.
    """
    from collections.abc import Mapping

    if not isinstance(d, Mapping):
        return {}
    for key in keys:
        if key in d:
            return d[key]
    return {}
|
||||
|
||||
|
||||
def _as_dict(node):
    """Return ``node`` when it is a dict, otherwise an empty dict."""
    return node if isinstance(node, dict) else {}


def extract_model_variables(creditBureau: dict) -> dict:
    """Flatten a credit-bureau payload into the variables named in the maps.

    Walks ``creditBureau -> product -> subject -> subjectRecord ->
    addOnProduct`` (snake_case or camelCase where both are tried), records
    every characteristic and score result keyed by add-on product code, then
    reads out the variables listed in ``VARIABLE_TO_CODE_MAP`` and
    ``SCORE_TO_CODE_MAP``.

    Null or unexpectedly shaped nodes are treated as absent rather than
    raising, so a sparse report yields ``None`` values instead of a crash.

    Args:
        creditBureau: Parsed report whose top level holds a ``creditBureau``
            or ``credit_bureau`` node.

    Returns:
        A dict with one entry per ``VARIABLE_TO_CODE_MAP`` key (``None`` when
        the characteristic was not found), plus one entry per
        ``SCORE_TO_CODE_MAP`` key whose score result is present and truthy,
        rendered as a string with any leading ``+`` removed.
    """
    # Step 1: locate the add-on products, tolerating either key casing and
    # null intermediate nodes.
    credit_bureau = _as_dict(
        safe_get(_as_dict(creditBureau), "creditBureau", "credit_bureau")
    )
    product_node = _as_dict(safe_get(credit_bureau, "product"))
    subject = _as_dict(safe_get(product_node, "subject"))
    subject_node = _as_dict(safe_get(subject, "subject_record", "subjectRecord"))
    add_ons = safe_get(subject_node, "add_on_product", "addOnProduct")

    if isinstance(add_ons, dict):
        # NOTE(review): a non-empty dict is assumed to be a single add-on
        # product that was not wrapped in a list - confirm against the feed.
        add_ons = [add_ons] if add_ons else []
    elif not isinstance(add_ons, list):
        add_ons = []

    # Step 2: flatten values into {(product_code, name): value}.
    value_map = {}
    for product in add_ons:
        if not isinstance(product, dict):
            continue
        code = product.get("code")
        score_model = _as_dict(safe_get(product, "score_model", "scoreModel"))

        # 2a. Characteristics, keyed by lower-cased id.
        characteristics = score_model.get("characteristic") or []
        if isinstance(characteristics, dict):
            characteristics = [characteristics]
        for char in characteristics:
            if isinstance(char, dict) and "id" in char and "value" in char:
                value_map[(code, str(char["id"]).lower())] = char["value"]

        # 2b. Score result, if the product carries one.
        score = _as_dict(safe_get(score_model, "score"))
        if "results" in score:
            value_map[(code, "score")] = score["results"]

    # Step 3a: model variables; anything not found is reported as None.
    extracted = {
        var: value_map.get((code, var))
        for var, code in VARIABLE_TO_CODE_MAP.items()
    }

    # Step 3b: score variables; only present, truthy results are emitted.
    for key, code in SCORE_TO_CODE_MAP.items():
        value = value_map.get((code, "score"))
        if value:
            # str() keeps a numeric score from failing on .lstrip().
            extracted[key] = str(value).lstrip("+")

    return extracted
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: print the extracted variables for every report file named
    # on the command line.
    for filename in sys.argv[1:]:
        # Read as UTF-8 explicitly; the platform default encoding (e.g.
        # cp1252 on Windows) can mis-decode non-ASCII characters.
        with open(filename, encoding="utf-8") as f:
            data = json.load(f)
        print(f"\n--- Extracting from: {filename} ---")
        result = extract_model_variables(data)
        for k, v in result.items():
            print(f"{k}: {v}")
|
||||
61
post_processing.py
Normal file
61
post_processing.py
Normal file
@ -0,0 +1,61 @@
|
||||
import math
|
||||
|
||||
def post_processing(processing_output):
    """Turn a model score into a grade and, above the cut-off, a reason.

    Args:
        processing_output: Dict with
            - ``prediction``: numeric model score.
            - ``shape_reasoncode``: iterable of dicts with ``feature`` and
              ``value`` keys. It is scanned in the order given, and the first
              entry whose rule fires supplies the reason.

    Returns:
        Dict with ``grade`` ("M1" .. "M14") and ``reason_description``
        (``None`` when the prediction is at most 0.04 or no rule fires).

    Raises:
        ValueError: If ``prediction`` is NaN.
    """
    prediction = processing_output["prediction"]
    shape_reasoncode = processing_output["shape_reasoncode"]

    if math.isnan(prediction):
        # Same exception type the ceil()-based grading raised for NaN.
        raise ValueError("prediction is NaN; cannot assign a grade")

    # grade mapping: [0.00,0.01]→M1, (0.01,0.02]→M2, … cap at M14
    if prediction < 0:
        grade = "M14"
    else:
        # Compare against m / 100 rather than ceil(prediction / 0.01): the
        # division is inexact (0.07 / 0.01 == 7.000000000000001), which moved
        # scores sitting exactly on an upper bound into the next grade.
        band = next((m for m in range(1, 14) if prediction <= m / 100), 14)
        grade = f"M{band}"

    # if prediction ≤ 0.04, not declined
    if prediction <= 0.04:
        return {
            "grade": grade,
            "reason_description": None
        }

    # feature -> (rule that fires on the feature value, reason text)
    rules = {
        'evtg04': (lambda x: x < 700, "System Generated"),
        'eads66': (lambda x: x < 700, "System Generated"),
        's004s': (lambda x: x < 12, "Length of time on file is too short"),
        'mt34s': (lambda x: x > 95, "Too high open mortgage credit utilization recently"),
        'ct320': (lambda x: x <= 3, "Insufficient payment activity"),
        'us21s': (lambda x: x <= 3, "Length of time since most recent installment account has been established is too short"),
        'utlmag02': (lambda x: x > 300, "Too high revolving credit utilization over the last 24 months"),
        'trv01': (lambda x: x > 3, "Recency of a balance overlimit on a bankcard account"),
        'us34s': (lambda x: x > 90, "Too high open unsecured installment credit utilization recently"),
    }

    for item in shape_reasoncode:
        feat = item["feature"]
        val = item["value"]
        rule = rules.get(feat)
        # A missing value (None) cannot be compared with a number; skip it
        # instead of raising TypeError. NaN needs no special case because
        # every comparison against NaN is already False.
        if rule is None or val is None:
            continue
        fires, description = rule
        if fires(val):
            return {
                "grade": grade,
                "reason_description": description
            }

    return {
        "grade": grade,
        "reason_description": None
    }
|
||||
|
||||
360
pre_processing.py
Normal file
360
pre_processing.py
Normal file
@ -0,0 +1,360 @@
|
||||
import math
|
||||
|
||||
|
||||
lookup_dict = {
|
||||
"balmag04": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 427.0
|
||||
},
|
||||
"utlmag01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 446.0
|
||||
},
|
||||
"utlmag02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 486.0
|
||||
},
|
||||
"utlmag03": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 492.0
|
||||
},
|
||||
"utlmag04": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 414.0
|
||||
},
|
||||
"mnpmag03": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 494.0
|
||||
},
|
||||
"duemag01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 488.0
|
||||
},
|
||||
"trv01": {
|
||||
"data_type": "int", "valid_min": 1.0, "valid_max": 24.0, "default_treatment_type": "unk", "observed_cap_min_value": 1.0, "observed_cap_max_value": 24.0
|
||||
},
|
||||
"trv02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 12.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12.0
|
||||
},
|
||||
"index01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"index02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"rvlr75": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"rvlr77": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"rev12": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 109.0
|
||||
},
|
||||
"rev13": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 82.0
|
||||
},
|
||||
"rev14": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 56.0
|
||||
},
|
||||
"rev54": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
|
||||
},
|
||||
"rev84": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"bkc14": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
|
||||
},
|
||||
"bkc84": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"ret12": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 42.0
|
||||
},
|
||||
"evtg04": {
|
||||
"data_type": "int", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 845.0
|
||||
},
|
||||
"rev201": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev202": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev203": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev223": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 49.0
|
||||
},
|
||||
"rev224": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
|
||||
},
|
||||
"rev225": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
|
||||
},
|
||||
"walshr02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 2.0
|
||||
},
|
||||
"rev231": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 18101.0
|
||||
},
|
||||
"rev232": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38963.0
|
||||
},
|
||||
"rev233": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 68709.0
|
||||
},
|
||||
"rev252": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13988.0
|
||||
},
|
||||
"rev253": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12347.0
|
||||
},
|
||||
"all231": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38949.0
|
||||
},
|
||||
"at28a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1330166.0
|
||||
},
|
||||
"at28b": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 351273.0
|
||||
},
|
||||
"at36s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"bc20s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 510.0
|
||||
},
|
||||
"bc21s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 190.0
|
||||
},
|
||||
"bc28s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 205768.0
|
||||
},
|
||||
"bc36s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"bc97a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 184747.0
|
||||
},
|
||||
"bc98a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 186395.0
|
||||
},
|
||||
"bc102s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 31000.0
|
||||
},
|
||||
"bc104s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 99.0
|
||||
},
|
||||
"bc107s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 15.0
|
||||
},
|
||||
"br20s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 495.0
|
||||
},
|
||||
"fi21s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162.0
|
||||
},
|
||||
"fi34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
|
||||
},
|
||||
"fi35s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 70016.0
|
||||
},
|
||||
"g051s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"g102s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"g105s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"g201a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 255142.0
|
||||
},
|
||||
"g221d": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1.0
|
||||
},
|
||||
"g232s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 21.0
|
||||
},
|
||||
"g250a": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13.0
|
||||
},
|
||||
"g960s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 17.0
|
||||
},
|
||||
"g990s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 10.0
|
||||
},
|
||||
"mt20s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 261.0
|
||||
},
|
||||
"mt34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
|
||||
},
|
||||
"pb28s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 192100.0
|
||||
},
|
||||
"pb34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
|
||||
},
|
||||
"re28s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 311518.0
|
||||
},
|
||||
"re36s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
|
||||
},
|
||||
"re102s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 41750.0
|
||||
},
|
||||
"s004s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 179.0
|
||||
},
|
||||
"s114s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"st32s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 221686.0
|
||||
},
|
||||
"g106s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 590.0
|
||||
},
|
||||
"g242b": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"us21s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 153.0
|
||||
},
|
||||
"us34s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
|
||||
},
|
||||
"g403s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
|
||||
},
|
||||
"g405s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"g408s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"g411s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 8.0
|
||||
},
|
||||
"g416s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"g417s": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
|
||||
},
|
||||
"agg901": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"agg902": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"agg908": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 69376.0
|
||||
},
|
||||
"agg911": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 10000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 107.0
|
||||
},
|
||||
"rle904": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 309634.0
|
||||
},
|
||||
"p02d": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
|
||||
},
|
||||
"p02h": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 7.0
|
||||
},
|
||||
"balmag01": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 474.0
|
||||
},
|
||||
"balmag02": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 516.0
|
||||
},
|
||||
"cv13": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"cv17": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
|
||||
},
|
||||
"cv21": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171153.0
|
||||
},
|
||||
"cv25": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 33.0
|
||||
},
|
||||
"ct319": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 98.0
|
||||
},
|
||||
"ct320": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 52.0
|
||||
},
|
||||
"cta11": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 71.0
|
||||
},
|
||||
"cta20": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
|
||||
},
|
||||
"cta21": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 148.0
|
||||
},
|
||||
"paymnt08": {
|
||||
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
|
||||
},
|
||||
"rev321": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 234408.0
|
||||
},
|
||||
"rev322": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 219698.0
|
||||
},
|
||||
"bkc321": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171024.0
|
||||
},
|
||||
"bkc322": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162590.0
|
||||
},
|
||||
"bkc323": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 152068.0
|
||||
},
|
||||
"bkc324": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 136763.0
|
||||
},
|
||||
"bkc327": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171143.0
|
||||
},
|
||||
"bkc328": {
|
||||
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 164352.0
|
||||
},
|
||||
"eads66": {
|
||||
"data_type": "float", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 820.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# 1. Pre-processing: type casting and variable treatment
|
||||
def _treat_value(raw, cfg):
    """Cast one raw value and apply the validity and capping rules in ``cfg``."""
    caster = {"int": int, "float": float}.get(cfg.get("data_type"))
    val = raw
    if caster is not None:
        try:
            val = caster(raw)
        except (ValueError, TypeError, OverflowError):
            # OverflowError: int(float("inf")); an uncastable value is
            # reported as missing, like any other bad input.
            return None
    if val is None:
        return None

    vmin = cfg.get("valid_min")
    vmax = cfg.get("valid_max")
    # Without both valid bounds no range treatment can be applied.
    if vmin is None or vmax is None or math.isnan(vmin) or math.isnan(vmax):
        return val

    if cfg.get("default_treatment_type") == "unk" and (val < vmin or val > vmax):
        return float("nan")
    if math.isnan(val):
        return val

    # Clamp in-range values to the configured cap window.
    cmin = cfg.get("observed_cap_min_value")
    cmax = cfg.get("observed_cap_max_value")
    if cmin is not None and vmin <= val < cmin:
        val = cmin
    if cmax is not None and cmax < val <= vmax:
        val = cmax
    return val


# 1. Pre-processing: type casting and variable treatment
def pre_processing(input_dict, lookup=None):
    """Cast and range-treat raw variables according to a per-variable config.

    For each configured variable the raw value is cast to the configured
    ``data_type`` ("int" or "float"). Then:

    - a value that is absent or cannot be cast becomes ``None``;
    - with ``default_treatment_type == "unk"``, a value outside
      ``[valid_min, valid_max]`` becomes NaN;
    - a valid value below ``observed_cap_min_value`` or above
      ``observed_cap_max_value`` is replaced by that cap.

    Args:
        input_dict: Mapping of variable name to raw value; keys that are not
            configured are ignored.
        lookup: Optional mapping of variable name to its config dict.
            Defaults to the module-level ``lookup_dict``.

    Returns:
        Dict with one entry per configured variable.
    """
    configs = lookup_dict if lookup is None else lookup
    return {var: _treat_value(input_dict.get(var), cfg) for var, cfg in configs.items()}
|
||||
39
processing.py
Normal file
39
processing.py
Normal file
@ -0,0 +1,39 @@
|
||||
import joblib
|
||||
import xgboost as xgb
|
||||
import shap
|
||||
|
||||
_MODEL_PATH = "./xgboost_model.joblib"

# model path -> (model, SHAP explainer); filled on first use so the artifact
# is deserialised once per process rather than once per scored application.
_ARTIFACT_CACHE = {}


def _load_artifacts(model_path=_MODEL_PATH):
    """Return ``(model, explainer)`` for ``model_path``, loading them once."""
    if model_path not in _ARTIFACT_CACHE:
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code - only load model artifacts from a trusted source.
        model = joblib.load(model_path)
        _ARTIFACT_CACHE[model_path] = (model, shap.TreeExplainer(model))
    return _ARTIFACT_CACHE[model_path]


def processing(processed_dict):
    """Score one pre-processed record and rank its features by SHAP value.

    Args:
        processed_dict: Mapping of feature name to treated value. It must
            contain every name in the loaded model's ``feature_names``
            (a missing name raises ``KeyError``).

    Returns:
        Dict with
            - ``prediction``: first element of ``model.predict`` as a float.
            - ``shape_reasoncode``: list of ``{"feature", "value",
              "shap_rank"}`` dicts, ordered by descending signed SHAP value,
              with ``shap_rank`` starting at 1.
    """
    # presumably an xgboost Booster (it exposes feature_names and accepts a
    # DMatrix) - TODO confirm against the training artifact
    model, explainer = _load_artifacts()
    feature_names = model.feature_names

    # build and score a single-row matrix in the model's feature order
    row = [processed_dict[name] for name in feature_names]
    dmatrix = xgb.DMatrix([row], feature_names=feature_names)
    pred = float(model.predict(dmatrix)[0])

    # SHAP values may come back as 1×n_features; reduce to the single row
    shap_matrix = explainer.shap_values(dmatrix)
    shap_vals = shap_matrix[0] if getattr(shap_matrix, "ndim", 1) > 1 else shap_matrix

    # rank features by descending SHAP value (signed, not absolute)
    ranked = sorted(zip(feature_names, shap_vals),
                    key=lambda fv: fv[1],
                    reverse=True)

    # build a *list* of pre-sorted reason-codes
    shape_reasoncode = [
        {
            "feature": feat,
            "value": processed_dict[feat],
            "shap_rank": rank
        }
        for rank, (feat, _) in enumerate(ranked, start=1)
    ]

    return {
        "prediction": pred,
        "shape_reasoncode": shape_reasoncode
    }
|
||||
@ -6,16 +6,12 @@
|
||||
"type": ["string", "null"],
|
||||
"description": "Unique identifier for the application."
|
||||
},
|
||||
"tu_credit_report": {
|
||||
"evtg04": {
|
||||
"type": ["object", "null"],
|
||||
"description": "User Credit report",
|
||||
"properties": {
|
||||
"creditBureau": {
|
||||
"type": "object",
|
||||
"description": "Credit bureau details"
|
||||
}
|
||||
}
|
||||
"description": "VantageScore 4.0"
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
"required": [
|
||||
"application_id"
|
||||
]
|
||||
}
|
||||
|
||||
@ -1,2 +1,3 @@
|
||||
jmespath == 1.0.1
|
||||
regex == 2023.12.25
|
||||
joblib == 1.3.2
|
||||
xgboost == 2.1.4
|
||||
shap == 0.46.0
|
||||
|
||||
@ -3,12 +3,26 @@
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"application_id": {
|
||||
"type": ["string", "null"],
|
||||
"description": "Application Key"
|
||||
"type": "string",
|
||||
"description": "Unique identifier for the application"
|
||||
},
|
||||
"hd_score_m1": {
|
||||
"type": ["number", "null"],
|
||||
"description": "HD Fraud Score M1"
|
||||
"prediction": {
|
||||
"type": "number",
|
||||
"description": "Model's predicted probability score"
|
||||
},
|
||||
"grade": {
|
||||
"type": "string",
|
||||
"description": "HD Model Grade"
|
||||
},
|
||||
"reason_description": {
|
||||
"type": "string",
|
||||
"description": "Reason for the model decision"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"application_id",
|
||||
"prediction",
|
||||
"grade",
|
||||
"reason_description"
|
||||
]
|
||||
}
|
||||
|
||||
12611
test_block.py
Normal file
12611
test_block.py
Normal file
File diff suppressed because it is too large
Load Diff
BIN
xgboost_model.joblib
Normal file
BIN
xgboost_model.joblib
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user