Series M v1 model
All checks were successful
Build and Push Docker Image / test (push) Successful in 2m30s
Build and Push Docker Image / build_and_push (push) Successful in 4m50s

This commit is contained in:
Admin User 2025-06-13 18:01:40 +00:00
parent e2c2560e0a
commit 95034efa4e
10 changed files with 13342 additions and 25 deletions

View File

@ -1,5 +1,9 @@
import logging
import random # <-- import random to pick a number
from typing import Dict
from pre_processing import pre_processing
from processing import processing
from post_processing import post_processing
from parse_report import extract_model_variables
# Configure logging
logging.basicConfig(
@ -8,11 +12,58 @@ logging.basicConfig(
)
logger = logging.getLogger(__name__)
def __main__(application_id: str, tu_credit_report: dict) -> dict:
# Randomly pick an integer between 800 and 1400
hd_score_m1 = random.randint(800, 1400)
return {
"application_id": application_id,
"hd_score_m1": hd_score_m1
def __main__(application_id: str, creditBureau: dict) -> Dict:
    """
    Score a single application record via the scoring pipeline.

    The bureau payload is flattened by ``extract_model_variables``, treated by
    ``pre_processing``, scored by ``processing`` and graded by
    ``post_processing``. Extra keys in the payload are accepted; only the
    variables the pipeline looks up are used.

    Args:
        application_id: Identifier echoed back unchanged in the result.
        creditBureau: Payload handed to ``extract_model_variables``.

    Returns:
        A dict containing:
        - application_id: the identifier passed in
        - prediction: the ``prediction`` value returned by ``processing``
        - grade: the ``grade`` value returned by ``post_processing``
        - reason_description: the ``reason_description`` value returned by
          ``post_processing`` (str or None)
    """
    record = extract_model_variables(creditBureau)
    processed = pre_processing(record)
    out = processing(processed)
    final = post_processing(out)

    final_result = {
        'application_id': application_id,
        'prediction': out['prediction'],
        'grade': final['grade'],
        'reason_description': final['reason_description'],
    }
    # Lazy %-formatting: the message is only rendered when INFO is enabled.
    logger.info("final_result: %s", final_result)
    return final_result
if __name__ == "__main__":
    # Command-line entry point: score the JSON document named by the first argument.
    import json
    import sys

    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding when reading it.
    with open(sys.argv[1], encoding="utf-8") as f:
        data = json.load(f)
    __main__(application_id=data["application_id"], creditBureau=data)

184
parse_report.py Normal file
View File

@ -0,0 +1,184 @@
import json
import sys
# Maps each model variable name (the lowercased characteristic id) to the
# add-on product code under which extract_model_variables() looks it up.
# Iteration order of this dict determines the key order of the extracted record.
VARIABLE_TO_CODE_MAP = {
"agg901": "00H86",
"agg902": "00H86",
"agg908": "00H86",
"agg911": "00H86",
"all231": "00ASH",
"at28a": "00WR3",
"at28b": "00WR3",
"at36s": "00WR3",
"balmag01": "00H88",
"balmag02": "00H88",
"balmag04": "00AI5",
"bc102s": "00WR3",
"bc104s": "00WR3",
"bc107s": "00WR3",
"bc20s": "00WR3",
"bc21s": "00WR3",
"bc28s": "00WR3",
"bc36s": "00WR3",
"bc97a": "00WR3",
"bc98a": "00WR3",
"bkc14": "00ASF",
"bkc321": "00ASI",
"bkc322": "00ASI",
"bkc323": "00ASI",
"bkc324": "00ASI",
"bkc327": "00ASI",
"bkc328": "00ASI",
"bkc84": "00ASF",
"br20s": "00WR3",
"ct319": "00AI9",
"ct320": "00AI9",
"cta11": "00AI9",
"cta20": "00AI9",
"cta21": "00AI9",
"cv13": "00V26",
"cv17": "00V26",
"cv21": "00V26",
"cv25": "00V26",
"duemag01": "00AI5",
"fi21s": "00WR3",
"fi34s": "00WR3",
"fi35s": "00WR3",
"g051s": "00WR3",
"g102s": "00WR3",
"g105s": "00WR3",
"g106s": "00WR3",
"g201a": "00WR3",
"g221d": "00WR3",
"g232s": "00WR3",
"g242b": "00WR3",
"g250a": "00WR3",
"g403s": "00WR3",
"g405s": "00WR3",
"g408s": "00WR3",
"g411s": "00WR3",
"g416s": "00WR3",
"g417s": "00WR3",
"g960s": "00WR3",
"g990s": "00WR3",
"index01": "00V53",
"index02": "00V53",
"mnpmag03": "00AI5",
"mt20s": "00WR3",
"mt34s": "00WR3",
"p02d": "00WBO",
"p02h": "00WBO",
"paymnt08": "00H91",
"pb28s": "00WR3",
"pb34s": "00WR3",
"re102s": "00WR3",
"re28s": "00WR3",
"re36s": "00WR3",
"ret12": "00ASF",
"rev12": "00ASF",
"rev13": "00ASF",
"rev14": "00ASF",
"rev201": "00ASG",
"rev202": "00ASG",
"rev203": "00ASG",
"rev223": "00ASG",
"rev224": "00ASG",
"rev225": "00ASG",
"rev231": "00ASH",
"rev232": "00ASH",
"rev233": "00ASH",
"rev252": "00ASH",
"rev253": "00ASH",
"rev321": "00ASI",
"rev322": "00ASI",
"rev54": "00ASF",
"rev84": "00ASF",
"rle904": "00ASJ",
"rvlr75": "00WP4",
"rvlr77": "00WP4",
"s004s": "00WR3",
"s114s": "00WR3",
"st32s": "00WR3",
"trv01": "00H87",
"trv02": "00H87",
"us21s": "00WR3",
"us34s": "00WR3",
"utlmag01": "00AI5",
"utlmag02": "00AI5",
"utlmag03": "00AI5",
"utlmag04": "00AI5",
"walshr02": "00H90",
}
# Maps each score variable to the add-on product code whose score "results"
# value extract_model_variables() reports under that variable name.
SCORE_TO_CODE_MAP = {
"evtg04": "001NN",
"eads66": "00WDC"
}
def safe_get(d, *keys):
    """
    Return the value stored under the first of ``keys`` that is present in ``d``.

    Callers pass alternative spellings of the same field (snake_case and
    camelCase); the first spelling found wins.

    Args:
        d: Dict to look in. Anything that is not a dict (for example ``None``
            coming from a null JSON node) is treated as having no keys, so
            nested calls never fail on a missing intermediate node.
        *keys: Candidate key names, tried in order.

    Returns:
        The matching value, or an empty dict when none of the keys is present.
    """
    if not isinstance(d, dict):
        return {}
    for key in keys:
        if key in d:
            return d[key]
    return {}
def _as_list(node):
    """Normalise a JSON node to a list: a dict becomes a one-item list, a
    list/tuple is returned as is, anything else (None, scalars) becomes []."""
    if isinstance(node, dict):
        return [node] if node else []
    if isinstance(node, (list, tuple)):
        return node
    return []


def extract_model_variables(creditBureau: dict) -> dict:
    """
    Flatten the add-on products of a bureau payload into model variables.

    The add-on products are read from
    ``creditBureau -> product -> subject -> subjectRecord -> addOnProduct``
    (snake_case spellings are accepted as well). Every characteristic of every
    product is indexed by ``(product code, lowercased characteristic id)`` and
    every product score by ``(product code, "score")``.

    Args:
        creditBureau: Decoded payload containing the ``creditBureau`` node.

    Returns:
        A dict with one entry per key of ``VARIABLE_TO_CODE_MAP`` (``None``
        when the characteristic is absent) plus one entry per key of
        ``SCORE_TO_CODE_MAP`` whose score is present and truthy. String scores
        have any leading ``+`` removed.
    """
    # Step 1: Locate add-on products with fallback casing
    credit_bureau = safe_get(creditBureau, "creditBureau", "credit_bureau")
    subject = safe_get(safe_get(credit_bureau, "product"), "subject")
    subject_record = safe_get(subject, "subject_record", "subjectRecord")
    # Tolerate a missing/null node or a single product given as an object.
    add_ons = _as_list(safe_get(subject_record, "add_on_product", "addOnProduct"))

    # Step 2: Flatten values
    value_map = {}
    for product in add_ons:
        if not isinstance(product, dict):
            continue
        code = product.get("code")
        score_model = safe_get(product, "score_model", "scoreModel")
        if not isinstance(score_model, dict):
            continue

        # 2a. Flatten characteristics
        for char in _as_list(score_model.get("characteristic")):
            if isinstance(char, dict) and "id" in char and "value" in char:
                value_map[(code, str(char["id"]).lower())] = char["value"]

        # 2b. Also capture score result if present
        score = safe_get(score_model, "score")
        if isinstance(score, dict) and "results" in score:
            value_map[(code, "score")] = score["results"]

    # Step 3.a: Use VARIABLE_TO_CODE_MAP to fetch final vars
    extracted = {
        var: value_map.get((code, var))
        for var, code in VARIABLE_TO_CODE_MAP.items()
    }

    # Step 3.b: Use SCORE_TO_CODE_MAP to fetch score vars
    for key, code in SCORE_TO_CODE_MAP.items():
        value = value_map.get((code, "score"))
        if value:
            # Scores may carry a leading sign such as "+720"; only strings
            # can be stripped, other types are passed through untouched.
            extracted[key] = value.lstrip("+") if isinstance(value, str) else value
    return extracted
if __name__ == "__main__":
    # Command-line helper: print the extracted variables of each JSON file given.
    for filename in sys.argv[1:]:
        # JSON text is UTF-8 by specification; do not depend on the platform's
        # default encoding when reading it.
        with open(filename, encoding="utf-8") as f:
            data = json.load(f)
        print(f"\n--- Extracting from: {filename} ---")
        result = extract_model_variables(data)
        for k, v in result.items():
            print(f"{k}: {v}")

61
post_processing.py Normal file
View File

@ -0,0 +1,61 @@
import math
# Highest grade index; every prediction above 0.13 is graded M14.
_MAX_GRADE = 14

# Predictions at or below this value are not declined and carry no reason.
_NO_REASON_THRESHOLD = 0.04

# feature -> (predicate on the feature value, reason returned when it holds)
_REASON_RULES = {
    'evtg04': (lambda x: x < 700, "System Generated"),
    'eads66': (lambda x: x < 700, "System Generated"),
    's004s': (lambda x: x < 12, "Length of time on file is too short"),
    'mt34s': (lambda x: x > 95, "Too high open mortgage credit utilization recently"),
    'ct320': (lambda x: x <= 3, "Insufficient payment activity"),
    'us21s': (lambda x: x <= 3, "Length of time since most recent installment account has been established is too short"),
    'utlmag02': (lambda x: x > 300, "Too high revolving credit utilization over the last 24 months"),
    'trv01': (lambda x: x > 3, "Recency of a balance overlimit on a bankcard account"),
    'us34s': (lambda x: x > 90, "Too high open unsecured installment credit utilization recently"),
}


def _grade_index(prediction):
    """Return the smallest k with prediction <= k/100, capped at _MAX_GRADE."""
    # Comparing against k / 100 (instead of ceil(prediction / 0.01)) keeps the
    # upper bound of each interval inclusive: dividing by 0.01 can overshoot an
    # exact boundary by one ulp and push it into the next grade.
    for k in range(1, _MAX_GRADE):
        if prediction <= k / 100:
            return k
    return _MAX_GRADE


def post_processing(processing_output):
    """
    Turn a model prediction into a grade and, when declined, a reason.

    Grade mapping: [0.00, 0.01] -> M1, (0.01, 0.02] -> M2, ... capped at M14.
    A negative prediction is graded M14.

    Args:
        processing_output: Dict with
            - "prediction": numeric model output
            - "shape_reasoncode": list of dicts with "feature" and "value"
              keys, already ordered by priority

    Returns:
        A dict with "grade" (str) and "reason_description" (str or None).
        The reason is None when the prediction is <= 0.04 or when no entry of
        "shape_reasoncode" satisfies its rule; otherwise it is the reason of
        the first entry whose rule holds. Entries whose value is None are
        skipped, and NaN values never satisfy a rule.

    Raises:
        ValueError: If the prediction is NaN.
    """
    prediction = processing_output["prediction"]
    shape_reasoncode = processing_output["shape_reasoncode"]

    if math.isnan(prediction):
        raise ValueError("prediction is NaN; cannot assign a grade")

    if prediction < 0:
        grade = f"M{_MAX_GRADE}"
    else:
        grade = f"M{_grade_index(prediction)}"

    # if prediction <= 0.04, not declined
    if prediction <= _NO_REASON_THRESHOLD:
        return {"grade": grade, "reason_description": None}

    for item in shape_reasoncode:
        feat = item["feature"]
        val = item["value"]
        rule = _REASON_RULES.get(feat)
        # A missing (None) value cannot be compared against a threshold.
        if rule is None or val is None:
            continue
        predicate, reason = rule
        if predicate(val):
            return {"grade": grade, "reason_description": reason}

    return {"grade": grade, "reason_description": None}

360
pre_processing.py Normal file
View File

@ -0,0 +1,360 @@
import math
# Per-variable treatment configuration read by pre_processing():
#   data_type              - "int" or "float"; the raw value is cast to this type.
#   valid_min / valid_max  - accepted range; with default_treatment_type "unk",
#                            a value outside this range is replaced by NaN.
#   default_treatment_type - treatment selector; every entry here uses "unk".
#   observed_cap_min_value / observed_cap_max_value
#                          - an in-range value below / above the cap is
#                            replaced by the cap.
lookup_dict = {
"balmag04": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 427.0
},
"utlmag01": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 446.0
},
"utlmag02": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 486.0
},
"utlmag03": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 492.0
},
"utlmag04": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 414.0
},
"mnpmag03": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 494.0
},
"duemag01": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 488.0
},
"trv01": {
"data_type": "int", "valid_min": 1.0, "valid_max": 24.0, "default_treatment_type": "unk", "observed_cap_min_value": 1.0, "observed_cap_max_value": 24.0
},
"trv02": {
"data_type": "int", "valid_min": 0.0, "valid_max": 12.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12.0
},
"index01": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
},
"index02": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
},
"rvlr75": {
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
},
"rvlr77": {
"data_type": "float", "valid_min": 0.0, "valid_max": 5.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
},
"rev12": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 109.0
},
"rev13": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 82.0
},
"rev14": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 56.0
},
"rev54": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
},
"rev84": {
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
},
"bkc14": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
},
"bkc84": {
"data_type": "int", "valid_min": 0.0, "valid_max": 23.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
},
"ret12": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 42.0
},
"evtg04": {
"data_type": "int", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 845.0
},
"rev201": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
},
"rev202": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
},
"rev203": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
},
"rev223": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 49.0
},
"rev224": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 47.0
},
"rev225": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
},
"walshr02": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 2.0
},
"rev231": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 18101.0
},
"rev232": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38963.0
},
"rev233": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 68709.0
},
"rev252": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13988.0
},
"rev253": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 12347.0
},
"all231": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 38949.0
},
"at28a": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1330166.0
},
"at28b": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 351273.0
},
"at36s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
},
"bc20s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 510.0
},
"bc21s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 190.0
},
"bc28s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 205768.0
},
"bc36s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
},
"bc97a": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 184747.0
},
"bc98a": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 186395.0
},
"bc102s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 31000.0
},
"bc104s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 99.0
},
"bc107s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 15.0
},
"br20s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 495.0
},
"fi21s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162.0
},
"fi34s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
},
"fi35s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 70016.0
},
"g051s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
},
"g102s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
},
"g105s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
},
"g201a": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 255142.0
},
"g221d": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 1.0
},
"g232s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 21.0
},
"g250a": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 13.0
},
"g960s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 17.0
},
"g990s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 10.0
},
"mt20s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 261.0
},
"mt34s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
},
"pb28s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 192100.0
},
"pb34s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 100.0
},
"re28s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 311518.0
},
"re36s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 999.0
},
"re102s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 41750.0
},
"s004s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 179.0
},
"s114s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
},
"st32s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 221686.0
},
"g106s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 590.0
},
"g242b": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
},
"us21s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 153.0
},
"us34s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 101.0
},
"g403s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 5.0
},
"g405s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
},
"g408s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
},
"g411s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 8.0
},
"g416s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
},
"g417s": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 23.0
},
"agg901": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
},
"agg902": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
},
"agg908": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 69376.0
},
"agg911": {
"data_type": "float", "valid_min": 0.0, "valid_max": 10000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 107.0
},
"rle904": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 309634.0
},
"p02d": {
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 3.0
},
"p02h": {
"data_type": "int", "valid_min": 0.0, "valid_max": 99.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 7.0
},
"balmag01": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 474.0
},
"balmag02": {
"data_type": "int", "valid_min": 0.0, "valid_max": 600.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 516.0
},
"cv13": {
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
},
"cv17": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 9.0
},
"cv21": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171153.0
},
"cv25": {
"data_type": "int", "valid_min": 0.0, "valid_max": 100.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 33.0
},
"ct319": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 98.0
},
"ct320": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 52.0
},
"cta11": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 71.0
},
"cta20": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 45.0
},
"cta21": {
"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 148.0
},
"paymnt08": {
"data_type": "float", "valid_min": 0.0, "valid_max": 50.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 50.0
},
"rev321": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 234408.0
},
"rev322": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 219698.0
},
"bkc321": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171024.0
},
"bkc322": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 162590.0
},
"bkc323": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 152068.0
},
"bkc324": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 136763.0
},
"bkc327": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 171143.0
},
"bkc328": {
"data_type": "int", "valid_min": 0.0, "valid_max": 1000000000.0, "default_treatment_type": "unk", "observed_cap_min_value": 0.0, "observed_cap_max_value": 164352.0
},
"eads66": {
"data_type": "float", "valid_min": 300.0, "valid_max": 850.0, "default_treatment_type": "unk", "observed_cap_min_value": 300.0, "observed_cap_max_value": 820.0
}
}
# 1. Pre-processing: type casting and variable treatment
def _treat_value(val, cfg):
    """Cast one raw value and apply the range/cap treatment described by cfg."""
    # Cast to required type
    data_type = cfg.get("data_type")
    try:
        if data_type == "int":
            val = int(val)
        elif data_type == "float":
            val = float(val)
    except (ValueError, TypeError, OverflowError):
        # Missing, unparseable or infinite input is reported as None.
        return None
    if val is None:
        return None

    # Variable treatment
    vmin = cfg.get("valid_min")
    vmax = cfg.get("valid_max")
    if math.isnan(vmin) or math.isnan(vmax):
        # No usable valid range: leave the cast value untouched.
        return val

    if cfg.get("default_treatment_type") == "unk" and (val < vmin or val > vmax):
        # Out-of-range values are treated as unknown.
        return float("nan")
    if math.isnan(val):
        return val

    # Pull in-range values lying beyond a cap back to that cap.
    cmin = cfg.get("observed_cap_min_value")
    cmax = cfg.get("observed_cap_max_value")
    if cmin is not None and vmin <= val < cmin:
        val = cmin
    if cmax is not None and cmax < val <= vmax:
        val = cmax
    return val


def pre_processing(input_dict, config=None):
    """
    Cast and treat every configured variable of a raw record.

    Args:
        input_dict: Raw record; variables missing from it are read as None.
        config: Optional mapping ``variable -> treatment settings`` with the
            same fields as ``lookup_dict``. Defaults to ``lookup_dict``.

    Returns:
        A dict with one entry per configured variable:
        - None when the value is missing or cannot be cast to its data type
        - NaN when the treatment type is "unk" and the value lies outside
          [valid_min, valid_max]
        - the cap value when an in-range value lies below
          observed_cap_min_value or above observed_cap_max_value
        - the cast value otherwise
    """
    if config is None:
        config = lookup_dict
    return {
        var: _treat_value(input_dict.get(var), cfg)
        for var, cfg in config.items()
    }

39
processing.py Normal file
View File

@ -0,0 +1,39 @@
import joblib
import xgboost as xgb
import shap
# Location of the serialized model, resolved against the current working directory.
_MODEL_PATH = "./xgboost_model.joblib"

# model path -> (model, SHAP explainer); filled on first use so the artifact is
# not re-read and the explainer not rebuilt for every scored record.
_ARTIFACT_CACHE = {}


def _load_artifacts(model_path=_MODEL_PATH):
    """Return the (model, explainer) pair for model_path, loading it on first use."""
    if model_path not in _ARTIFACT_CACHE:
        # NOTE: joblib.load unpickles the file; only load trusted artifacts.
        model = joblib.load(model_path)
        _ARTIFACT_CACHE[model_path] = (model, shap.TreeExplainer(model))
    return _ARTIFACT_CACHE[model_path]


def processing(processed_dict):
    """
    Score one pre-processed record and rank its features by SHAP value.

    Args:
        processed_dict: Mapping of feature name to treated value; it must hold
            every name in the model's ``feature_names``.

    Returns:
        A dict with
        - "prediction": the model output for the record, as a float
        - "shape_reasoncode": one dict per feature ("feature", "value",
          "shap_rank"), ordered by descending SHAP value, rank starting at 1

    Raises:
        KeyError: If a model feature is missing from ``processed_dict``.
    """
    model, explainer = _load_artifacts()
    feature_names = model.feature_names

    # build and score
    row = [processed_dict[name] for name in feature_names]
    dmatrix = xgb.DMatrix([row], feature_names=feature_names)
    pred = float(model.predict(dmatrix)[0])

    # get SHAP values as 1 x n_features; take the single row when 2-D
    shap_matrix = explainer.shap_values(dmatrix)
    shap_vals = shap_matrix[0] if getattr(shap_matrix, "ndim", 1) > 1 else shap_matrix

    # rank & sort features by descending SHAP impact
    ranked = sorted(
        zip(feature_names, shap_vals),
        key=lambda fv: fv[1],
        reverse=True,
    )

    # build a *list* of pre-sorted reason-codes
    shape_reasoncode = [
        {
            "feature": feat,
            "value": processed_dict[feat],
            "shap_rank": rank,
        }
        for rank, (feat, _) in enumerate(ranked, start=1)
    ]

    return {
        "prediction": pred,
        "shape_reasoncode": shape_reasoncode,
    }

View File

@ -6,16 +6,12 @@
"type": ["string", "null"],
"description": "Unique identifier for the application."
},
"tu_credit_report": {
"evtg04": {
"type": ["object", "null"],
"description": "User Credit report",
"properties": {
"creditBureau": {
"type": "object",
"description": "Credit bureau details"
}
}
"description": "VantageScore 4.0"
}
},
"required": []
"required": [
"application_id"
]
}

View File

@ -1,2 +1,3 @@
jmespath == 1.0.1
regex == 2023.12.25
joblib == 1.3.2
xgboost == 2.1.4
shap == 0.46.0

View File

@ -3,12 +3,26 @@
"type": "object",
"properties": {
"application_id": {
"type": ["string", "null"],
"description": "Application Key"
"type": "string",
"description": "Unique identifier for the application"
},
"hd_score_m1": {
"type": ["number", "null"],
"description": "HD Fraud Score M1"
"prediction": {
"type": "number",
"description": "Model's predicted probability score"
},
"grade": {
"type": "string",
"description": "HD Model Grade"
},
"reason_description": {
"type": "string",
"description": "Reason for the model decision"
}
}
},
"required": [
"application_id",
"prediction",
"grade",
"reason_description"
]
}

12611
test_block.py Normal file

File diff suppressed because it is too large Load Diff

BIN
xgboost_model.joblib Normal file

Binary file not shown.