"""Fit a lending decision maker and report its performance on three data splits.

The script reads ``D_train.csv``, ``D_valid.csv`` and ``D_test.csv``, one-hot
encodes the categorical columns, fits the decision maker on the training split
and prints utility statistics for the training, test and validation splits.
"""
import matplotlib.pyplot as plt
import pandas
from sklearn import linear_model

import random_banker  # this is a random banker
import reference_banker

## Set up for dataset
features = ['checking account balance', 'duration', 'credit history',
            'purpose', 'amount', 'savings', 'employment', 'installment',
            'marital status', 'other debtors', 'residence time',
            'property', 'age', 'other installments', 'housing', 'credits',
            'job', 'persons', 'phone', 'foreign', 'growth']
target = 'repaid'
numerical_features = ['duration', 'age', 'residence time', 'installment',
                      'amount', 'persons', 'credits']
# Every feature that is not numerical; these are the columns that get
# one-hot encoded below.
quantitative_features = [f for f in features if f not in numerical_features]

df_train = pandas.read_csv('D_train.csv', sep=' ', names=features + [target])
df_valid = pandas.read_csv('D_valid.csv', sep=' ', names=features + [target])
df_test = pandas.read_csv('D_test.csv', sep=' ', names=features + [target])

# Encode the three splits together so they share exactly the same dummy
# columns (and the same dropped first level).  Encoding each split on its own
# yields different columns whenever a category is absent from one split.
_encoded = pandas.get_dummies(
    pandas.concat([df_train, df_valid, df_test],
                  keys=['train', 'valid', 'test']),
    columns=quantitative_features,
    drop_first=True,
)
# Selecting on the outer key restores each split with its original row index.
train_data = _encoded.loc['train']
valid_data = _encoded.loc['valid']
test_data = _encoded.loc['test']

encoded_features = [c for c in _encoded.columns if c != target]

X_train = train_data[encoded_features]
y_train = train_data[target]
X_valid = valid_data[encoded_features]
y_valid = valid_data[target]
X_test = test_data[encoded_features]
y_test = test_data[target]


## Test function
def test_decision_maker(X_test, y_test, interest_rate, decision_maker):
    """Evaluate a decision maker's lending policy on labelled data.

    This is only an unbiased test if the data has not been seen in training.
    The labels in ``y_test`` are assumed to be correct.

    Args:
        X_test: DataFrame of features; must contain the ``'duration'`` and
            ``'amount'`` columns.
        y_test: Labels aligned with the rows of ``X_test``; ``1`` marks a
            loan that is repaid.
        interest_rate: Per-period interest rate, passed to the decision
            maker and used to compute the gain on repaid loans.
        decision_maker: Object providing ``set_interest_rate(rate)`` and
            ``get_best_action(row)``; an action of ``1`` grants the loan.

    Returns:
        A tuple ``(utility, roi, total_repaid, total_granted)`` where
        ``utility`` is the total gain or loss over all granted loans,
        ``roi`` is ``utility`` divided by the sum of the amounts of all
        examples (``nan`` when that sum is zero), ``total_repaid`` is the
        number of granted loans that were repaid and ``total_granted`` is
        the number of loans granted.
    """
    utility = 0
    total_amount = 0
    total_repaid = 0
    total_granted = 0

    decision_maker.set_interest_rate(interest_rate)
    loans = zip(X_test['duration'], X_test['amount'], y_test)
    for t, (duration, amount, good_loan) in enumerate(loans):
        action = decision_maker.get_best_action(X_test.iloc[t])
        total_amount += amount
        # If we don't grant the loan then nothing happens
        if action != 1:
            continue
        total_granted += 1
        if good_loan != 1:
            # The whole principal is lost on a bad loan.
            utility -= amount
        else:
            # Compound interest earned over the loan's duration.
            utility += amount * (pow(1 + interest_rate, duration) - 1)
            total_repaid += 1

    # Divide the final utility, not a running sum of intermediate utilities.
    roi = utility / total_amount if total_amount else float('nan')
    return utility, roi, total_repaid, total_granted


## Main code

### Setup model
# Alternative decision makers:
#decision_maker = random_banker.RandomBanker()
#import aleksaw_banker
#decision_maker = aleksaw_banker.AlexBanker()
logistic = linear_model.LogisticRegression()
decision_maker = reference_banker.ReferenceBanker(logistic)

interest_rate = 0.01

## Test for three datasets
decision_maker.fit(X_train, y_train)

for label, X, y in (("Training performance", X_train, y_train),
                    ("Test performance", X_test, y_test),
                    ("Validation performance", X_valid, y_valid)):
    print(label)
    Ui, Ri, n_R, n_G = test_decision_maker(X, y, interest_rate, decision_maker)
    # NOTE: the value printed as "Average utility" is the total utility
    # returned by test_decision_maker.
    print("Average utility:", Ui)
    print("Average return on investment:", Ri)
    # The repayment rate is undefined when no loan was granted.
    print("Average repayment rate:", n_R / n_G if n_G else float('nan'))
    print("Number granted:", n_G)