import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import scipy as scp

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.neighbors.kde import KernelDensity

import itertools
from sklearn.metrics import roc_curve, auc, roc_auc_score, log_loss, accuracy_score, confusion_matrix

from  sklearn.model_selection import train_test_split
from  sklearn import preprocessing
from  sklearn.ensemble import RandomForestRegressor
from  sklearn.pipeline import make_pipeline
from  sklearn.model_selection import GridSearchCV
from  sklearn.metrics import mean_squared_error, r2_score

df = pd.read_excel("/home/gogol/mypy/default of credit card clients.xls", header = 1)

df.shape

(30000, 25)

df.head()

df.columns

Index([u'ID', u'LIMIT_BAL', u'SEX', u'EDUCATION', u'MARRIAGE', u'AGE',
       u'PAY_0', u'PAY_2', u'PAY_3', u'PAY_4', u'PAY_5', u'PAY_6',
       u'BILL_AMT1', u'BILL_AMT2', u'BILL_AMT3', u'BILL_AMT4', u'BILL_AMT5',
       u'BILL_AMT6', u'PAY_AMT1', u'PAY_AMT2', u'PAY_AMT3', u'PAY_AMT4',
       u'PAY_AMT5', u'PAY_AMT6', u'default payment next month'],
      dtype='object')

df_pay_status = df.loc[:, 'PAY_0':'PAY_6']
df_pay_status.columns = ['PAY_1', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']
df_pay_status.head(10)

plt.tight_layout()
fig, axis = plt.subplots(2,3)
fig.set_size_inches(17,7)
ttl = fig.suptitle('Distribution of dalays in the past 6 months')
ttl.set_position([.5, 1.05])


left   =  0.125  # the left side of the subplots of the figure
right  =  0.9    # the right side of the subplots of the figure
bottom =  0.1    # the bottom of the subplots of the figure
top    =  0.9    # the top of the subplots of the figure
wspace =  .5     # the amount of width reserved for blank space between subplots
hspace = 1.1 # the amount of height reserved for white space between subplots

plt.subplots_adjust(
    left    =  left, 
    bottom  =  bottom, 
    right   =  right, 
    top     =  top, 
    wspace  =  wspace, 
    hspace  =  hspace
)

columns = df_pay_status.columns

for i in range(len(columns)):
    row, col = int(i/3), i%3
    d = df_pay_status[columns[i]].value_counts()
    e = d.index
    g = sns.barplot(x = e, y = d, ax = axis[row, col], palette = 'Blues_d' )

<matplotlib.figure.Figure at 0x7f2ccb9daa90>

df_bill_amt = df.loc[:, 'BILL_AMT1':'BILL_AMT6']

df_pay_amt = df.loc[:, 'PAY_AMT1':'PAY_AMT6']

df.drop(['default payment next month'], axis =1)

bill_description = df_bill_amt.describe()

plt.tight_layout()
fig, axis = plt.subplots(1,3)
fig.set_size_inches(18,5)
ttl = fig.suptitle('Distribution of Mean, STD & Min of Bill Amounts in the past 6 months')
ttl.set_position([.5, 1.05])

left   =  0.125  # the left side of the subplots of the figure
right  =  0.9    # the right side of the subplots of the figure
bottom =  0.1    # the bottom of the subplots of the figure
top    =  0.9    # the top of the subplots of the figure
wspace =  .5     # the amount of width reserved for blank space between subplots
hspace =   2 # the amount of height reserved for white space between subplots

plt.subplots_adjust(
    left    =  left, 
    bottom  =  bottom, 
    right   =  right, 
    top     =  top, 
    wspace  =  wspace, 
    hspace  =  hspace
)

columns = bill_description.columns
sns.set(font_scale = 1.1)
sns.set_style("whitegrid")

for i in range(len(columns)/2):
    col = i%3
    j = i+1
    X = bill_description.columns[0:6]
    c = bill_description.index[j]
    Y = bill_description.loc[c]
    
    g = sns.barplot(x = X, y = Y, ax = axis[col], label='small', palette = 'Blues_d')
    g.set_xticklabels(bill_description.columns[0:6], rotation=30)

    g.set_title(c)

<matplotlib.figure.Figure at 0x7f2cc3523cd0>

pay_description = df_pay_amt.describe()

pay_description

plt.tight_layout()
fig, axis = plt.subplots(1,2)
fig.set_size_inches(18,5)
ttl = fig.suptitle('Distribution of Mean & STD of Payment Amounts in the past 6 months')
ttl.set_position([.5, 1.05])

left   =  0.125  # the left side of the subplots of the figure
right  =  0.9    # the right side of the subplots of the figure
bottom =  0.1    # the bottom of the subplots of the figure
top    =  0.9    # the top of the subplots of the figure
wspace =  .5     # the amount of width reserved for blank space between subplots
hspace = 2 # the amount of height reserved for white space between subplots

plt.subplots_adjust(
    left    =  left, 
    bottom  =  bottom, 
    right   =  right, 
    top     =  top, 
    wspace  =  wspace, 
    hspace  =  hspace
)

columns = pay_description.columns
sns.set(font_scale = 1.1)


for i in range(2):
    col = i%3
    j = i+1
    
    X = pay_description.columns[0:6]
    c = pay_description.index[j]
    Y = pay_description.loc[c]
    
    g = sns.barplot(x = X, y = Y, ax = axis[col], label='small', palette = 'Blues_d')
    g.set_xticklabels(pay_description.columns[0:6], rotation=30)

    g.set_title(c)

<matplotlib.figure.Figure at 0x7f2cc362d450>

df_pay_amt.min()

PAY_AMT1    0
PAY_AMT2    0
PAY_AMT3    0
PAY_AMT4    0
PAY_AMT5    0
PAY_AMT6    0
dtype: int64

d = df['LIMIT_BAL'].value_counts()

fig = plt.figure()
fig.set_size_inches(30,5)
sns.set_style("whitegrid")

ttl = fig.suptitle('Distribution of Limit Balance')
ttl.set_position([.5, 1.05])

dd = d.index
np.sort(dd)
g = sns.barplot(x = dd, y = d, label='small', palette = 'Blues_d')
g.set_xticklabels(d.index, rotation=90)

[Text(0,0,u'50000'),
 Text(0,0,u'20000'),
 Text(0,0,u'30000'),
 Text(0,0,u'80000'),
 Text(0,0,u'200000'),
 Text(0,0,u'150000'),
 Text(0,0,u'100000'),
 Text(0,0,u'180000'),
 Text(0,0,u'360000'),
 Text(0,0,u'60000'),
 Text(0,0,u'140000'),
 Text(0,0,u'230000'),
 Text(0,0,u'70000'),
 Text(0,0,u'210000'),
 Text(0,0,u'130000'),
 Text(0,0,u'120000'),
 Text(0,0,u'500000'),
 Text(0,0,u'160000'),
 Text(0,0,u'90000'),
 Text(0,0,u'240000'),
 Text(0,0,u'110000'),
 Text(0,0,u'300000'),
 Text(0,0,u'170000'),
 Text(0,0,u'260000'),
 Text(0,0,u'280000'),
 Text(0,0,u'10000'),
 Text(0,0,u'220000'),
 Text(0,0,u'250000'),
 Text(0,0,u'290000'),
 Text(0,0,u'320000'),
 Text(0,0,u'310000'),
 Text(0,0,u'400000'),
 Text(0,0,u'270000'),
 Text(0,0,u'350000'),
 Text(0,0,u'40000'),
 Text(0,0,u'190000'),
 Text(0,0,u'340000'),
 Text(0,0,u'390000'),
 Text(0,0,u'330000'),
 Text(0,0,u'420000'),
 Text(0,0,u'450000'),
 Text(0,0,u'380000'),
 Text(0,0,u'430000'),
 Text(0,0,u'440000'),
 Text(0,0,u'470000'),
 Text(0,0,u'460000'),
 Text(0,0,u'480000'),
 Text(0,0,u'410000'),
 Text(0,0,u'370000'),
 Text(0,0,u'490000'),
 Text(0,0,u'550000'),
 Text(0,0,u'520000'),
 Text(0,0,u'510000'),
 Text(0,0,u'600000'),
 Text(0,0,u'580000'),
 Text(0,0,u'610000'),
 Text(0,0,u'530000'),
 Text(0,0,u'560000'),
 Text(0,0,u'620000'),
 Text(0,0,u'700000'),
 Text(0,0,u'570000'),
 Text(0,0,u'630000'),
 Text(0,0,u'640000'),
 Text(0,0,u'540000'),
 Text(0,0,u'590000'),
 Text(0,0,u'710000'),
 Text(0,0,u'680000'),
 Text(0,0,u'750000'),
 Text(0,0,u'650000'),
 Text(0,0,u'670000'),
 Text(0,0,u'720000'),
 Text(0,0,u'660000'),
 Text(0,0,u'16000'),
 Text(0,0,u'780000'),
 Text(0,0,u'740000'),
 Text(0,0,u'730000'),
 Text(0,0,u'800000'),
 Text(0,0,u'760000'),
 Text(0,0,u'690000'),
 Text(0,0,u'1000000'),
 Text(0,0,u'327680')]

d.head()

50000     3365
20000     1976
30000     1610
80000     1567
200000    1528
Name: LIMIT_BAL, dtype: int64

Y = df['default payment next month']
pd.DataFrame(Y)

d = Y.value_counts()

fig1 = plt.figure()
fig1.set_size_inches(10,5)
sns.set_style("whitegrid")

ttl = fig1.suptitle('Defaulters out of 30,000 sample size (Defaulters = 1)')
ttl.set_position([.5, 1.05])

Defaulter_vs_NonDefaulters = d. index
Number_of_Customers = Y.value_counts()

g1 = sns.barplot(x = Defaulter_vs_NonDefaulters, 
                y = Number_of_Customers, 
                saturation = 1,
                palette = 'Blues_d'
                )

X = df.drop(['default payment next month'], axis = 1)

Y = df['default payment next month']

df['SEX'] = df['SEX'].astype('category').cat.rename_categories(['M', 'F'])
df['MARRIAGE'] = df['MARRIAGE'].astype('category').cat.rename_categories(['na', 'married', 'single', 'other'])
df['age_cat'] = pd.cut(df['AGE'], range(0, 100, 10), right=False)

fig, ax = plt.subplots(1,3)
fig.set_size_inches(20,5)
fig.suptitle('Defaulting by absolute numbers, for various demographics')

df_demo_1 = df.groupby(['default payment next month', 'SEX']).size()
df_demo_1 = df_demo_1.unstack(level = 1)
df_demo_1.plot(kind = 'bar', ax = ax[0])

df_demo_1 = df.groupby(['default payment next month', 'MARRIAGE']).size()
df_demo_1 = df_demo_1.unstack(level = 1)
df_demo_1.plot(kind = 'bar', ax = ax[1])

df_demo_1 = df.groupby(['default payment next month', 'age_cat']).size()
df_demo_1 = df_demo_1.unstack(level = 1)
df_demo_1.plot(kind = 'bar', ax = ax[2])

<matplotlib.axes._subplots.AxesSubplot at 0x7f2ccb5aff10>

fig, ax = plt.subplots(1,3)
fig.set_size_inches(20,5)
fig.suptitle('Defaulting by relative numbers given each class, for various demographics')

d = df.groupby(['default payment next month', 'SEX']).size().unstack(level=1)
d = d / d.sum()
p = d.plot(kind='bar', ax=ax[0])

d = df.groupby(['default payment next month', 'MARRIAGE']).size().unstack(level=1)
d = d / d.sum()
p = d.plot(kind='bar', ax=ax[1])

d = df.groupby(['default payment next month', 'age_cat']).size().unstack(level=1)
d = d / d.sum()
p = d.plot(kind='bar', ax=ax[2])

df.head()

from math import log

df['pay_amt_avg_log'] = df_pay_amt.mean(axis = 1).apply(lambda x : log(x+1))

df['pay_amt_avg'] = df_pay_amt.mean(axis = 1)
df['pay_std'] = df_pay_amt.std(axis=1)

df['pay_rel_amt_1'] = df_pay_amt['PAY_AMT1']/df['pay_amt_avg']
df['pay_rel_amt_2'] = df_pay_amt['PAY_AMT2']/df['pay_amt_avg']
df['pay_rel_amt_3'] = df_pay_amt['PAY_AMT3']/df['pay_amt_avg']
df['pay_rel_amt_4'] = df_pay_amt['PAY_AMT4']/df['pay_amt_avg']
df['pay_rel_amt_5'] = df_pay_amt['PAY_AMT5']/df['pay_amt_avg']
df['pay_rel_amt_6'] = df_pay_amt['PAY_AMT6']/df['pay_amt_avg']

df['bill_amt_avg'] = df_bill_amt.mean(axis = 1)

df['bill_amt_avg_log'] = df_bill_amt.mean(axis = 1).apply(lambda x : log(x+1) if x>0 else 0)

df['billamt_rel_1'] = df_bill_amt['BILL_AMT1']/df['LIMIT_BAL']
df['billamt_rel_2'] = df_bill_amt['BILL_AMT2']/df['LIMIT_BAL']
df['billamt_rel_3'] = df_bill_amt['BILL_AMT3']/df['LIMIT_BAL']
df['billamt_rel_4'] = df_bill_amt['BILL_AMT4']/df['LIMIT_BAL']
df['billamt_rel_5'] = df_bill_amt['BILL_AMT5']/df['LIMIT_BAL']
df['billamt_rel_6'] = df_bill_amt['BILL_AMT6']/df['LIMIT_BAL']

df['LIMIT_BAL_LOG'] = df['LIMIT_BAL'].apply(lambda x: log(x+1))
df['LIMIT_BAL_CAT'] = pd.cut(df['LIMIT_BAL'], range(0, int(1e6), 10000), right=False)

df.columns

Index([                        u'ID',                  u'LIMIT_BAL',
                              u'SEX',                  u'EDUCATION',
                         u'MARRIAGE',                        u'AGE',
                            u'PAY_0',                      u'PAY_2',
                            u'PAY_3',                      u'PAY_4',
                            u'PAY_5',                      u'PAY_6',
                        u'BILL_AMT1',                  u'BILL_AMT2',
                        u'BILL_AMT3',                  u'BILL_AMT4',
                        u'BILL_AMT5',                  u'BILL_AMT6',
                         u'PAY_AMT1',                   u'PAY_AMT2',
                         u'PAY_AMT3',                   u'PAY_AMT4',
                         u'PAY_AMT5',                   u'PAY_AMT6',
       u'default payment next month',                    u'age_cat',
                  u'pay_amt_avg_log',                u'pay_amt_avg',
                          u'pay_std',              u'pay_rel_amt_1',
                    u'pay_rel_amt_2',              u'pay_rel_amt_3',
                    u'pay_rel_amt_4',              u'pay_rel_amt_5',
                    u'pay_rel_amt_6',               u'bill_amt_avg',
                 u'bill_amt_avg_log',              u'billamt_rel_1',
                    u'billamt_rel_2',              u'billamt_rel_3',
                    u'billamt_rel_4',              u'billamt_rel_5',
                    u'billamt_rel_6',              u'LIMIT_BAL_LOG',
                    u'LIMIT_BAL_CAT'],
      dtype='object')

df_bill_amt = df.loc[:, 'billamt_rel_1': 'billamt_rel_6']

df_bill_amt.head()

#Plotting Kernal Density Estimation on Relative Bill Amount (bill_amount/credit limit) Columns:

fig, ax = plt.subplots(2,3)
fig.set_size_inches(15,5)
fig.set_size_inches(17,7)
ttl = fig.suptitle('Distribution of bill relative to credit in the path 6 months')
ttl.set_position([.5, 1.05])

left   =  0.125  # the left side of the subplots of the figure
right  =  0.9    # the right side of the subplots of the figure
bottom =  0.1    # the bottom of the subplots of the figure
top    =  0.9    # the top of the subplots of the figure
wspace =  .5     # the amount of width reserved for blank space between subplots
hspace = 1.1 # the amount of height reserved for white space between subplots

plt.subplots_adjust(
    left    =  left, 
    bottom  =  bottom, 
    right   =  right, 
    top     =  top, 
    wspace  =  wspace, 
    hspace  =  hspace
)

columns = df_bill_amt.columns

for i in range(len(columns)):
    row, col = int(i/3), i%3
    #Plot the Histogram#
    n, bins, patches = ax[row,col].hist(df_bill_amt[columns[i]], 50, normed=1, facecolor='green', alpha=0.75)
    
    #Estimate Kernal Density#
    kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(df_bill_amt[columns[i]].values.reshape(-1, 1))
    x_grid = np.linspace(df_bill_amt[columns[i]].min(), df_bill_amt[columns[i]].max(), 1000)
    log_pdf = kde.score_samples(x_grid.reshape(-1, 1))
    
    # add the density line
    ax[row,col].plot(x_grid, np.exp(log_pdf), color='blue', alpha=0.5, lw=3)
    ax[row,col].set_title(columns[i])

X = df.drop(['ID', 'LIMIT_BAL'], axis = 1)

X.head()

X1 = pd.concat([ X, pd.get_dummies(df['SEX'], drop_first = True) ], axis = 1)
X1 = pd.concat([ X, pd.get_dummies(df['MARRIAGE'], drop_first = True) ], axis = 1)

X1 = X1.drop(['SEX', 'MARRIAGE'], axis = 1)

X1.head()

D = pd.concat([X1, Y], axis = 1)

D = D.rename(index=str, columns={"default payment next month": "target"})

D.columns

Index([       u'EDUCATION',              u'AGE',            u'PAY_0',
                  u'PAY_2',            u'PAY_3',            u'PAY_4',
                  u'PAY_5',            u'PAY_6',        u'BILL_AMT1',
              u'BILL_AMT2',        u'BILL_AMT3',        u'BILL_AMT4',
              u'BILL_AMT5',        u'BILL_AMT6',         u'PAY_AMT1',
               u'PAY_AMT2',         u'PAY_AMT3',         u'PAY_AMT4',
               u'PAY_AMT5',         u'PAY_AMT6',           u'target',
                u'age_cat',  u'pay_amt_avg_log',      u'pay_amt_avg',
                u'pay_std',    u'pay_rel_amt_1',    u'pay_rel_amt_2',
          u'pay_rel_amt_3',    u'pay_rel_amt_4',    u'pay_rel_amt_5',
          u'pay_rel_amt_6',     u'bill_amt_avg', u'bill_amt_avg_log',
          u'billamt_rel_1',    u'billamt_rel_2',    u'billamt_rel_3',
          u'billamt_rel_4',    u'billamt_rel_5',    u'billamt_rel_6',
          u'LIMIT_BAL_LOG',    u'LIMIT_BAL_CAT',          u'married',
                 u'single',            u'other',           u'target'],
      dtype='object')

formula = 'target ~ '

# original features & engineered features
formula += '+ C(married) + C(single) + C(other) + C(married) +  C(EDUCATION) + AGE '
formula += '+ PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 '
formula += '+ C(age_cat) + C(LIMIT_BAL_CAT) + C(LIMIT_BAL_LOG) + pay_amt_avg + pay_std '
formula += '+ pay_amt_avg_log + pay_rel_amt_1 + pay_rel_amt_2 + pay_rel_amt_3 + pay_rel_amt_4 + pay_rel_amt_5 + pay_rel_amt_6 '
formula += '+ bill_amt_avg + bill_amt_avg_log + billamt_rel_1 + billamt_rel_2 + billamt_rel_3 + billamt_rel_4 + billamt_rel_5 + billamt_rel_6'

formula

'target ~ + C(married) + C(single) + C(other) + C(married) +  C(EDUCATION) + AGE + PAY_0 + PAY_2 + PAY_3 + PAY_4 + PAY_5 + PAY_6 + C(age_cat) + C(LIMIT_BAL_CAT) + C(LIMIT_BAL_LOG) + pay_amt_avg + pay_std + pay_amt_avg_log + pay_rel_amt_1 + pay_rel_amt_2 + pay_rel_amt_3 + pay_rel_amt_4 + pay_rel_amt_5 + pay_rel_amt_6 + bill_amt_avg + bill_amt_avg_log + billamt_rel_1 + billamt_rel_2 + billamt_rel_3 + billamt_rel_4 + billamt_rel_5 + billamt_rel_6'

from patsy import dmatrices

Y, X = dmatrices(formula, data= D, return_type='dataframe')
Y = Y.iloc[:, 1]

import warnings
from sklearn.feature_selection import SelectKBest, f_classif

warnings.simplefilter(action='ignore', category=(UserWarning,RuntimeWarning))

selector = SelectKBest(f_classif, 25)
selector.fit(X, Y)

SelectKBest(k=25, score_func=<function f_classif at 0x7f2cc2c119b0>)

top_indices = np.nan_to_num(selector.scores_).argsort()[-25:][::-1]
selector.scores_[top_indices]
X.columns[top_indices]

Index([u'PAY_0', u'PAY_2', u'PAY_3', u'PAY_4', u'PAY_5', u'PAY_6',
       u'pay_amt_avg_log', u'billamt_rel_6', u'billamt_rel_5',
       u'billamt_rel_4', u'billamt_rel_3', u'billamt_rel_2', u'billamt_rel_1',
       u'pay_amt_avg',
       u'C(LIMIT_BAL_CAT)[T.Interval(20000, 30000, closed='left')]',
       u'C(LIMIT_BAL_LOG)[T.9.9035375512861705]',
       u'C(LIMIT_BAL_LOG)[T.10.308985993422082]',
       u'C(LIMIT_BAL_CAT)[T.Interval(30000, 40000, closed='left')]',
       u'pay_std', u'C(EDUCATION)[T.1]',
       u'C(LIMIT_BAL_CAT)[T.Interval(10000, 20000, closed='left')]',
       u'C(LIMIT_BAL_LOG)[T.13.122365377402328]',
       u'C(LIMIT_BAL_CAT)[T.Interval(500000, 510000, closed='left')]',
       u'C(EDUCATION)[T.2]', u'C(LIMIT_BAL_LOG)[T.10.819798284210286]'],
      dtype='object')

from sklearn import preprocessing

scaler = preprocessing.MinMaxScaler()
scaler.fit(X)

MinMaxScaler(copy=True, feature_range=(0, 1))

from sklearn.pipeline import Pipeline

preprocess = Pipeline([('anova', selector), ('scale', scaler)])
preprocess.fit(X,Y)

X_prep = preprocess.transform(X)

X_prep = pd.DataFrame(X_prep)

Y = pd.DataFrame(Y)

from sklearn.decomposition import PCA

pca = PCA(n_components=2)

principalComponents = pca.fit_transform(X_prep)

Df = pd.DataFrame(data = principalComponents
             , columns = ['principal component 1', 'principal component 2'])

Df_p = pd.concat([Df, Y], axis = 1)

Df_p.columns

Index([u'principal component 1', u'principal component 2', u'target[1]'], dtype='object')

import seaborn as sns

g = sns.lmplot(x = 'principal component 1', 
               y = 'principal component 2', 
               data = Df_p,
               hue = 'target[1]'
              )

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-124-9fb311d1a933> in <module>()
      6                y = 'principal component 2',
      7                data = Df_p,
----> 8                hue = 'target[1]'
      9               )

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/regression.pyc in lmplot(x, y, data, hue, col, row, palette, col_wrap, size, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws)
    588         scatter_kws=scatter_kws, line_kws=line_kws,
    589         )
--> 590     facets.map_dataframe(regplot, x, y, **regplot_kws)
    591 
    592     # Add a legend

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/axisgrid.pyc in map_dataframe(self, func, *args, **kwargs)
    795 
    796             # Draw the plot
--> 797             self._facet_plot(func, ax, args, kwargs)
    798 
    799         # Finalize the annotations and layout

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/axisgrid.pyc in _facet_plot(self, func, ax, plot_args, plot_kwargs)
    813 
    814         # Draw the plot
--> 815         func(*plot_args, **plot_kwargs)
    816 
    817         # Sort out the supporting information

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/regression.pyc in regplot(x, y, data, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, dropna, x_jitter, y_jitter, label, color, marker, scatter_kws, line_kws, ax)
    788     scatter_kws["marker"] = marker
    789     line_kws = {} if line_kws is None else copy.copy(line_kws)
--> 790     plotter.plot(ax, scatter_kws, line_kws)
    791     return ax
    792 

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/regression.pyc in plot(self, ax, scatter_kws, line_kws)
    340             self.scatterplot(ax, scatter_kws)
    341         if self.fit_reg:
--> 342             self.lineplot(ax, line_kws)
    343 
    344         # Label the axes

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/regression.pyc in lineplot(self, ax, kws)
    385 
    386         # Fit the regression model
--> 387         grid, yhat, err_bands = self.fit_regression(ax)
    388 
    389         # Get set default aesthetics

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/regression.pyc in fit_regression(self, ax, x_range, grid)
    208             yhat, yhat_boots = self.fit_logx(grid)
    209         else:
--> 210             yhat, yhat_boots = self.fit_fast(grid)
    211 
    212         # Compute the confidence interval at each grid point

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/regression.pyc in fit_fast(self, grid)
    228 
    229         beta_boots = algo.bootstrap(X, y, func=reg_func,
--> 230                                     n_boot=self.n_boot, units=self.units).T
    231         yhat_boots = grid.dot(beta_boots).T
    232         return yhat, yhat_boots

/home/gogol/anaconda2/lib/python2.7/site-packages/seaborn/algorithms.pyc in bootstrap(*args, **kwargs)
     72     boot_dist = []
     73     for i in range(int(n_boot)):
---> 74         resampler = rs.randint(0, n, n)
     75         sample = [a.take(resampler, axis=0) for a in args]
     76         boot_dist.append(func(*sample, **func_kwargs))

mtrand.pyx in mtrand.RandomState.randint()

ValueError: low >= high

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_prep, Y, test_size=0.2, random_state=42)

import itertools
from sklearn.metrics import roc_curve, auc, roc_auc_score, log_loss, accuracy_score, confusion_matrix

#Plotting Confusion Matrix

def plot_cm(ax, y_true, y_pred, classes, title, th=0.5, cmap=plt.cm.Blues):
    y_pred_labels = (y_pred>th).astype(int)
    
    cm = confusion_matrix(y_true, y_pred_labels)
    
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set_title(title)

    tick_marks = np.arange(len(classes))
    ax.set_xticks(tick_marks)
    ax.set_yticks(tick_marks)
    ax.set_xticklabels(classes)
    ax.set_yticklabels(classes)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

#Plotting ROC Curve and AUC

def plot_auc(ax, y_train, y_train_pred, y_test, y_test_pred, th=0.5):

    y_train_pred_labels = (y_train_pred>th).astype(int)
    y_test_pred_labels  = (y_test_pred>th).astype(int)

    fpr_train, tpr_train, _ = roc_curve(y_train,y_train_pred)
    roc_auc_train = auc(fpr_train, tpr_train)
    acc_train = accuracy_score(y_train, y_train_pred_labels)

    fpr_test, tpr_test, _ = roc_curve(y_test,y_test_pred)
    roc_auc_test = auc(fpr_test, tpr_test)
    acc_test = accuracy_score(y_test, y_test_pred_labels)

    ax.plot(fpr_train, tpr_train)
    ax.plot(fpr_test, tpr_test)

    ax.plot([0, 1], [0, 1], 'k--')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC curve')
    
    train_text = 'train acc = {:.3f}, auc = {:.2f}'.format(acc_train, roc_auc_train)
    test_text = 'test acc = {:.3f}, auc = {:.2f}'.format(acc_test, roc_auc_test)
    ax.legend([train_text, test_text])

from sklearn import linear_model

# Create logistic regression object
regr = linear_model.LogisticRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

predictions = regr.predict(X_test)

y_train_pred = regr.predict_proba(X_train)[:,1]
y_test_pred = regr.predict_proba(X_test)[:,1]

threshold = 0.5

fig,ax = plt.subplots(1,3)
fig.set_size_inches(15,5)

plot_cm(ax[0],  y_train, y_train_pred, [0,1], 'Confusion matrix (TRAIN)', threshold)
plot_cm(ax[1],  y_test, y_test_pred,   [0,1], 'Confusion matrix (TEST)', threshold)
plot_auc(ax[2], y_train, y_train_pred, y_test, y_test_pred, threshold)
    
plt.tight_layout()
plt.show()

#Using RandomForest

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=500, min_samples_leaf=5)
rf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=5, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

predictions = rf.predict(X_test)

y_train_pred = rf.predict_proba(X_train)[:,1]
y_test_pred = rf.predict_proba(X_test)[:,1]

threshold = 0.5

fig,ax = plt.subplots(1,3)
fig.set_size_inches(15,5)

plot_cm(ax[0],  y_train, y_train_pred, [0,1], 'Confusion matrix (TRAIN)', threshold)
plot_cm(ax[1],  y_test, y_test_pred,   [0,1], 'Confusion matrix (TEST)', threshold)
plot_auc(ax[2], y_train, y_train_pred, y_test, y_test_pred, threshold)
    
plt.tight_layout()
plt.show()

rf.feature_importances_

array([ 0.00731615,  0.00974254,  0.00122019,  0.00240541,  0.00207649,
        0.00027795,  0.00246733,  0.00219625,  0.00399343,  0.00025585,
        0.17306891,  0.08011276,  0.04581053,  0.03485668,  0.02850615,
        0.02540373,  0.06749281,  0.06594812,  0.06762942,  0.06831926,
        0.06589945,  0.0622378 ,  0.06101222,  0.05998936,  0.06176122])

pd.DataFrame(X_train)

X_train.shape

(22853, 25)

	ID	LIMIT_BAL	SEX	EDUCATION	MARRIAGE	AGE	PAY_0	PAY_2	PAY_3	PAY_4	...	BILL_AMT3	BILL_AMT4	BILL_AMT5	BILL_AMT6	PAY_AMT1	PAY_AMT2	PAY_AMT3	PAY_AMT4	PAY_AMT5	PAY_AMT6
0	1	20000	2	2	1	24	2	2	-1	-1	...	689	0	0	0	0	689	0	0	0	0
1	2	120000	2	2	2	26	-1	2	0	0	...	2682	3272	3455	3261	0	1000	1000	1000	0	2000
2	3	90000	2	2	2	34	0	0	0	0	...	13559	14331	14948	15549	1518	1500	1000	1000	1000	5000
3	4	50000	2	2	1	37	0	0	0	0	...	49291	28314	28959	29547	2000	2019	1200	1100	1069	1000
4	5	50000	1	2	1	57	-1	0	-1	0	...	35835	20940	19146	19131	2000	36681	10000	9000	689	679
5	6	50000	1	1	2	37	0	0	0	0	...	57608	19394	19619	20024	2500	1815	657	1000	1000	800
6	7	500000	1	1	2	29	0	0	0	0	...	445007	542653	483003	473944	55000	40000	38000	20239	13750	13770
7	8	100000	2	2	2	23	0	-1	-1	0	...	601	221	-159	567	380	601	0	581	1687	1542
8	9	140000	2	3	1	28	0	0	2	0	...	12108	12211	11793	3719	3329	0	432	1000	1000	1000
9	10	20000	1	3	2	35	-2	-2	-2	-2	...	0	0	13007	13912	0	0	0	13007	1122	0
10	11	200000	2	3	2	34	0	0	2	0	...	5535	2513	1828	3731	2306	12	50	300	3738	66
11	12	260000	2	1	2	51	-1	-1	-1	-1	...	9966	8517	22287	13668	21818	9966	8583	22301	0	3640
12	13	630000	2	2	2	41	-1	0	-1	-1	...	6500	6500	6500	2870	1000	6500	6500	6500	2870	0
13	14	70000	1	2	2	30	1	2	2	0	...	65701	66782	36137	36894	3200	0	3000	3000	1500	0
14	15	250000	1	1	2	29	0	0	0	0	...	63561	59696	56875	55512	3000	3000	3000	3000	3000	3000
15	16	50000	2	3	3	23	1	2	0	0	...	28116	28771	29531	30211	0	1500	1100	1200	1300	1100
16	17	20000	1	1	2	24	0	0	2	2	...	17428	18338	17905	19104	3200	0	1500	0	1650	0
17	18	320000	1	1	1	49	0	0	0	-1	...	194663	70074	5856	195599	10358	10000	75940	20000	195599	50000
18	19	360000	2	1	1	49	1	-2	-2	-2	...	0	0	0	0	0	0	0	0	0	0
19	20	180000	2	1	2	29	1	-2	-2	-2	...	0	0	0	0	0	0	0	0	0	0
20	21	130000	2	3	2	39	0	0	0	0	...	24489	20616	11802	930	3000	1537	1000	2000	930	33764
21	22	120000	2	2	1	39	-1	-1	-1	-1	...	316	0	632	316	316	316	0	632	316	0
22	23	70000	2	2	2	26	2	0	0	2	...	45020	44006	46905	46012	2007	3582	0	3601	0	1820
23	24	450000	2	1	1	40	-2	-2	-2	-2	...	1473	560	0	0	19428	1473	560	0	0	1128
24	25	90000	1	1	2	23	0	0	0	-1	...	0	5398	6360	8292	5757	0	5398	1200	2045	2000
25	26	50000	1	3	2	23	0	0	0	0	...	36023	28967	29829	30046	1973	1426	1001	1432	1062	997
26	27	60000	1	1	2	27	1	-2	-1	-1	...	259	-57	127	-189	0	1000	0	500	0	1000
27	28	50000	2	3	2	30	0	0	0	0	...	17163	17878	18931	19617	1300	1300	1000	1500	1000	1012
28	29	50000	2	3	1	47	-1	-1	-1	-1	...	3416	2040	30430	257	3415	3421	2044	30430	257	0
29	30	50000	1	1	2	26	0	0	0	0	...	17496	17907	18375	11400	1500	1500	1000	1000	1600	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
29970	29971	360000	1	1	1	34	-1	-1	-1	0	...	64069	49005	8676	19487	52951	64535	8907	53	19584	16080
29971	29972	80000	1	3	1	36	0	0	0	0	...	68279	69674	71070	73612	2395	2500	2530	2556	3700	3000
29972	29973	190000	1	1	1	37	0	0	0	0	...	5869	29223	19616	148482	2000	3869	25128	10115	148482	4800
29973	29974	230000	1	2	1	35	1	-2	-2	-2	...	0	0	0	0	0	0	0	0	0	0
29974	29975	50000	1	2	1	37	1	2	2	2	...	4328	2846	1585	1324	0	3000	0	0	1000	1000
29975	29976	220000	1	2	1	41	0	0	-1	-1	...	1369	5924	1759	1824	8840	6643	5924	1759	1824	7022
29976	29977	40000	1	2	2	47	2	2	3	2	...	53415	51259	47151	46934	4000	0	2000	0	3520	0
29977	29978	420000	1	1	2	34	0	0	0	0	...	140011	141695	144839	147954	7000	7000	5500	5500	5600	5000
29978	29979	310000	1	2	1	39	0	0	0	0	...	233854	219409	216540	210675	10029	9218	10029	8049	8040	10059
29979	29980	180000	1	1	1	32	-2	-2	-2	-2	...	0	0	0	0	0	0	0	0	0	0
29980	29981	50000	1	3	2	42	0	0	0	0	...	49397	50360	19971	19694	10000	4000	5000	3000	4500	2000
29981	29982	50000	1	2	1	44	1	2	2	2	...	33101	28192	22676	14647	2300	1700	0	517	503	585
29982	29983	90000	1	2	1	36	0	0	0	0	...	10306	11328	12036	14329	1500	1500	1500	1200	2500	0
29983	29984	20000	1	2	1	44	-2	-2	-2	-2	...	2712	2882	9235	1719	2890	2720	2890	9263	1824	1701
29984	29985	30000	1	2	2	38	-1	-1	-2	-1	...	2939	1993	1907	3319	923	2977	1999	3057	3319	1000
29985	29986	240000	1	1	2	30	-2	-2	-2	-2	...	0	0	0	0	0	0	0	0	0	0
29986	29987	360000	1	1	2	35	-1	-1	-2	-2	...	0	0	0	0	0	0	0	0	0	0
29987	29988	130000	1	1	2	34	0	0	0	0	...	15546	108047	93708	97353	3000	2000	93000	4000	5027	4005
29988	29989	250000	1	1	1	34	0	0	0	0	...	243075	245750	175005	179687	65000	8800	9011	6000	7000	6009
29989	29990	150000	1	1	2	35	-1	-1	-1	-1	...	-3	780	0	0	9054	0	783	0	0	0
29990	29991	140000	1	2	1	41	0	0	0	0	...	139110	138262	49675	46121	6000	7000	4228	1505	2000	2000
29991	29992	210000	1	2	1	34	3	2	2	2	...	2500	2500	2500	2500	0	0	0	0	0	0
29992	29993	10000	1	3	1	43	0	0	0	-2	...	0	0	0	0	2000	0	0	0	0	0
29993	29994	100000	1	1	2	38	0	-1	-1	0	...	102996	70626	69473	55004	2000	111784	4000	3000	2000	2000
29994	29995	80000	1	2	2	34	2	2	2	2	...	79384	77519	82607	81158	7000	3500	0	7000	0	4000
29995	29996	220000	1	3	1	39	0	0	0	0	...	208365	88004	31237	15980	8500	20000	5003	3047	5000	1000
29996	29997	150000	1	3	2	43	-1	-1	-1	-1	...	3502	8979	5190	0	1837	3526	8998	129	0	0
29997	29998	30000	1	2	2	37	4	3	2	-1	...	2758	20878	20582	19357	0	0	22000	4200	2000	3100
29998	29999	80000	1	3	1	41	1	-1	0	0	...	76304	52774	11855	48944	85900	3409	1178	1926	52964	1804
29999	30000	50000	1	2	1	46	0	0	0	0	...	49764	36535	32428	15313	2078	1800	1430	1000	1000	1000

	PAY_AMT1	PAY_AMT2	PAY_AMT3	PAY_AMT4	PAY_AMT5	PAY_AMT6
count	30000.000000	3.000000e+04	30000.00000	30000.000000	30000.000000	30000.000000
mean	5663.580500	5.921163e+03	5225.68150	4826.076867	4799.387633	5215.502567
std	16563.280354	2.304087e+04	17606.96147	15666.159744	15278.305679	17777.465775
min	0.000000	0.000000e+00	0.00000	0.000000	0.000000	0.000000
25%	1000.000000	8.330000e+02	390.00000	296.000000	252.500000	117.750000
50%	2100.000000	2.009000e+03	1800.00000	1500.000000	1500.000000	1500.000000
75%	5006.000000	5.000000e+03	4505.00000	4013.250000	4031.500000	4000.000000
max	873552.000000	1.684259e+06	896040.00000	621000.000000	426529.000000	528666.000000

	default payment next month
0	1
1	1
2	0
3	0
4	0
5	0
6	0
7	0
8	0
9	0
10	0
11	0
12	0
13	1
14	0
15	0
16	1
17	0
18	0
19	0
20	0
21	1
22	1
23	1
24	0
25	0
26	1
27	0
28	0
29	0
...	...
29970	0
29971	0
29972	0
29973	1
29974	1
29975	0
29976	1
29977	0
29978	0
29979	0
29980	0
29981	0
29982	1
29983	0
29984	0
29985	0
29986	0
29987	0
29988	0
29989	0
29990	0
29991	1
29992	0
29993	0
29994	1
29995	0
29996	0
29997	1
29998	1
29999	1

	billamt_rel_1	billamt_rel_2	billamt_rel_3	billamt_rel_4	billamt_rel_5	billamt_rel_6
0	0.195650	0.155100	0.034450	0.000000	0.000000	0.000000
1	0.022350	0.014375	0.022350	0.027267	0.028792	0.027175
2	0.324878	0.155856	0.150656	0.159233	0.166089	0.172767
3	0.939800	0.964660	0.985820	0.566280	0.579180	0.590940
4	0.172340	0.113400	0.716700	0.418800	0.382920	0.382620

	SEX	EDUCATION	MARRIAGE	AGE	PAY_0	PAY_2	PAY_3	PAY_4	PAY_5	PAY_6	...	bill_amt_avg	bill_amt_avg_log	billamt_rel_1	billamt_rel_2	billamt_rel_3	billamt_rel_4	billamt_rel_5	billamt_rel_6	LIMIT_BAL_LOG	LIMIT_BAL_CAT
0	F	2	married	24	2	2	-1	-1	-2	-2	...	1284.000000	7.158514	0.195650	0.155100	0.034450	0.000000	0.000000	0.000000	9.903538	[20000, 30000)
1	F	2	single	26	-1	2	0	0	0	2	...	2846.166667	7.954080	0.022350	0.014375	0.022350	0.027267	0.028792	0.027175	11.695255	[120000, 130000)
2	F	2	single	34	0	0	0	0	0	0	...	16942.166667	9.737620	0.324878	0.155856	0.150656	0.159233	0.166089	0.172767	11.407576	[90000, 100000)
3	F	2	married	37	0	0	0	0	0	0	...	38555.666667	10.559884	0.939800	0.964660	0.985820	0.566280	0.579180	0.590940	10.819798	[50000, 60000)
4	M	2	married	57	-1	0	-1	0	0	0	...	18223.166667	9.810504	0.172340	0.113400	0.716700	0.418800	0.382920	0.382620	10.819798	[50000, 60000)

My Data Journey

Saturday, February 10, 2018

Credit Card Defaulter Prediction

No comments:

Post a Comment

	PAY_1	PAY_2	PAY_3	PAY_4	PAY_5	PAY_6
0	2	2	-1	-1	-2	-2
1	-1	2	0	0	0	2
2	0	0	0	0	0	0
3	0	0	0	0	0	0
4	-1	0	-1	0	0	0
5	0	0	0	0	0	0
6	0	0	0	0	0	0
7	0	-1	-1	0	0	-1
8	0	0	2	0	0	0
9	-2	-2	-2	-2	-1	-1

	ID	LIMIT_BAL	SEX	EDUCATION	MARRIAGE	AGE	PAY_0	PAY_2	PAY_3	PAY_4	...	BILL_AMT5	BILL_AMT6	PAY_AMT1	PAY_AMT2	PAY_AMT3	PAY_AMT4	PAY_AMT5	PAY_AMT6	default payment next month	age_cat
0	1	20000	F	2	married	24	2	2	-1	-1	...	0	0	0	689	0	0	0	0	1	[20, 30)
1	2	120000	F	2	single	26	-1	2	0	0	...	3455	3261	0	1000	1000	1000	0	2000	1	[20, 30)
2	3	90000	F	2	single	34	0	0	0	0	...	14948	15549	1518	1500	1000	1000	1000	5000	0	[30, 40)
3	4	50000	F	2	married	37	0	0	0	0	...	28959	29547	2000	2019	1200	1100	1069	1000	0	[30, 40)
4	5	50000	M	2	married	57	-1	0	-1	0	...	19146	19131	2000	36681	10000	9000	689	679	0	[50, 60)

	EDUCATION	AGE	PAY_0	PAY_2	PAY_3	PAY_4	PAY_5	PAY_6	BILL_AMT1	BILL_AMT2	...	billamt_rel_2	billamt_rel_3	billamt_rel_4	billamt_rel_5	billamt_rel_6	LIMIT_BAL_LOG	LIMIT_BAL_CAT	married	single
0	2	24	2	2	-1	-1	-2	-2	3913	3102	...	0.155100	0.034450	0.000000	0.000000	0.000000	9.903538	[20000, 30000)	1	0
1	2	26	-1	2	0	0	0	2	2682	1725	...	0.014375	0.022350	0.027267	0.028792	0.027175	11.695255	[120000, 130000)	0	1
2	2	34	0	0	0	0	0	0	29239	14027	...	0.155856	0.150656	0.159233	0.166089	0.172767	11.407576	[90000, 100000)	0	1
3	2	37	0	0	0	0	0	0	46990	48233	...	0.964660	0.985820	0.566280	0.579180	0.590940	10.819798	[50000, 60000)	1	0
4	2	57	-1	0	-1	0	0	0	8617	5670	...	0.113400	0.716700	0.418800	0.382920	0.382620	10.819798	[50000, 60000)	1	0

	0	1	2	3	4	5	6	7	8	9	...	15	16	17	18	19	20	21	22	23	24
0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.004409	0.000628	0.580996	0.284853	0.270284	0.147272	0.318566	0.270724	0.408937
1	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.006641	0.000409	0.612622	0.232766	0.308661	0.174488	0.365644	0.324939	0.463906
2	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.010118	0.018488	0.645138	0.216661	0.277087	0.142489	0.225716	0.222095	0.356743
3	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.009563	0.002383	0.640787	0.220395	0.302259	0.171015	0.307590	0.261560	0.401205
4	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.009724	0.000376	0.642075	0.135019	0.223633	0.117356	0.265898	0.214340	0.349995
5	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.091626	0.203063	0.815354	0.114522	0.199267	0.099591	0.218084	0.156844	0.368135
6	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.024567	0.053166	0.713664	0.090341	0.181196	0.088969	0.210769	0.150844	0.355932
7	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.017581	0.001611	0.687820	0.225891	0.307666	0.171510	0.357408	0.315907	0.461681
8	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.018059	0.004777	0.689893	0.091829	0.184899	0.091070	0.214920	0.156156	0.283980
9	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.013655	0.015064	0.668296	0.087488	0.179791	0.087594	0.215962	0.170810	0.295453
10	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.000627	0.000202	0.430521	0.088021	0.179836	0.087758	0.211210	0.151573	0.280330
11	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.003041	0.000649	0.552308	0.224770	0.263916	0.143897	0.313170	0.229700	0.365744
12	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.008737	0.004013	0.633808	0.160226	0.243201	0.130788	0.271243	0.177673	0.309571
13	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.010472	0.004780	0.647798	0.163552	0.250678	0.135392	0.299900	0.252639	0.398869
14	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.004471	0.000343	0.582068	0.131388	0.220178	0.115130	0.261394	0.208340	0.343179
15	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.005020	0.000977	0.591018	0.187289	0.261216	0.138368	0.278529	0.221589	0.332519
16	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.003203	0.006071	0.556316	0.204664	0.271849	0.094084	0.222572	0.152146	0.279798
17	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.4	0.013303	0.016184	0.666280	0.219056	0.301897	0.175881	0.366252	0.325762	0.433422
18	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.006908	0.009418	0.615666	0.091148	0.182614	0.087468	0.216058	0.150844	0.279798
19	0.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	...	0.4	0.001992	0.001252	0.519650	0.207084	0.292836	0.164193	0.358781	0.319500	0.464163
20	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.2	0.002789	0.001354	0.545627	0.216765	0.300665	0.170985	0.325052	0.211014	0.347815
21	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.4	0.003639	0.002157	0.566168	0.197155	0.281248	0.153279	0.335226	0.293531	0.436676
22	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.020778	0.019782	0.700726	0.087763	0.181281	0.094911	0.214451	0.153187	0.284684
23	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.018021	0.010638	0.689728	0.091140	0.187608	0.090588	0.211063	0.156970	0.283974
24	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	...	0.2	0.001285	0.000768	0.485814	0.135069	0.229209	0.124841	0.282794	0.229136	0.365820
25	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.009832	0.006837	0.642927	0.089870	0.184756	0.088115	0.216809	0.159607	0.291115
26	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.005747	0.009460	0.601456	0.129880	0.216677	0.113239	0.262159	0.206723	0.388415
27	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.4	0.006588	0.000947	0.612001	0.196121	0.280767	0.155696	0.333607	0.291818	0.435365
28	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.035108	0.059510	0.741245	0.090333	0.184840	0.093627	0.271777	0.210153	0.306517
29	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.000525	0.001243	0.416930	0.087708	0.179552	0.087513	0.212793	0.150844	0.279798
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
22823	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.002820	0.002894	0.546475	0.116121	0.199301	0.096321	0.227034	0.167515	0.288839
22824	1.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0	1.0	...	0.2	0.040565	0.067882	0.752407	0.117194	0.206261	0.099589	0.224703	0.167846	0.299583
22825	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.007300	0.001121	0.619927	0.159873	0.246316	0.133097	0.294277	0.245094	0.383556
22826	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.004914	0.000756	0.589366	0.174640	0.254742	0.136754	0.301930	0.255341	0.395613
22827	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.017121	0.012615	0.685772	0.088086	0.176323	0.089884	0.214177	0.155623	0.284946
22828	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.047166	0.094511	0.764053	0.099132	0.307042	0.170647	0.304845	0.257362	0.396784
22829	0.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	...	0.4	0.001762	0.002202	0.510213	0.205025	0.291885	0.160084	0.356894	0.309222	0.452195
22830	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.004449	0.004080	0.581691	0.135951	0.226768	0.118907	0.244684	0.187490	0.322607
22831	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	...	0.7	0.003984	0.000000	0.573169	0.142666	0.243670	0.138740	0.317940	0.287900	0.442650
22832	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.012815	0.013227	0.663393	0.090560	0.182976	0.088787	0.218402	0.152664	0.299003
22833	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.025725	0.035682	0.717224	0.107657	0.198269	0.100884	0.210841	0.151340	0.296137
22834	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.4	0.000929	0.000839	0.460839	0.107911	0.197270	0.100915	0.235359	0.181414	0.311652
22835	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.002313	0.001218	0.531196	0.137724	0.229849	0.122156	0.274325	0.223658	0.360305
22836	0.0	0.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	...	0.2	0.001170	0.001385	0.478632	0.235573	0.310587	0.163948	0.318952	0.272891	0.421519
22837	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.009457	0.005203	0.639924	0.222044	0.300630	0.169557	0.358533	0.315199	0.457801
22838	0.0	1.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	0.0	...	0.2	0.001693	0.000296	0.507129	0.204749	0.292821	0.166703	0.330085	0.291134	0.437664
22839	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.2	0.001911	0.000652	0.516455	0.133966	0.224163	0.117432	0.267405	0.212318	0.347392
22840	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.000909	0.000960	0.459146	0.087987	0.179805	0.087738	0.211173	0.151297	0.280286
22841	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.008242	0.005834	0.629299	0.140382	0.226408	0.116646	0.259274	0.199185	0.300092
22842	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.003385	0.001608	0.560589	0.220399	0.249662	0.121508	0.273068	0.216199	0.356701
22843	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.4	0.003110	0.003092	0.554049	0.213870	0.302995	0.167736	0.365596	0.324285	0.466045
22844	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.2	0.011447	0.006903	0.654676	0.112580	0.200369	0.102177	0.235554	0.179832	0.316075
22845	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.008705	0.005515	0.633528	0.087772	0.182195	0.090225	0.214190	0.153477	0.282843
22846	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.005529	0.003208	0.598476	0.088847	0.182171	0.089687	0.211305	0.155220	0.281936
22847	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.4	0.003007	0.002670	0.551447	0.206529	0.295533	0.162820	0.348284	0.314753	0.452985
22848	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.005104	0.000906	0.592299	0.093751	0.184818	0.090649	0.217721	0.159163	0.284879
22849	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	...	0.1	0.002390	0.002120	0.533725	0.087553	0.181612	0.086422	0.215550	0.150139	0.282070
22850	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.068365	0.014719	0.792730	0.118960	0.189275	0.103666	0.227426	0.176870	0.303560
22851	0.0	1.0	0.0	0.0	1.0	0.0	0.0	1.0	0.0	0.0	...	0.4	0.001158	0.001014	0.477787	0.137187	0.222968	0.111949	0.253531	0.199752	0.323232
22852	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.1	0.003474	0.003407	0.562592	0.089014	0.180625	0.089924	0.213532	0.152394	0.281468

	PAY_1	PAY_2	PAY_3	PAY_4	PAY_5	PAY_6
0	2	2	-1	-1	-2	-2
1	-1	2	0	0	0	2
2	0	0	0	0	0	0
3	0	0	0	0	0	0
4	-1	0	-1	0	0	0
5	0	0	0	0	0	0
6	0	0	0	0	0	0
7	0	-1	-1	0	0	-1
8	0	0	2	0	0	0
9	-2	-2	-2	-2	-1	-1

	PAY_1	PAY_2	PAY_3	PAY_4	PAY_5	PAY_6
0	2	2	-1	-1	-2	-2
1	-1	2	0	0	0	2
2	0	0	0	0	0	0
3	0	0	0	0	0	0
4	-1	0	-1	0	0	0
5	0	0	0	0	0	0
6	0	0	0	0	0	0
7	0	-1	-1	0	0	-1
8	0	0	2	0	0	0
9	-2	-2	-2	-2	-1	-1