保险风险预测 (Insurance Risk Prediction)
Size: 33.25MB
Price: 10 points
Downloads: 0
Rating: 5.0
Uploader: qq475225253
Updated: 2025-09-22

Machine learning: insurance risk prediction (implemented with a neural network and an SVM; includes complete code and data; the data comes from Prudential).

Resource file list (approximate)

File name                                   Size
保险风险预测/                               -
保险风险预测/代码/                          -
保险风险预测/代码/SVM-RiskAssessment.py     6.82KB
保险风险预测/代码/nn-RiskAssessment.py      3.46KB
保险风险预测/代码/preprocess.py             7.51KB
保险风险预测/数据/                          -
保险风险预测/数据/application_test.csv      25.34MB
保险风险预测/数据/predict.csv               6.32MB
保险风险预测/数据/train.csv                 141.81MB

(Archive paths are kept verbatim: 保险风险预测 = "insurance risk prediction", 代码 = "code", 数据 = "data".)

Resource description

Machine learning: insurance risk prediction (implemented with a neural network and an SVM; includes complete code and data; the data comes from Prudential). What follows appears to be the archive's preprocessing script (preprocess.py):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# preprocess.py -- cleaning and feature engineering for the Prudential
# training data.  (Reflowed from the original single-line dump; the
# logic is unchanged except where noted in comments.)
import warnings

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import andrews_curves, parallel_coordinates, radviz

warnings.filterwarnings("ignore")
sns.set(style="white", color_codes=True)

train_data = pd.read_csv('D:\\DataSet\\Prudential_Life_Insurance_Assessment'
                         '\\train\\train.csv')

# Quick-look diagnostics, left commented out in the original:
# print(train_data.columns)
# print(train_data.info())
# print(train_data.shape)
# print(train_data.describe())

# Missing-value report, also left commented out (the original repeats
# this block twice more further down; the repeats are omitted here):
# total = train_data.isnull().sum().sort_values(ascending=False)
# percent = (train_data.isnull().sum() / train_data.isnull().count()) \
#     .sort_values(ascending=False)
# Missing_Value = pd.concat([total, percent], axis=1,
#                           keys=['Total', 'Percent'])
# print(Missing_Value)

# Drop the columns with the highest missing-value ratios (the original
# issued nine separate drop() calls; one call is equivalent).
train_data = train_data.drop(
    ['Medical_History_10', 'Medical_History_32', 'Medical_History_24',
     'Medical_History_15', 'Family_Hist_5', 'Family_Hist_3',
     'Family_Hist_2', 'Insurance_History_5', 'Family_Hist_4'],
    axis=1)

# Fill the remaining sparse numeric columns with the column mean ...
for col in ['Employment_Info_6', 'Medical_History_1', 'Employment_Info_4']:
    train_data[col] = train_data[col].fillna(train_data[col].mean())

# ... and drop the few rows where Employment_Info_1 is missing.
train_data = train_data.drop(
    train_data[train_data['Employment_Info_1'].isnull()].index)

# NOTE: the original next called train_data.groupby() over fourteen
# columns without using the result; that statement has no effect and
# is omitted here.

# Per-Id counts of the InsuredInfo_* and Insurance_History_* columns,
# merged back as extra features.  The original built thirteen nearly
# identical blocks using the dict-renaming form of agg(), which newer
# pandas no longer accepts; count() + rename() is the equivalent.
count_cols = ['InsuredInfo_1', 'InsuredInfo_2', 'InsuredInfo_3',
              'InsuredInfo_4', 'InsuredInfo_5', 'InsuredInfo_6',
              'Insurance_History_1', 'Insurance_History_2',
              'Insurance_History_3', 'Insurance_History_4',
              'Insurance_History_7', 'Insurance_History_8',
              'Insurance_History_9']
for i, col in enumerate(count_cols, start=1):
    counts = (train_data.groupby('Id', as_index=False)[col].count()
              .rename(columns={col: 'Info%d_count' % i}))
    train_data = pd.merge(train_data, counts, on=['Id'], how='left')

# Encode the categorical Product_Info_2 as an integer code.
print(list(set(train_data['Product_Info_2'])))
Product_Info_2_map = {'A1': 1, 'A2': 2, 'A3': 3, 'A4': 4, 'A5': 5,
                      'A6': 6, 'A7': 7, 'A8': 8, 'B1': 9, 'B2': 10,
                      'C1': 11, 'C2': 12, 'C3': 13, 'C4': 14,
                      'D1': 15, 'D2': 16, 'D3': 17, 'D4': 18, 'E1': 19}
train_data['Product_Info_2'] = train_data['Product_Info_2'] \
    .map(Product_Info_2_map)

# Exploratory plots on a 100-row sample, all commented out in the
# original (one commented FacetGrid call even referenced the iris
# columns SepalLengthCm/SepalWidthCm, a leftover from a tutorial):
train_data_Sample = train_data.sample(n=100)
# train_data_Sample.plot(kind='scatter', x='Ins_Age', y='BMI'); plt.show()
# sns.pairplot(train_data_Sample.drop('Product_Info_2', axis=1),
#              hue='Response'); plt.show()
# sns.FacetGrid(train_data_Sample, hue='Response') \
#     .map(sns.kdeplot, 'Employment_Info_2'); plt.show()
# andrews_curves(train_data_Sample.drop('Product_Info_2', axis=1),
#                'Response'); plt.show()
# parallel_coordinates(train_data_Sample.drop('Product_Info_2', axis=1),
#                      'Response'); plt.show()
# radviz(train_data_Sample.drop('Product_Info_2', axis=1),
#        'Response'); plt.show()
# Box + strip plots of Ins_Age, Ht, Wt and BMI against Response:
# for col in ['Ins_Age', 'Ht', 'Wt', 'BMI']:
#     sns.boxplot(x='Response', y=col, data=train_data_Sample)
#     sns.stripplot(x='Response', y=col, data=train_data_Sample,
#                   jitter=True, edgecolor='gray')
#     plt.show()
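
For orientation, here is a minimal, illustrative sketch of how the preprocessed frame could feed the two models named in the file list. This is not the bundled SVM-RiskAssessment.py or nn-RiskAssessment.py (their contents are not reproduced on this page); it only shows the general SVM-plus-neural-network approach using scikit-learn, assuming train_data is the frame produced by the preprocessing script above and Response (the 1-8 risk rating) is the target.

# Hypothetical sketch, not the uploader's code: fit an SVM and a small
# MLP on the preprocessed features and compare test accuracy.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

X = train_data.drop(['Id', 'Response'], axis=1)   # features
y = train_data['Response']                        # risk rating, 1-8

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Both SVMs and MLPs are sensitive to feature scale, so standardize.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

svm = SVC(kernel='rbf', C=1.0).fit(X_train, y_train)
print('SVM accuracy:', accuracy_score(y_test, svm.predict(X_test)))

nn = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=300,
                   random_state=42).fit(X_train, y_train)
print('NN accuracy:', accuracy_score(y_test, nn.predict(X_test)))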
