# Per-feature CSV files that are joined into the modelling table.
# Entries that are commented out are deliberately excluded from the join.
feature_files = [
    # Loaded separately below as the label source:
    # '../../features/target.csv',
    # Disabled (reason not recorded):
    # '../../features/prescriptions_number.csv',
    # '../../features/appointments_number.csv',
    '../../features/prescriptions_insurance_count.csv',
    '../../features/prescriptions_insurance_share.csv',
    '../../features/prescriptions_per_employee_avg.csv',
    '../../features/presc_app_time_avg.csv',
    '../../features/presc_app_time_max.csv',
    '../../features/presc_app_time_min.csv',
    '../../features/prescriptions_without_discount.csv',
    '../../features/prescriptions_has_appointments.csv',
    # Disabled: too little data in these files.
    # '../../features/prescriptions_discount_avg.csv',
    # '../../features/prescriptions_one_time_discount.csv',
    # '../../features/prescriptions_constant_discount.csv',
]

# Load the full target table; the first CSV column becomes the index.
target_all = pd.read_csv('../../features/target.csv', index_col=0)
# `to_dt` is defined elsewhere -- presumably it parses the column into
# datetimes (TODO confirm).
target_all['SigningTime'] = to_dt(target_all['SigningTime'])
target_all.head()  # bare expression: quick preview when run interactively

# Keep only the label and the signing time, exposing the label as `y`.
# NOTE(review): the label column is fixed at day 44 and does not follow
# `prediction_day` -- confirm this is intentional.
target = target_all[['PassedAtDay44', 'SigningTime']].rename(
    columns={'PassedAtDay44': 'y'}
)

# From every feature file keep only the columns whose name ends with the
# suffix for the chosen prediction day.
day_suffix = 'AtDay' + str(prediction_day)
frames = []
for path in feature_files:
    frame = pd.read_csv(path, index_col=0)
    wanted = [name for name in frame.columns if name.endswith(day_suffix)]
    frames.append(frame[wanted])

# Inner joins on the index: a row survives only if it is present in every
# feature file and in the target table.
features = pd.concat(frames, axis=1, join='inner')
all_data = pd.concat([features, target], axis=1, join='inner')
all_data.head()