feature_files features target csv features prescriptions_number csv fe

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# CSV feature tables to load. Commented-out entries are deliberately excluded.
feature_files = [
    # '../../features/target.csv',
    # '../../features/prescriptions_number.csv',
    # '../../features/appointments_number.csv',
    '../../features/prescriptions_insurance_count.csv',
    '../../features/prescriptions_insurance_share.csv',
    '../../features/prescriptions_per_employee_avg.csv',
    '../../features/presc_app_time_avg.csv',
    '../../features/presc_app_time_max.csv',
    '../../features/presc_app_time_min.csv',
    '../../features/prescriptions_without_discount.csv',
    '../../features/prescriptions_has_appointments.csv',
    # '../../features/prescriptions_discount_avg.csv',  # too little data here
    # '../../features/prescriptions_one_time_discount.csv',  # too little data here
    # '../../features/prescriptions_constant_discount.csv',  # too little data here
]
# Load the full target table, using the first CSV column as the index.
target_all = pd.read_csv('../../features/target.csv', index_col=0)

# Pass SigningTime through the to_dt helper (defined outside this block;
# presumably it converts the column to datetimes -- TODO confirm).
target_all['SigningTime'] = to_dt(target_all['SigningTime'])

# Notebook-style preview of the loaded table.
target_all.head()

# Keep only the label and the signing time; the label column is renamed to 'y'.
target = target_all[['PassedAtDay44', 'SigningTime']].rename(
    columns={'PassedAtDay44': 'y'},
)
# Column-name suffix that selects the columns for the configured prediction day.
day_suffix = 'AtDay' + str(prediction_day)

# Read every feature table (first CSV column as index) and keep only the
# columns whose name ends with the prediction-day suffix.
frames = []
for path in feature_files:
    table = pd.read_csv(path, index_col=0)
    frames.append(table[[name for name in table if name.endswith(day_suffix)]])

# Inner joins along the index: only index values present in every feature
# table, and then also in the target, are kept.
features = pd.concat(frames, axis=1, join='inner')
all_data = pd.concat([features, target], axis=1, join='inner')

# Notebook-style preview of the assembled dataset.
all_data.head()