from datetime import datetime timedelta def add_delta_days delta day r

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from datetime import datetime, timedelta
def add_delta_days(delta, day):
return (datetime.strptime(day, '%Y-%m-%d') + timedelta(delta)).strftime('%Y-%m-%d')
def get_week_intervals_peer_month(day):
start_day = add_delta_days(-28, day)
intervals = [(add_delta_days(i*7, start_day), add_delta_days((i+1)*7, start_day)) for i in range(4)]
return intervals
inter_id = ['138', '189', '504', '505', '506', '515', '524', '525', '526', '527', '743']
resou_id = ','.join(['gazeta.ru', 'lenta.ru', 'livejournal.com', 'price.ru', 'afisha.ru', 'eda.ru', 'gorod.afisha.ru'])
def get_users_count_per_interval(resours_id, day_start, day_end):
QUERY_TEMPLATE_RES_USERS_COUNT = '''
SELECT DISTINCT(ruid) as ruid FROM factor_url_domain_day
WHERE url = '{resours_id}' and day >= '{day_start}' and day < '{day_end}'
'''
return QUERY_TEMPLATE_RES_USERS_COUNT.format(resours_id=resours_id, day_start=day_start, day_end=day_end)
def get_mega_join(table_1, table_2, solt):
MEGA_JOIN = '''
FROM (
{table_1}
) {solt}1
JOIN (
{table_2}
) {solt}2
ON ({solt}1.ruid == {solt}2.ruid)
'''
return MEGA_JOIN.format(table_1=table_1, table_2=table_2, solt=solt)
def get_kernal(day, resours_id):
intervals = get_week_intervals_peer_month(day)
intervals_query = map(
lambda (start_day, end_day): get_users_count_per_interval(resours_id, start_day, end_day),
intervals
)
kernal = get_mega_join(
'SELECT c1.ruid' + get_mega_join(intervals_query[0], intervals_query[1], 'c'),
'SELECT b1.ruid' + get_mega_join(intervals_query[2], intervals_query[3], 'b'),
'a'
)
return kernal
def det_count_ruids(table, solt):
COUNT_TEMPLATE = '''
SELECT COUNT({solt}.ruid) {table}
'''
return COUNT_TEMPLATE.format(table=table, solt=solt)
def get_count_kernal_on_res_with_inter(kernal, day, inter_id):
QUERY_TEMPLATE_RES_USERS_COUNT_WITH_RESID = '''
FROM (
SELECT users_inter.ruid, users_inter.inter
FROM (
SELECT a1.ruid
{kernal}
) res_id_users
JOIN (
SELECT ruid, inter
FROM factor_interests_day LATERAL VIEW explode(split(interests, ',')) explode_inters_table as inter
where day = '{day}'
) users_inter
ON (res_id_users.ruid == users_inter.ruid)
) resours_ruid_inter_users
SELECT COUNT(DISTINCT(ruid)) WHERE array_contains(split('{inter_id}', ','), inter);
'''
return QUERY_TEMPLATE_RES_USERS_COUNT_WITH_RESID.format(inter_id=inter_id, day=day, kernal=kernal)
def main():
day = '2014-12-16'
resours_id = resou_id[0]
kernal = get_kernal(day, resours_id)
count_kernal_ruids = det_count_ruids(kernal, 'a1')
user_from_kernal_with_res_id = get_count_kernal_on_res_with_inter(kernal, day, resou_id)
print user_from_kernal_with_res_id
main()