usr bin env python coding utf-8 import pandas as pd import os def get_

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pandas as pd
import os
def get_tac_filtered_df(df):
    """Return the rows of ``df`` whose ``tac`` value is a 6- or 8-digit number.

    The ``tac`` column is converted with ``pd.to_numeric(errors='coerce')``.
    Rows whose value cannot be parsed, is missing, or is infinite are dropped.
    The remaining values are truncated to integers and a row is kept only
    when the decimal text of that integer is exactly 6 or 8 characters long.

    Args:
        df: DataFrame with a ``tac`` column.  It is left unmodified.

    Returns:
        A new DataFrame holding the surviving rows, with ``tac`` replaced by
        its numeric form.
    """
    tac = pd.to_numeric(df['tac'], errors='coerce')

    # int(inf) raises OverflowError, so non-finite values are treated like
    # unparseable ones instead of crashing the whole filter.
    infinite = tac.isin([float('inf'), float('-inf')])
    usable = tac.notna() & ~infinite

    # assign() builds a new frame, so the caller's column is not overwritten.
    result = df.assign(tac=tac)[usable]

    # Compute the text width once and test both accepted lengths against it.
    widths = result['tac'].map(int).map(str).map(len)
    return result[widths.isin([6, 8])]
def join_columns(df, sep, *columns):
    """Merge several columns of ``df`` into the first one, in place.

    For every row the non-empty values of ``columns`` are converted to text
    and joined with ``sep``.  Missing values and empty strings are skipped, so
    a row with a single usable value yields that value without a dangling
    separator.  A row with no usable value yields NaN.  The result is stored
    in the first named column and the remaining named columns are dropped.

    Args:
        df: DataFrame to modify; it is mutated and also returned.
        sep: Separator string placed between the joined values.
        *columns: One or more column names; the first receives the result.

    Returns:
        The same ``df`` object that was passed in.

    Raises:
        ValueError: If no column name is given.
    """
    if not columns:
        raise ValueError('join_columns() needs at least one column name')
    target, *others = columns

    def _join_row(row):
        # Skip NaN/None and empty strings so no stray separators are emitted.
        parts = [str(value) for value in row
                 if pd.notna(value) and str(value) != '']
        return sep.join(parts) if parts else float('nan')

    # A plain list (rather than DataFrame.apply(axis=1)) also works for an
    # empty frame, where apply() would hand back a DataFrame, not a Series.
    rows = df[list(columns)].itertuples(index=False, name=None)
    df[target] = [_join_row(row) for row in rows]
    df.drop(columns=others, inplace=True)
    return df
# Locations of the CSV inputs, all relative to ./sources.
sources_path = './sources'
tac_csv = os.path.join(sources_path, 'tac.csv')
tacdb_csv = os.path.join(sources_path, 'tacdb (1).csv')
# NOTE(review): the two paths below are defined but never read in the code
# visible here -- confirm whether they are still needed.
imeidb_csv = os.path.join(sources_path, 'imeidb.csv')
tac_db_100000 = os.path.join(sources_path, 'tac-db-100000.csv')

# First source: semicolon-separated; discard the columns that are not carried
# into the merged output, then keep only rows with a 6- or 8-digit TAC.
tac_data = pd.read_csv(tac_csv, engine='python', sep=';')
tac_data = tac_data.drop(columns=['id', 'version', 'last_update'])
tac_data = get_tac_filtered_df(tac_data)

# Second source: comma-separated, with the first line of the file skipped.
# NOTE(review): this frame is named imeidb_data but is loaded from tacdb_csv,
# not imeidb_csv -- confirm that this is intended.
imeidb_data = pd.read_csv(tacdb_csv, engine='python', sep=',', skiprows=1)
imeidb_data = imeidb_data.drop(columns=['contributor', 'comment'])
imeidb_data = get_tac_filtered_df(imeidb_data)

# Fold 'aka' into 'name.1' and 'gsmarena.1' into 'gsmarena', then rename the
# surviving columns ('name.1'/'gsmarena.1' are presumably pandas' de-duplicated
# names for repeated header fields -- verify against the CSV header).
imeidb_data = join_columns(imeidb_data, ',', 'name.1', 'aka')
imeidb_data = join_columns(imeidb_data, ',', 'gsmarena', 'gsmarena.1')
imeidb_data = imeidb_data.rename(
    columns={'name': 'manufacturer', 'name.1': 'model', 'gsmarena': 'img'})

print(tac_data)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
# tac_data comes first, so keep="first" prefers its rows over imeidb_data's
# when both contain the same TAC.
final_data = pd.concat([tac_data, imeidb_data], sort=False)
final_data = final_data.drop_duplicates(subset='tac', keep="first")
print(final_data)