import codecs import gzip from os import listdir from os path import j

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import codecs
import gzip
from os import listdir
from os.path import join
from os import walk
import os
from parser import morfFileToExcel
import datetime
# Output location handed to openAndMorfGzip for every file that scanDirectory processes.
outputDir = '/Users/entropy/GZIP/test'
def openAndMorfGzip(path, output):
    """Decompress one gzip file and pass its text to ``morfFileToExcel``.

    The archive is read in text mode, decoded as windows-1252 with
    undecodable bytes ignored. ``morfFileToExcel`` receives the decoded text
    and a destination built from ``output`` joined with the archive's file
    name minus its last extension (``a.log.gz`` -> ``a.log``).

    Args:
        path: Path of the ``.gz`` file to read.
        output: Directory the destination name is built under.

    Returns:
        None. Failures are reported on stdout instead of being raised, so a
        single bad file does not abort a batch run.
    """
    # basename/splitext respect the platform's path separator and keep the
    # name intact when it contains no dot (the manual split produced '').
    filename = os.path.splitext(os.path.basename(path))[0]
    try:
        with gzip.open(path, 'rt', encoding='windows-1252', errors='ignore') as f:
            file_content = f.read()
        # os.path.join inserts the separator that plain concatenation omitted
        # when ``output`` has no trailing slash.
        morfFileToExcel(file_content, os.path.join(output, filename))
    except Exception as err:
        # Best-effort by design, but no longer a bare ``except``: Ctrl-C and
        # SystemExit propagate, and the underlying error is shown.
        now = datetime.datetime.now().strftime("%H:%M:%S")
        print(now + ' 💥 Reading failed. File: ' + path + ' (' + repr(err) + ')')
def scanDirectory(path):
    """Find every ``.gz`` file under *path* and convert each one.

    The tree rooted at ``path`` is walked recursively. Files whose extension
    is exactly ``.gz`` are collected, then each is passed to
    ``openAndMorfGzip`` together with the module-level ``outputDir``. A
    timestamped ``index/total`` progress line is printed before each file.

    Args:
        path: Root directory to scan.

    Returns:
        None. A tree with no ``.gz`` files is a no-op.
    """
    gzip_paths = []
    for root, _subdirs, names in os.walk(path):
        gzip_paths.extend(
            os.path.join(root, name)
            for name in names
            if os.path.splitext(name)[1] == '.gz'
        )

    # The former up-front call on the first list element was dropped: it
    # raised IndexError when nothing matched and otherwise processed the
    # first file twice.
    total = len(gzip_paths)
    for idx, gzip_path in enumerate(gzip_paths, start=1):
        now = datetime.datetime.now().strftime("%H:%M:%S")
        print(now + ' ' + str(idx) + '/' + str(total) + ' File: ' + gzip_path)
        openAndMorfGzip(gzip_path, outputDir)
# Kick off the conversion for every .gz file found under /Users/entropy/Logs.
# NOTE(review): there is no __main__ guard, so this also runs when the module is imported.
scanDirectory('/Users/entropy/Logs')