from __future__ import absolute_import import os path from random impo

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from __future__ import absolute_import
import os.path
from random import randint
import logging
class IndexedFile(object):
    """Random access to the lines of a text file through a sidecar index.

    The index lives next to the data file at ``<path>.idx`` and is a flat
    sequence of fixed-width, zero-padded decimal offsets (``key_size``
    characters each), one per line of the data file. Entry ``n`` holds the
    offset at which line ``n`` (0-based) starts.
    """

    # Width, in characters, of every offset stored in the index file.
    key_size = 9

    def __init__(self, path):
        """Open the data file at *path* and its existing index file.

        Raises:
            IOError/OSError: if the data file or the index file cannot be
                opened.
        """
        self.path = path
        self.fdata = open(self.path)
        try:
            self.fidx = open(self.index_path())
        except Exception:
            # Do not leak the data handle when the index is missing.
            self.fdata.close()
            raise
        self._index_size = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close both the index and the data file handles."""
        self.fidx.close()
        self.fdata.close()

    def index_path(self):
        """Return the path of the index file belonging to the data file."""
        return self.path + '.idx'

    def make_index(self):
        """(Re)build the index file by scanning the data file line by line.

        Raises:
            ValueError: if a line starts at an offset that does not fit in
                ``key_size`` digits; writing it would misalign every
                following entry. The index written so far is left on disk.
        """
        width = self.key_size
        limit = 10 ** width
        count = 0
        # Any previously cached size is invalid once the file is rewritten.
        self._index_size = None
        self.fdata.seek(0)
        with open(self.index_path(), 'w') as out:
            while True:
                # tell() must be taken before readline() so that it is the
                # offset of the start of the line about to be read.
                offset = self.fdata.tell()
                if not self.fdata.readline():
                    break
                if offset >= limit:
                    raise ValueError(
                        'offset %d does not fit in %d digits' % (offset, width))
                out.write('%0*d' % (width, offset))
                count += 1
        # Reopen the read handle so it cannot serve stale buffered entries.
        self.fidx.close()
        self.fidx = open(self.index_path())
        self._index_size = count
        logging.debug('Index size: %d', count)

    def index_size(self):
        """Return the number of entries in the index file (cached)."""
        if self._index_size is None:
            # Floor division keeps the count an int under true division.
            size = os.path.getsize(self.index_path())
            self._index_size = size // self.key_size
        return self._index_size

    def read_line(self, number):
        """Return line *number* (0-based) of the data file, stripped.

        Raises:
            ValueError: if *number* has no complete entry in the index.
        """
        self.fidx.seek(number * self.key_size)
        entry = self.fidx.read(self.key_size)
        if len(entry) != self.key_size:
            raise ValueError('line number %d is outside the index' % number)
        self.fdata.seek(int(entry))
        return self.fdata.readline().strip()

    def read_random_line(self):
        """Return a uniformly chosen line of the data file, stripped.

        Raises:
            ValueError: if the index is empty.
        """
        # randint() includes both bounds, so the last valid entry is size - 1.
        number = randint(0, self.index_size() - 1)
        return self.read_line(number)