usr bin env python from collections import defaultdict from random imp

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python
from collections import defaultdict
from random import choice
# Length of the sliding window of words used by TextGenerator.train: the
# first SEQ_LEN - 1 words of the window form a lookup key and the last word
# is recorded as one of that key's possible successors.
SEQ_LEN = 2
class TextGenerator(object):
    """Word-level Markov-chain text generator.

    Training records, for every run of ``seq_len - 1`` consecutive words,
    the words that were seen immediately after that run.  Generation starts
    from a random recorded run and repeatedly appends a random recorded
    successor of the most recent ``seq_len - 1`` words.

    The code is written to run unchanged on both Python 2 and Python 3.
    """

    def __init__(self, seq_len=None):
        """Create an untrained generator.

        Args:
            seq_len: Size of the word window (key words plus one successor).
                Defaults to the module-level ``SEQ_LEN`` when omitted.

        Raises:
            ValueError: If ``seq_len`` is smaller than 1.
        """
        if seq_len is None:
            # Looked up lazily so the module constant stays the default.
            seq_len = SEQ_LEN
        if seq_len < 1:
            raise ValueError("seq_len must be at least 1")
        self._seq_len = seq_len
        # Maps a tuple of (seq_len - 1) words to the list of words observed
        # right after it; duplicates are kept so frequent successors are
        # proportionally more likely to be chosen.
        self._data = defaultdict(list)

    def train(self, file):
        """Add the words of a text file to the model.

        Words are whitespace-separated tokens; the window slides across line
        boundaries.  Calling ``train`` several times accumulates data, but
        the window is reset at the start of every call.

        Args:
            file: Path of the text file to read.
        """
        window = (None,) * self._seq_len
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(file) as handle:
            for line in handle:
                for word in line.split():
                    window = window[1:] + (word,)
                    # Skip windows still padded with the initial None
                    # placeholders (fewer than seq_len words read so far).
                    if window[0] is not None:
                        self._data[window[:-1]].append(word)

    def gentext(self, num_words):
        """Generate random text from the trained model.

        Args:
            num_words: Number of words to generate.  The seed key is always
                emitted in full, so the result contains at least
                ``seq_len - 1`` words even when ``num_words`` is smaller.

        Returns:
            The generated words joined by single spaces, followed by ``'.'``.

        Raises:
            IndexError: If the model holds no data (nothing was trained, or
                the training text had fewer than ``seq_len`` words).
        """
        # Materialise the keys once: random.choice needs a sequence, and
        # dict views are not indexable on Python 3.
        keys = list(self._data)
        if not keys:
            raise IndexError("cannot generate text from an untrained model")

        key_len = self._seq_len - 1
        text = list(choice(keys))
        while len(text) < num_words:
            key = tuple(text[len(text) - key_len:])
            # Membership test instead of indexing, so the defaultdict does
            # not grow an empty entry for unknown keys.
            if key in self._data:
                text.append(choice(self._data[key]))
            else:
                # Dead end (these words were never followed by anything in
                # the training text): restart from a random key, trimmed so
                # the output does not overshoot num_words.
                text.extend(choice(keys)[:num_words - len(text)])
        return ' '.join(text) + '.'
if __name__ == '__main__':
    # Script entry point: train on 'pandp.txt' (resolved relative to the
    # current working directory) and print a 100-word sample.
    textgen = TextGenerator()
    textgen.train('pandp.txt')
    # The parenthesised single-argument form behaves identically under the
    # Python 2 print statement and the Python 3 print function.
    print(textgen.gentext(100))