import logging
import pendulum
import simplejson as json
from elasticsearch_dsl import (Index, Text, Boolean, Document,
Q, Nested, Long, Date, Integer, Keyword, Float,
tokenizer, analyzer, Completion, analysis
)
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Search
from tasks.settings import (MAXSIZE, ELASTIC_HOSTS_LUMOTIVE, ELASTIC_USER_LUMOTIVE,
ELASTIC_PASS_LUMOTIVE, ConnectElasticProdLumotive, ELASTIC_INDEX_NAME)
# elasticsearch connect
# Module-level side effects: opening the project's production Elasticsearch
# client and registering the default elasticsearch-dsl connection happen at
# import time, so importing this module requires the cluster to be reachable.
elastic = ConnectElasticProdLumotive().connect()
# Default connection used implicitly by Document / Index / Search below.
connections.create_connection(hosts=ELASTIC_HOSTS_LUMOTIVE,
                              http_auth=(ELASTIC_USER_LUMOTIVE, ELASTIC_PASS_LUMOTIVE),
                              maxsize=MAXSIZE)
# NOTE(review): logger name 'Posts Index' looks copied from another module —
# this file defines the curated-topics index; confirm the intended name.
log = logging.getLogger('Posts Index')
# Prefix-autocomplete analyzer: standard tokenization, lowercased and
# accent-folded, expanded into 3-7 character edge n-grams, with common URL
# boilerplate tokens removed.
_edge_gram_filter = analysis.token_filter(
    'autocomplete_filter', 'edge_ngram', min_gram=3, max_gram=7)
_url_stop_filter = analysis.token_filter(
    'url_filter', 'stop',
    stop_words=["http", "https", ":", "/", ".", "html"])
autocomplete_edge_ngram = analyzer(
    'autocomplete_edge_ngram',
    tokenizer='standard',
    filter=['lowercase', 'asciifolding', _edge_gram_filter, _url_stop_filter],
)
# Infix-autocomplete analyzer: like the edge-n-gram analyzer above, but emits
# n-grams from anywhere in the token so mid-word matches are possible.
#
# Fixes relative to the original definition:
#   * filter type 'nGram' -> 'ngram': the camelCase alias was deprecated in
#     Elasticsearch 6.x and removed in 8.0.
#   * filter renamed 'autocomplete_filter' -> 'autocomplete_ngram_filter':
#     the edge-n-gram analyzer already defines a *different* filter under the
#     name 'autocomplete_filter', and both analyzers end up in the same
#     index's analysis settings, where one name cannot carry two definitions.
autocomplete_ngram = analyzer('autocomplete_ngram',
                              tokenizer='standard',
                              filter=['lowercase', 'asciifolding',
                                      analysis.token_filter('autocomplete_ngram_filter', 'ngram',
                                                            min_gram=3, max_gram=7),
                                      analysis.token_filter('url_filter', 'stop',
                                                            stop_words=["http", "https", ":", "/", ".", "html"]
                                                            )]
                              )
class CuratedTopics(Document):
    """Elasticsearch mapping for curated-topic entries.

    `url` and `netloc` carry keyword, edge-n-gram and n-gram sub-fields so
    they can serve exact, prefix and infix autocomplete queries; `url_suggest`
    additionally backs completion suggesters.
    """

    # Explicitly stored id field (separate from the document's _id metadata).
    id = Keyword()
    # Topic name: full-text searchable, with an exact-match 'raw' sub-field.
    name = Text(fields={
        'raw': Keyword()
    })
    # Full URL. fielddata=True allows sorting/aggregating on the analyzed
    # text at the cost of heap memory.
    url = Text(fields={
        'raw': Keyword(), 'edgengram': Text(analyzer=autocomplete_edge_ngram),
        'ngram': Text(analyzer=autocomplete_ngram)
    }, fielddata=True)
    # Completion-suggester input for URL autocomplete.
    url_suggest = Completion()
    # Host portion of the URL, indexed the same way as `url`.
    netloc = Text(fields={
        'raw': Keyword(), 'edgengram': Text(analyzer=autocomplete_edge_ngram),
        'ngram': Text(analyzer=autocomplete_ngram)
    }, fielddata=True)
    domain_authority = Integer()
    feed_type = Keyword()
    page_rank = Integer()
    is_entity_search = Boolean()

    class Index:
        # Default index this document type is bound to.
        name = 'curated_topics'
if __name__ == '__main__':
    # Print the index template generated from the CuratedTopics mapping.
    tpl = CuratedTopics._index.as_template('curated_topics')
    print(json.dumps(tpl.to_dict()))

    # Print the concrete index definition with explicit storage settings.
    idx = Index(ELASTIC_INDEX_NAME)
    idx.settings(number_of_shards=8, codec="best_compression")
    idx.doc_type(CuratedTopics)
    print(json.dumps(idx.to_dict()))