import logging import pendulum import simplejson as json from elastics

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import logging
import pendulum
import simplejson as json
from elasticsearch_dsl import (Index, Text, Boolean, Document,
Q, Nested, Long, Date, Integer, Keyword, Float,
tokenizer, analyzer, Completion, analysis
)
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Search
from tasks.settings import (MAXSIZE, ELASTIC_HOSTS_LUMOTIVE, ELASTIC_USER_LUMOTIVE,
ELASTIC_PASS_LUMOTIVE, ConnectElasticProdLumotive, ELASTIC_INDEX_NAME)
# Elasticsearch connections are opened at import time.
# `elastic` holds whatever the project's connection helper returns
# (presumably a client object -- confirm against tasks.settings).
elastic = ConnectElasticProdLumotive().connect()

# Register a connection with elasticsearch_dsl as well, so Document and
# Search classes in this module can reach the cluster.
connections.create_connection(
    hosts=ELASTIC_HOSTS_LUMOTIVE,
    http_auth=(ELASTIC_USER_LUMOTIVE, ELASTIC_PASS_LUMOTIVE),
    maxsize=MAXSIZE,
)

# Module-level logger.
log = logging.getLogger('Posts Index')
# Custom analyzer for prefix-style matching.
# Pipeline: standard tokenizer -> lowercase -> asciifolding ->
# edge n-grams of 3 to 7 characters -> stop filter for URL boilerplate tokens.
#
# Alternative kept for reference (not active): use an edge_ngram tokenizer
# instead of the standard one, i.e.
#   tokenizer=tokenizer('autocomplete_filter', 'edge_ngram', min_gram=3, max_gram=20)
autocomplete_edge_ngram = analyzer(
    'autocomplete_edge_ngram',
    tokenizer='standard',
    filter=[
        'lowercase',
        'asciifolding',
        analysis.token_filter(
            'autocomplete_filter',
            'edge_ngram',
            min_gram=3,
            max_gram=7,
        ),
        analysis.token_filter(
            'url_filter',
            'stop',
            stop_words=["http", "https", ":", "/", ".", "html"],
        ),
    ],
)
# Custom analyzer for substring matching.
# Pipeline: standard tokenizer -> lowercase -> asciifolding ->
# n-grams of 3 to 7 characters -> stop filter for URL boilerplate tokens.
#
# Two fixes compared with the previous definition:
#   * The n-gram filter now has its own name. It used to be called
#     'autocomplete_filter', the same name the edge_ngram filter of
#     `autocomplete_edge_ngram` uses. Custom filters are stored by name in the
#     index analysis settings, so two different definitions under one name
#     cannot coexist: one silently replaces the other and both analyzers end
#     up with the same filter.
#   * The filter type is spelled 'ngram'. 'nGram' is a deprecated alias that
#     newer Elasticsearch versions no longer accept.
#
# NOTE: analysis settings are fixed when an index is created. An existing
# index must be recreated (or closed and updated) to pick up this definition.
# NOTE(review): on Elasticsearch 7+ a 3-7 gram range exceeds the default
# `index.max_ngram_diff` -- confirm the index settings raise it.
autocomplete_ngram = analyzer(
    'autocomplete_ngram',
    tokenizer='standard',
    filter=[
        'lowercase',
        'asciifolding',
        analysis.token_filter(
            'autocomplete_ngram_filter',
            'ngram',
            min_gram=3,
            max_gram=7,
        ),
        analysis.token_filter(
            'url_filter',
            'stop',
            stop_words=["http", "https", ":", "/", ".", "html"],
        ),
    ],
)
def _autocomplete_text():
    """Return a fresh Text field with ``raw``, ``edgengram`` and ``ngram`` sub-fields.

    The sub-fields are an exact-value keyword plus two Text variants analyzed
    with ``autocomplete_edge_ngram`` and ``autocomplete_ngram``; ``fielddata``
    is enabled on the parent field. A new field object is built on every call
    so that each document attribute gets its own instance.
    """
    return Text(
        fields={
            'raw': Keyword(),
            'edgengram': Text(analyzer=autocomplete_edge_ngram),
            'ngram': Text(analyzer=autocomplete_ngram),
        },
        fielddata=True,
    )


class CuratedTopics(Document):
    """Document mapping stored in the ``curated_topics`` index."""

    id = Keyword()
    # Full-text name with an exact-match `raw` keyword sub-field.
    name = Text(fields={'raw': Keyword()})
    # URL and its network location share the same multi-field layout.
    url = _autocomplete_text()
    url_suggest = Completion()
    netloc = _autocomplete_text()
    domain_authority = Integer()
    feed_type = Keyword()
    page_rank = Integer()
    is_entity_search = Boolean()

    class Index:
        # Name of the Elasticsearch index backing this document type.
        name = 'curated_topics'