|
@@ -14,7 +14,7 @@ from config.es_mappings import index_name, mappings, settings
|
|
|
db_client = DatabaseConnector(long_articles_config)
|
|
|
db_client.connect()
|
|
|
|
|
|
-ctx = ssl.create_default_context(cafile="config/es_certs.crt")
|
|
|
+ctx = ssl.create_default_context(cafile="es_certs.crt")
|
|
|
|
|
|
es_password = 'nkvvASQuQ0XUGRq5OLvm'
|
|
|
es = Elasticsearch(
|
|
@@ -78,14 +78,15 @@ def get_articles(id_):
|
|
|
return docs
|
|
|
|
|
|
|
|
|
-def search():
|
|
|
+def search(key_string):
|
|
|
query = {
|
|
|
"query": {
|
|
|
"match": {
|
|
|
- "title": "刘伯承元帅"
|
|
|
+ "title": key_string
|
|
|
}
|
|
|
},
|
|
|
- "_source": ["article_id", "title"]
|
|
|
+ "_source": ["article_id", "title"],
|
|
|
+ "size": 100
|
|
|
}
|
|
|
|
|
|
a = time.time()
|
|
@@ -105,12 +106,9 @@ def get_cluster_docs_stats():
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- max_id = get_max_article_id()
|
|
|
- i = 0
|
|
|
- while int(max_id) < 27492350:
|
|
|
- articles = get_articles(max_id)
|
|
|
- res = helpers.bulk(es, articles)
|
|
|
- print(es.count(index=index_name))
|
|
|
- max_id = get_max_article_id()
|
|
|
- i += 1
|
|
|
- print(i)
|
|
|
+ with open("search_keys.txt", encoding="utf-8") as f:
|
|
|
+ key_list = f.readlines()
|
|
|
+ import random
|
|
|
+ search_title = random.choice(key_list).strip()
|
|
|
+ print(search_title)
|
|
|
+ search(search_title)
|