преди 1 седмица · fae8fb6b3f
--- a/applications/api/es_api.py
+++ b/applications/api/es_api.py
@@ -14,7 +14,7 @@ from config.es_mappings import index_name, mappings, settings
 
				 db_client = DatabaseConnector(long_articles_config)
			
 
				 db_client.connect()
			
 
				 
			
 
				-ctx = ssl.create_default_context(cafile="config/es_certs.crt")
			
 
				+ctx = ssl.create_default_context(cafile="es_certs.crt")
			
 
				 
			
 
				 es_password = 'nkvvASQuQ0XUGRq5OLvm'
			
 
				 es = Elasticsearch(
			
@@ -78,14 +78,15 @@ def get_articles(id_):
 
				     return docs
			
 
				 
			
 
				 
			
 
				-def search():
			
 
				+def search(key_string):
			
 
				     query = {
			
 
				         "query": {
			
 
				             "match": {
			
 
				-                "title": "刘伯承元帅"
			
 
				+                "title": key_string
			
 
				             }
			
 
				         },
			
 
				-        "_source": ["article_id", "title"]
			
 
				+        "_source": ["article_id", "title"],
			
 
				+        "size": 100
			
 
				     }
			
 
				 
			
 
				     a = time.time()
			
@@ -105,12 +106,9 @@ def get_cluster_docs_stats():
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    max_id = get_max_article_id()
			
 
				-    i = 0
			
 
				-    while int(max_id) < 27492350:
			
 
				-        articles = get_articles(max_id)
			
 
				-        res = helpers.bulk(es, articles)
			
 
				-        print(es.count(index=index_name))
			
 
				-        max_id = get_max_article_id()
			
 
				-        i += 1
			
 
				-        print(i)
			
 
				+    with open("search_keys.txt", encoding="utf-8") as f:
			
 
				+        key_list = f.readlines()
			
 
				+    import random
			
 
				+    search_title = random.choice(key_list).strip()
			
 
				+    print(search_title)
			
 
				+    search(search_title)
			
--- a/long_articles_job.py
+++ b/long_articles_job.py
@@ -49,7 +49,11 @@ def run_fwh_data_manager():
 
				 
			
 
				 
			
 
				 def run_top_article_generalize_from_article_pool():
			
 
				-    TopArticleGeneralizeFromArticlePool().deal()
			
 
				+    task = TopArticleGeneralizeFromArticlePool()
			
 
				+    top_articles = task.fetch_distinct_top_titles()
			
 
				+    for top_article in top_articles:
			
 
				+        keys = task.get_keys_by_ai(top_article)
			
 
				+        print(",".join(keys))
			
 
				 
			
 
				 
			
 
				 def main():
			
@@ -85,4 +89,4 @@ def main():
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    main()
			
 
				+    run_top_article_generalize_from_article_pool()