luojunhui преди 1 седмица
родител
ревизия
fae8fb6b3f
променени са 2 файла, в които са добавени 17 реда и са изтрити 15 реда
  1. 11 13
      applications/api/es_api.py
  2. 6 2
      long_articles_job.py

+ 11 - 13
applications/api/es_api.py

@@ -14,7 +14,7 @@ from config.es_mappings import index_name, mappings, settings
 db_client = DatabaseConnector(long_articles_config)
 db_client.connect()
 
-ctx = ssl.create_default_context(cafile="config/es_certs.crt")
+ctx = ssl.create_default_context(cafile="es_certs.crt")
 
 es_password = 'nkvvASQuQ0XUGRq5OLvm'
 es = Elasticsearch(
@@ -78,14 +78,15 @@ def get_articles(id_):
     return docs
 
 
-def search():
+def search(key_string):
     query = {
         "query": {
             "match": {
-                "title": "刘伯承元帅"
+                "title": key_string
             }
         },
-        "_source": ["article_id", "title"]
+        "_source": ["article_id", "title"],
+        "size": 100
     }
 
     a = time.time()
@@ -105,12 +106,9 @@ def get_cluster_docs_stats():
 
 
 if __name__ == "__main__":
-    max_id = get_max_article_id()
-    i = 0
-    while int(max_id) < 27492350:
-        articles = get_articles(max_id)
-        res = helpers.bulk(es, articles)
-        print(es.count(index=index_name))
-        max_id = get_max_article_id()
-        i += 1
-        print(i)
+    with open("search_keys.txt", encoding="utf-8") as f:
+        key_list = f.readlines()
+    import random
+    search_title = random.choice(key_list).strip()
+    print(search_title)
+    search(search_title)

+ 6 - 2
long_articles_job.py

@@ -49,7 +49,11 @@ def run_fwh_data_manager():
 
 
 def run_top_article_generalize_from_article_pool():
-    TopArticleGeneralizeFromArticlePool().deal()
+    task = TopArticleGeneralizeFromArticlePool()
+    top_articles = task.fetch_distinct_top_titles()
+    for top_article in top_articles:
+        keys = task.get_keys_by_ai(top_article)
+        print(",".join(keys))
 
 
 def main():
@@ -85,4 +89,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    run_top_article_generalize_from_article_pool()