publish_article_pool_articles.py

import datetime
import json
import time
import traceback

from pandas import DataFrame

from applications import aiditApi, log, bot
from applications.db import DatabaseConnector
from config import long_articles_config

class PublishArticlePoolArticles:
    def __init__(self):
        self.db_client = DatabaseConnector(long_articles_config)
        self.db_client.connect()

    def insert_crawler_plan(self, crawler_plan_id, crawler_plan_name, create_timestamp):
        # Persist the crawler plan id so the plan can be traced later.
        insert_query = """
            insert into article_crawler_plan (crawler_plan_id, name, create_timestamp)
            values (%s, %s, %s);
        """
        try:
            self.db_client.save(
                query=insert_query,
                params=(crawler_plan_id, crawler_plan_name, create_timestamp)
            )
        except Exception as e:
            # Alert title: "category cold-start task: failed to record crawler plan id"
            bot(
                title="品类冷启任务,记录抓取计划id失败",
                detail={
                    "error": str(e),
                    "error_msg": traceback.format_exc(),
                    "crawler_plan_id": crawler_plan_id,
                    "crawler_plan_name": crawler_plan_name
                }
            )

class PublishGzhArticles(PublishArticlePoolArticles):
    def get_articles_by_crawler_method(self, crawler_method):
        # NOTE: crawler_method is interpolated directly into the SQL string,
        # so it must come from trusted internal code, never from user input.
        fetch_query = f"""
            select
                article_id, out_account_id, article_index, title, link,
                read_cnt, status, llm_sensitivity, score, category_by_ai
            from crawler_meta_article
            where category = '{crawler_method}'
                and platform = 'weixin'
                and title_sensitivity = 0;
        """
        fetch_response = self.db_client.fetch(fetch_query)
        # out_account_id and article_index are renamed to gh_id and position
        # to match the column names used downstream.
        article_data_frame = DataFrame(
            fetch_response,
            columns=[
                'article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt',
                'status', 'llm_sensitivity', 'score', 'category_by_ai'
            ]
        )
        return article_data_frame
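

# Minimal usage sketch, assuming long_articles_config points at a database
# containing the tables referenced above; "some_crawler_method", "plan_001",
# and "demo_plan" are hypothetical placeholder values, not names from this
# project.
if __name__ == "__main__":
    publisher = PublishGzhArticles()
    articles = publisher.get_articles_by_crawler_method("some_crawler_method")
    print(articles.head())
    publisher.insert_crawler_plan(
        crawler_plan_id="plan_001",
        crawler_plan_name="demo_plan",
        create_timestamp=int(time.time())
    )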