import datetime
import json
import time
import traceback

from pandas import DataFrame

from applications import aiditApi, log, bot
from applications.db import DatabaseConnector
from config import long_articles_config

class PublishArticlePoolArticles:
    """Base task for publishing articles out of the crawler article pool."""

    def __init__(self):
        # Connect to the long-articles database up front; subclasses share this client
        self.db_client = DatabaseConnector(long_articles_config)
        self.db_client.connect()

    def insert_crawler_plan(self, crawler_plan_id, crawler_plan_name, create_timestamp):
        """Record a newly created crawler plan so it can be traced later."""
        insert_query = """
            insert into article_crawler_plan (crawler_plan_id, name, create_timestamp)
            values (%s, %s, %s);
        """
        try:
            self.db_client.save(
                query=insert_query,
                params=(crawler_plan_id, crawler_plan_name, create_timestamp)
            )
        except Exception as e:
            # Alert instead of raising so the publish task keeps running
            bot(
                title="Category cold-start task: failed to record crawler plan id",
                detail={
                    "error": str(e),
                    "error_msg": traceback.format_exc(),
                    "crawler_plan_id": crawler_plan_id,
                    "crawler_plan_name": crawler_plan_name
                }
            )

class PublishGzhArticles(PublishArticlePoolArticles):
    """Publish task for WeChat official-account (gzh) articles."""

    def get_articles_by_crawler_method(self, crawler_method):
        """Fetch title-safe weixin articles pooled by the given crawler method."""
        fetch_query = """
            select article_id, out_account_id, article_index, title, link,
                   read_cnt, status, llm_sensitivity, score, category_by_ai
            from crawler_meta_article
            where category = %s and platform = 'weixin' and title_sensitivity = 0;
        """
        # Bind crawler_method as a query parameter instead of interpolating it
        # into the SQL string (the previous f-string was open to injection);
        # this assumes DatabaseConnector.fetch accepts params the way save does
        fetch_response = self.db_client.fetch(fetch_query, params=(crawler_method,))
        article_data_frame = DataFrame(
            fetch_response,
            columns=[
                'article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt',
                'status', 'llm_sensitivity', 'score', 'category_by_ai'
            ]
        )
        return article_data_frame
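
# A minimal usage sketch, not part of the original file: the plan id, plan
# name, and crawler method strings below are hypothetical, and it assumes
# the long_articles_config database is reachable.
if __name__ == "__main__":
    task = PublishGzhArticles()
    # Record a (hypothetical) crawler plan, stamped with the current unix time
    task.insert_crawler_plan("plan_123", "demo-cold-start-plan", int(time.time()))
    # Pull pooled weixin articles for a (hypothetical) crawler method
    articles = task.get_articles_by_crawler_method("account_association")
    print(articles.head())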