|  | @@ -7,12 +7,21 @@ import time
 | 
	
		
			
				|  |  |  import json
 | 
	
		
			
				|  |  |  import traceback
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +import urllib.parse
 | 
	
		
			
				|  |  |  from tqdm import tqdm
 | 
	
		
			
				|  |  |  from datetime import datetime
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  ARTICLE_TABLE = "official_articles_v2"
 | 
	
		
			
				|  |  | +ARTICLE_DELETE_CODE = 25005
 | 
	
		
			
				|  |  | +ARTICLE_SUCCESS_CODE = 0
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +DEFAULT_STATUS = 0
 | 
	
		
			
				|  |  | +REQUEST_FAIL_STATUS = -1
 | 
	
		
			
				|  |  | +DELETE_STATUS = -2
 | 
	
		
			
				|  |  | +UNKNOWN_STATUS = -3
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def get_accounts_v1():
 | 
	
	
		
			
				|  | @@ -446,15 +455,109 @@ def check_job():
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +def get_articles(db_client):
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    :return:
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    sql = f"""
 | 
	
		
			
				|  |  | +    SELECT ContentUrl, wx_sn 
 | 
	
		
			
				|  |  | +    FROM official_articles_v2 
 | 
	
		
			
				|  |  | +    WHERE publish_timestamp in {(DEFAULT_STATUS, REQUEST_FAIL_STATUS)};"""
 | 
	
		
			
				|  |  | +    response = db_client.select(sql)
 | 
	
		
			
				|  |  | +    return response
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def update_publish_timestamp(db_client, row):
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    更新发布时间戳 && minigram 信息
 | 
	
		
			
				|  |  | +    :param db_client:
 | 
	
		
			
				|  |  | +    :param row:
 | 
	
		
			
				|  |  | +    :return:
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    url = row[0]
 | 
	
		
			
				|  |  | +    wx_sn = row[1]
 | 
	
		
			
				|  |  | +    try:
 | 
	
		
			
				|  |  | +        response = WeixinSpider().get_article_text(url)
 | 
	
		
			
				|  |  | +        response_code = response['code']
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        if response_code == ARTICLE_DELETE_CODE:
 | 
	
		
			
				|  |  | +            publish_timestamp_s = DELETE_STATUS
 | 
	
		
			
				|  |  | +            root_source_id_list = []
 | 
	
		
			
				|  |  | +        elif response_code == ARTICLE_SUCCESS_CODE:
 | 
	
		
			
				|  |  | +            data = response['data']['data']
 | 
	
		
			
				|  |  | +            publish_timestamp_ms = data['publish_timestamp']
 | 
	
		
			
				|  |  | +            publish_timestamp_s = int(publish_timestamp_ms / 1000)
 | 
	
		
			
				|  |  | +            mini_program = data.get('mini_program', [])
 | 
	
		
			
				|  |  | +            if mini_program:
 | 
	
		
			
				|  |  | +                root_source_id_list = [
 | 
	
		
			
				|  |  | +                    urllib.parse.parse_qs(
 | 
	
		
			
				|  |  | +                        urllib.parse.unquote(i['path'])
 | 
	
		
			
				|  |  | +                    )['rootSourceId'][0]
 | 
	
		
			
				|  |  | +                    for i in mini_program
 | 
	
		
			
				|  |  | +                ]
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                root_source_id_list = []
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            publish_timestamp_s = UNKNOWN_STATUS
 | 
	
		
			
				|  |  | +            root_source_id_list = []
 | 
	
		
			
				|  |  | +    except Exception as e:
 | 
	
		
			
				|  |  | +        publish_timestamp_s = REQUEST_FAIL_STATUS
 | 
	
		
			
				|  |  | +        root_source_id_list = []
 | 
	
		
			
				|  |  | +        error_msg = traceback.format_exc()
 | 
	
		
			
				|  |  | +        print(e, error_msg)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    update_sql = f"""
 | 
	
		
			
				|  |  | +            UPDATE official_articles_v2
 | 
	
		
			
				|  |  | +            SET publish_timestamp = %s, root_source_id_list = %s
 | 
	
		
			
				|  |  | +            WHERE wx_sn = %s;
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +    db_client.update(
 | 
	
		
			
				|  |  | +        sql=update_sql,
 | 
	
		
			
				|  |  | +        params=(
 | 
	
		
			
				|  |  | +            publish_timestamp_s,
 | 
	
		
			
				|  |  | +            json.dumps(root_source_id_list, ensure_ascii=False),
 | 
	
		
			
				|  |  | +            wx_sn
 | 
	
		
			
				|  |  | +        ))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def get_article_detail_job():
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    获取发布文章详情
 | 
	
		
			
				|  |  | +    :return:
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    try:
 | 
	
		
			
				|  |  | +        db_client = PQMySQL()
 | 
	
		
			
				|  |  | +    except Exception as e:
 | 
	
		
			
				|  |  | +        error_msg = traceback.format_exc()
 | 
	
		
			
				|  |  | +        bot(
 | 
	
		
			
				|  |  | +            title="获取文章详情任务连接数据库失败",
 | 
	
		
			
				|  |  | +            detail={
 | 
	
		
			
				|  |  | +                "job": "get_article_detail_job",
 | 
	
		
			
				|  |  | +                "error": e,
 | 
	
		
			
				|  |  | +                "msg": error_msg
 | 
	
		
			
				|  |  | +            }
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  | +        return
 | 
	
		
			
				|  |  | +    article_tuple = get_articles(db_client)
 | 
	
		
			
				|  |  | +    for article in tqdm(article_tuple):
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            update_publish_timestamp(db_client=db_client, row=article)
 | 
	
		
			
				|  |  | +        except Exception as e:
 | 
	
		
			
				|  |  | +            print(e)
 | 
	
		
			
				|  |  | +            error_msg = traceback.format_exc()
 | 
	
		
			
				|  |  | +            print(error_msg)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  def main():
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      main
 | 
	
		
			
				|  |  |      :return:
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      update_job()
 | 
	
		
			
				|  |  | -    time.sleep(60)
 | 
	
		
			
				|  |  |      check_job()
 | 
	
		
			
				|  |  | +    get_article_detail_job()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == '__main__':
 | 
	
		
			
				|  |  | -    main()
 | 
	
		
			
				|  |  | +    main()
 |