@@ -37,11 +37,11 @@ class GongzhonghaoAuthor1:
             return None
         token_dict = {
             "token_id": configs[0]["id"],
-            "title": configs[0]["title"],
-            "token": dict(eval(configs[0]["config"]))["token"],
-            "cookie": dict(eval(configs[0]["config"]))["cookie"],
+            "title": configs[0]["title"].strip(),
+            "token": dict(eval(configs[0]["config"]))["token"].strip(),
+            "cookie": dict(eval(configs[0]["config"]))["cookie"].strip(),
             "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(configs[0]["update_time"]/1000))),
-            "operator": configs[0]["operator"]
+            "operator": configs[0]["operator"].strip()
         }
         # for k, v in token_dict.items():
         #     print(f"{k}:{v}")
@@ -59,6 +59,7 @@ class GongzhonghaoAuthor1:
         len_sheet = len(user_sheet)
         if len_sheet >= 101:
             len_sheet = 101
+        Common.logger(log_type, crawler).info(f"len_sheet:{len_sheet}")
         for i in range(1, len_sheet):
         # for i in range(1, 3):
             user_name = user_sheet[i][0]
@@ -76,7 +77,7 @@ class GongzhonghaoAuthor1:
             tag4 = user_sheet[i][10]
             tag5 = user_sheet[i][11]
             tag6 = user_sheet[i][12]
-            Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息")
+            Common.logger(log_type, crawler).info(f"正在更新第{i}个用户:{user_name}")
             if out_uid is None or our_uid is None:
                 # 用来创建our_id的信息
                 user_dict = {
@@ -287,60 +288,60 @@ class GongzhonghaoAuthor1:
                 begin += 5
                 app_msg_list = r.json()['app_msg_list']
                 for article in app_msg_list:
-                    try:
-                        create_time = article.get('create_time', 0)
-                        publish_time_stamp = int(create_time)
-                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                        article_url = article.get('link', '')
-                        video_dict = {
-                            'video_id': article.get('aid', ''),
-                            'video_title': article.get("title", "").replace(' ', '').replace('"', '').replace("'", ""),
-                            'publish_time_stamp': publish_time_stamp,
-                            'publish_time_str': publish_time_str,
-                            'user_name': user_dict["user_name"],
-                            'play_cnt': 0,
-                            'comment_cnt': 0,
-                            'like_cnt': 0,
-                            'share_cnt': 0,
-                            'user_id': user_dict['user_id'],
-                            'avatar_url': user_dict['avatar_url'],
-                            'cover_url': article.get('cover', ''),
-                            'article_url': article.get('link', ''),
-                            'video_url': cls.get_video_url(article_url, env),
-                            'session': f'gongzhonghao-author1-{int(time.time())}'
-                        }
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    # try:
+                    create_time = article.get('create_time', 0)
+                    publish_time_stamp = int(create_time)
+                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+                    article_url = article.get('link', '')
+                    video_dict = {
+                        'video_id': article.get('aid', ''),
+                        'video_title': article.get("title", "").replace(' ', '').replace('"', '').replace("'", ""),
+                        'publish_time_stamp': publish_time_stamp,
+                        'publish_time_str': publish_time_str,
+                        'user_name': user_dict["user_name"],
+                        'play_cnt': 0,
+                        'comment_cnt': 0,
+                        'like_cnt': 0,
+                        'share_cnt': 0,
+                        'user_id': user_dict['user_id'],
+                        'avatar_url': user_dict['avatar_url'],
+                        'cover_url': article.get('cover', ''),
+                        'article_url': article.get('link', ''),
+                        'video_url': cls.get_video_url(article_url, env),
+                        'session': f'gongzhonghao-author1-{int(time.time())}'
+                    }
+                    for k, v in video_dict.items():
+                        Common.logger(log_type, crawler).info(f"{k}:{v}")

-                        if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
-                            Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
-                            return
+                    if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
+                        Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                        return

-                        if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
-                            Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
-                        # 标题敏感词过滤
-                        elif any(str(word) if str(word) in video_dict['video_title'] else False
-                                 for word in get_config_from_mysql(log_type=log_type,
-                                                                   source=crawler,
-                                                                   env=env,
-                                                                   text="filter",
-                                                                   action="")) is True:
-                            Common.logger(log_type, crawler).info("标题已中过滤词\n")
-                        # 已下载判断
-                        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                            Common.logger(log_type, crawler).info("视频已下载\n")
-                        # 标题相似度
-                        elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
-                            Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
-                        else:
-                            cls.download_publish(log_type=log_type,
-                                                 crawler=crawler,
-                                                 video_dict=video_dict,
-                                                 rule_dict=rule_dict,
-                                                 user_dict=user_dict,
-                                                 env=env)
-                    except Exception as e:
-                        Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
+                        Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                    # 标题敏感词过滤
+                    elif any(str(word) if str(word) in video_dict['video_title'] else False
+                             for word in get_config_from_mysql(log_type=log_type,
+                                                               source=crawler,
+                                                               env=env,
+                                                               text="filter",
+                                                               action="")) is True:
+                        Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                    # 已下载判断
+                    elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
+                        Common.logger(log_type, crawler).info("视频已下载\n")
+                    # 标题相似度
+                    elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
+                        Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                    else:
+                        cls.download_publish(log_type=log_type,
+                                             crawler=crawler,
+                                             video_dict=video_dict,
+                                             rule_dict=rule_dict,
+                                             user_dict=user_dict,
+                                             env=env)
+                    # except Exception as e:
+                    #     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
                 time.sleep(60)

@@ -476,23 +477,23 @@ class GongzhonghaoAuthor1:
             Common.logger(log_type, crawler).warning(f"抓取用户列表为空\n")
             return
         for user_dict in user_list:
-            try:
-                Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  rule_dict=rule_dict,
-                                  user_dict=user_dict,
-                                  env=env)
-                Common.logger(log_type, crawler).info('休眠 60 秒\n')
-                time.sleep(60)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+            # try:
+            Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+            cls.get_videoList(log_type=log_type,
+                              crawler=crawler,
+                              rule_dict=rule_dict,
+                              user_dict=user_dict,
+                              env=env)
+            Common.logger(log_type, crawler).info('休眠 60 秒\n')
+            time.sleep(60)
+            # except Exception as e:
+            #     Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')


 if __name__ == "__main__":
-    # GongzhonghaoAuthor1.get_token("author", "gongzhonghao", "dev")
+    GongzhonghaoAuthor1.get_token("author", "gongzhonghao", "prod")
     # print(GongzhonghaoAuthor1.get_users("author", "gongzhonghao", "Bzv72P", "dev"))
     # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
     # print(title_like("author", "gongzhonghao", "公众号", "123", "dev"))
-    print(GongzhonghaoAuthor1.get_user_info("author", "gongzhonghao", "幸福花朵", "dev"))
+    # print(GongzhonghaoAuthor1.get_user_info("author", "gongzhonghao", "幸福花朵", "dev"))
     pass