luojunhui 1 月之前
父節點
當前提交
cc04ce616e
共有 1 個文件被更改,包括 43 次插入和 2 次删除
  1. 43 2
      tasks/data_tasks/fwh_data_recycle.py

+ 43 - 2
tasks/data_tasks/fwh_data_recycle.py

@@ -191,6 +191,7 @@ class SaveFwhDataToDatabase(FwhDataRecycle):
                 except Exception as e:
                     self.update_article_read_cnt(wx_sn, show_view_count)
             except Exception as e:
+                print(f"article {url} is not available, skip it")
                 print(e)
 
     def deal(self):
@@ -200,6 +201,46 @@ class SaveFwhDataToDatabase(FwhDataRecycle):
             self.process_each_account_data(publish_articles)
 
 
-if __name__ == '__main__':
-    SaveFwhDataToDatabase().deal()
+class FwhDataExportTemp(FwhDataRecycle):
+    """Temporary export of published articles plus their fission stats to CSV.
+
+    Reads articles from ``official_articles_v2`` for accounts listed in
+    ``article_gzh_developer``, attaches the summed ``uv`` / ``first_uv`` /
+    ``split_uv`` values from ``changwen_data_rootsourceid`` to each article,
+    and writes the combined rows to ``temp.csv``.
+    """
+
+    def get_publish_articles(self):
+        """Return the articles published inside the hard-coded date window.
+
+        The window is the literal ``between '2025-06-08' and '2025-06-09'``
+        in the query below. Rows are fetched with ``DictCursor``, so each row
+        is keyed by the selected column expressions.
+        """
+        # Plain (non-f) string: the query has no interpolated values, and the
+        # '%Y-%m-%d' format specifier must reach the database untouched.
+        sql = """
+            select accountName, title, article_group, ItemIndex, show_view_count, from_unixtime(createTime, '%Y-%m-%d'), root_source_id_list
+            from official_articles_v2
+            where ghId in (
+                select gzh_id from article_gzh_developer
+                )
+            and from_unixtime(publish_timestamp) between '2025-06-08' and '2025-06-09';
+        """
+        return self.piaoquan_client.fetch(query=sql, cursor_type=DictCursor)
+
+    def get_fission_info(self, root_source_id_list):
+        """Fetch the summed fission stats for a set of root source ids.
+
+        :param root_source_id_list: JSON-encoded array of root source ids;
+            ``None`` or an empty string is treated as "no ids".
+        :return: whatever ``long_articles_client.fetch`` returns for the
+            aggregate query, or a single all-``None`` row when there are no
+            ids to look up (the same shape the aggregate yields when nothing
+            matches).
+        """
+        # json.loads(None) raises TypeError, so treat a missing value as empty.
+        root_source_ids = json.loads(root_source_id_list) if root_source_id_list else []
+        if not root_source_ids:
+            # An empty tuple cannot form a valid `IN (...)` list, so skip the
+            # query. NOTE(review): assumes the client expands the tuple param
+            # into the IN list (pymysql-style) -- confirm against fetch().
+            return [{'uv': None, 'first_uv': None, 'split_uv': None}]
+
+        query = """
+            select sum(uv) as 'uv', sum(first_uv) as 'first_uv', sum(split_uv) as 'split_uv'
+            from changwen_data_rootsourceid 
+            where root_source_id
+            in %s;
+        """
+        return self.long_articles_client.fetch(
+            query=query,
+            cursor_type=DictCursor,
+            params=(tuple(root_source_ids),),
+        )
+
+    def deal(self):
+        """Build the export rows and write them to ``temp.csv``."""
+        # Imported locally so the rest of the module does not require pandas.
+        import pandas as pd
+
+        rows = []
+        for article in self.get_publish_articles():
+            fission_info = self.get_fission_info(article['root_source_id_list'])
+            # Tolerate an empty / None fetch result: the article is still
+            # exported, just with blank fission columns.
+            stats = fission_info[0] if fission_info else {}
+            for key in ('uv', 'first_uv', 'split_uv'):
+                article[key] = stats.get(key)
+            rows.append(article)
+        pd.DataFrame(rows).to_csv('temp.csv', index=False)
+
 
+if __name__ == '__main__':  # script entry point: runs only when the module is executed directly
+    FwhDataExportTemp().deal()  # run the temporary CSV export