|
@@ -191,6 +191,7 @@ class SaveFwhDataToDatabase(FwhDataRecycle):
|
|
|
except Exception as e:
|
|
|
self.update_article_read_cnt(wx_sn, show_view_count)
|
|
|
except Exception as e:
|
|
|
+ print(f"article {url} is not available, skip it")
|
|
|
print(e)
|
|
|
|
|
|
def deal(self):
|
|
@@ -200,6 +201,46 @@ class SaveFwhDataToDatabase(FwhDataRecycle):
|
|
|
self.process_each_account_data(publish_articles)
|
|
|
|
|
|
|
|
|
-if __name__ == '__main__':
|
|
|
- SaveFwhDataToDatabase().deal()
|
|
|
class FwhDataExportTemp(FwhDataRecycle):
    """Temporary exporter that joins published articles with fission metrics.

    Reads articles from ``official_articles_v2`` for a fixed publish window,
    adds the summed ``uv`` / ``first_uv`` / ``split_uv`` values looked up in
    ``changwen_data_rootsourceid`` and writes the result to a CSV file.
    """

    # Metric columns aggregated per article by ``get_fission_info``.
    _FISSION_FIELDS = ("uv", "first_uv", "split_uv")

    def get_publish_articles(self):
        """Fetch the articles to export.

        Returns:
            Whatever ``self.piaoquan_client.fetch`` returns for the query;
            ``deal`` treats it as an iterable of dict-like rows (the query is
            run with ``DictCursor``).
        """
        # NOTE(review): the publish window is hard-coded; adjust the two dates
        # below when re-running the export for another period.
        # Plain string on purpose: no interpolation is needed, and the '%'
        # in the date format is sent as-is because no params are passed.
        sql = """
            select accountName, title, article_group, ItemIndex, show_view_count, from_unixtime(createTime, '%Y-%m-%d'), root_source_id_list
            from official_articles_v2
            where ghId in (
                select gzh_id from article_gzh_developer
            )
            and from_unixtime(publish_timestamp) between '2025-06-08' and '2025-06-09';
        """
        return self.piaoquan_client.fetch(query=sql, cursor_type=DictCursor)

    def get_fission_info(self, root_source_id_list):
        """Get fission info for a JSON-encoded list of root source ids.

        Args:
            root_source_id_list: JSON text holding the root source ids, or a
                falsy value (``None`` / ``""``) when the article has none.

        Returns:
            The rows returned by ``self.long_articles_client.fetch`` (summed
            ``uv``, ``first_uv`` and ``split_uv``), or an empty list when there
            are no ids to look up.

        Raises:
            json.JSONDecodeError: if ``root_source_id_list`` is non-empty but
                not valid JSON.
        """
        root_source_ids = json.loads(root_source_id_list) if root_source_id_list else []
        if not root_source_ids:
            # An empty tuple would be rendered as ``in ()``, which is a SQL
            # syntax error, so skip the query altogether.
            return []

        query = """
            select sum(uv) as 'uv', sum(first_uv) as 'first_uv', sum(split_uv) as 'split_uv'
            from changwen_data_rootsourceid
            where root_source_id in %s;
        """
        return self.long_articles_client.fetch(
            query=query,
            cursor_type=DictCursor,
            params=(tuple(root_source_ids),),
        )

    def deal(self, output_path='temp.csv'):
        """Build the export and write it as CSV.

        Args:
            output_path: destination of the CSV file; defaults to
                ``temp.csv`` in the current working directory.
        """
        # Local import: pandas is only needed by this one-off export.
        import pandas as pd

        rows = []
        # ``or []`` keeps the export alive if the fetch yields None.
        for article in self.get_publish_articles() or []:
            fission_info = self.get_fission_info(article['root_source_id_list'])
            # No ids / no result: leave the metrics empty instead of crashing.
            totals = fission_info[0] if fission_info else {}
            for field in self._FISSION_FIELDS:
                article[field] = totals.get(field)
            rows.append(article)

        pd.DataFrame(rows).to_csv(output_path, index=False)
|
|
|
+
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the exporter, then run the CSV export.
    exporter = FwhDataExportTemp()
    exporter.deal()
|