|
@@ -3,11 +3,9 @@
|
|
|
迁移rootSourceId
|
|
|
"""
|
|
|
import json
|
|
|
-import time
|
|
|
|
|
|
import pymysql
|
|
|
import datetime
|
|
|
-import schedule
|
|
|
from concurrent.futures.thread import ThreadPoolExecutor
|
|
|
|
|
|
source_list = [
|
|
@@ -29,7 +27,13 @@ source_id_list = {
|
|
|
"touliu_tencentGzhArticle_a18c11dd294df014334f7db72830221a": 1,
|
|
|
"touliu_tencentGzhArticle_c2debdc233827497e24b633dea36c57c": 1,
|
|
|
"touliu_tencentGzhArticle_d66796826916665a23c667472ef4dd56": 1,
|
|
|
- "touliu_tencentGzhArticle_f8e97355f3687f57fd4efeb635a7a3a2": 1
|
|
|
+ "touliu_tencentGzhArticle_f8e97355f3687f57fd4efeb635a7a3a2": 1,
|
|
|
+ "touliu_tencentGzhArticle_gh_68e7fdc09fe4_90bb12e53f6628fd5330310c7c3cc344": 1,
|
|
|
+ "touliu_tencentGzhArticle_gh_68e7fdc09fe4_cd602a61ea073e41404572fce51eb297": 1,
|
|
|
+ "touliu_tencentGzhArticle_gh_68e7fdc09fe4_d8fca9b2712f829d625d98bec37db228": 1,
|
|
|
+ "touliu_tencentGzhArticle_gh_77f36c109fb1_1401a97f6537f32b14496cd5fe6caa70": 1,
|
|
|
+ "touliu_tencentGzhArticle_gh_77f36c109fb1_926713998cd1513370b910ba20adda44": 1,
|
|
|
+ "touliu_tencentGzhArticle_gh_77f36c109fb1_4ca7c1c6223501ff4f80913f8363309f": 1
|
|
|
}
|
|
|
|
|
|
|
|
@@ -87,11 +91,12 @@ def process_each_data(data_tuple):
|
|
|
charset="utf8mb4"
|
|
|
)
|
|
|
source_id = json.loads(result)['productionPath'].split("rootSourceId%3D")[1]
|
|
|
+ video_id = json.loads(result)['productionPath'].split("videos%3Fid%3D")[1].split("%26su%")[0]
|
|
|
sql = f"""
|
|
|
INSERT INTO long_articles_root_source_id
|
|
|
- (rootSourceId, accountName, ghId, articleTitle, requestTime, trace_id, push_type)
|
|
|
+ (rootSourceId, accountName, ghId, articleTitle, requestTime, trace_id, push_type, video_id)
|
|
|
values
|
|
|
- (%s, %s, %s, %s, %s, %s, %s);
|
|
|
+ (%s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
|
cursor = connection.cursor()
|
|
|
cursor.execute(
|
|
@@ -103,7 +108,8 @@ def process_each_data(data_tuple):
|
|
|
title,
|
|
|
request_time_stamp,
|
|
|
trace_id,
|
|
|
- source_id_list.get(source_id, 2)
|
|
|
+ source_id_list.get(source_id, 2),
|
|
|
+ video_id
|
|
|
)
|
|
|
)
|
|
|
connection.commit()
|