# hurry.py
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. import pymysql
  7. from tqdm import tqdm
# Shared MySQL connection reused by every update in this script.
# NOTE(review): credentials are hard-coded in source — move them to a config
# file or environment variables before this script is shared further.
spider_connection = pymysql.connect(
    host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
    port=3306,
    user="crawler",
    passwd="crawler123456@",
    db="piaoquan-crawler",
    charset="utf8mb4"
)
  16. with open("id.txt", encoding="utf-8") as f:
  17. data = f.readlines()
  18. L = {}
  19. for line in data:
  20. new_id = line.split(",")[1].strip()
  21. old_id = line.split(",")[0]
  22. L[new_id] = old_id
  23. print(len(L))
  24. with open("update_video_trace_id.json", encoding="utf-8") as f:
  25. publish_data = json.loads(f.read())
  26. print(len(publish_data))
  27. def update_into_long_videos(trace_id, vid1, vid2, vid3):
  28. """
  29. 更新video_id
  30. :param trace_id:
  31. :param vid1:
  32. :param vid2:
  33. :param vid3:
  34. :return:
  35. """
  36. update_sql = f"""
  37. UPDATE long_articles_video
  38. set recall_video_id1 = %s, recall_video_id2 = %s, recall_video_id3 = %s
  39. where trace_id = %s;
  40. """
  41. cursor = spider_connection.cursor()
  42. cursor.execute(
  43. update_sql,
  44. (vid1, vid2, vid3, trace_id)
  45. )
  46. spider_connection.commit()
  47. ff = 0
  48. for item in tqdm(publish_data):
  49. trace_id = item['trace_id']
  50. data_info = item['result_data']
  51. try:
  52. vid_list = []
  53. for video_obj in json.loads(data_info):
  54. path = video_obj['productionPath']
  55. video_id = path.split("id%3D")[1].split("%26su")[0]
  56. # if L.get(video_id):
  57. vid_list.append(video_id)
  58. update_into_long_videos(
  59. trace_id=trace_id,
  60. vid1=vid_list[0],
  61. vid2=vid_list[1],
  62. vid3=vid_list[2]
  63. )
  64. except Exception as e:
  65. print("No videos {}".format(e))
  66. print(ff)