# test.py (~1.6 KB)
# Backfills published videos into the get_off_videos queue from tt_id.txt.
  1. import pymysql
  2. import json
  3. from tqdm import tqdm
  4. spider_connection = pymysql.connect(
  5. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
  6. port=3306,
  7. user="crawler",
  8. passwd="crawler123456@",
  9. db="piaoquan-crawler",
  10. charset="utf8mb4"
  11. )
  12. def find_videos(trace_id):
  13. """
  14. 查询已经发布的视频
  15. :return:
  16. """
  17. select_sql = f"""
  18. select recall_video_id1, recall_video_id2, recall_video_id3 from long_articles_video
  19. where trace_id = '{trace_id}';
  20. """
  21. cursor = spider_connection.cursor()
  22. cursor.execute(select_sql)
  23. data = cursor.fetchall()
  24. vid_tuple = data[0]
  25. return vid_tuple
  26. def insert_into_queue(video_id, publish_time_stamp, trace_id):
  27. update_sql = f"""
  28. INSERT INTO get_off_videos
  29. (video_id, publish_time, video_status, trace_id)
  30. values
  31. (%s, %s, %s, %s);
  32. """
  33. cursor = spider_connection.cursor()
  34. cursor.execute(
  35. update_sql,
  36. (video_id, time_stamp, 1, trace_id)
  37. )
  38. spider_connection.commit()
  39. if __name__ == '__main__':
  40. with open("tt_id.txt", encoding="utf-8") as f:
  41. data_line = f.readlines()
  42. for line in tqdm(data_line):
  43. line_ = line.split("\t")
  44. trace_id = line_[0]
  45. time_stamp = int(int(line_[1].strip()) / 1000)
  46. vid_tuple = find_videos(trace_id)
  47. for video_id in vid_tuple:
  48. try:
  49. insert_into_queue(video_id, time_stamp, trace_id)
  50. except Exception as e:
  51. print(e)