migrate_vids.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. """
  2. @author: luojunhui
  3. """
  4. import pymysql
  5. from concurrent.futures.thread import ThreadPoolExecutor
  6. def get_data_list():
  7. """
  8. 获取数据
  9. :return:
  10. """
  11. connection = pymysql.connect(
  12. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
  13. port=3306,
  14. user="crawler",
  15. passwd="crawler123456@",
  16. db="piaoquan-crawler",
  17. charset="utf8mb4"
  18. )
  19. sql = "select trace_id, content_id, recall_video_id1, recall_video_id2, recall_video_id3 from long_articles_video where content_status = 2;"
  20. cursor = connection.cursor()
  21. cursor.execute(
  22. sql
  23. )
  24. data = cursor.fetchall()
  25. return data
  26. def mig(single_tuple):
  27. """
  28. :param single_tuple
  29. :return:
  30. """
  31. trace_id = single_tuple[0]
  32. content_id = single_tuple[1]
  33. vid1 = single_tuple[2]
  34. vid2 = single_tuple[3]
  35. vid3 = single_tuple[4]
  36. request_time = trace_id.split("-")[-1]
  37. if "lehuo" in content_id:
  38. video_status = 0
  39. else:
  40. video_status = 1
  41. vid_list = [vid1, vid2, vid3]
  42. for video_id in vid_list:
  43. if video_id:
  44. insert_sql = f"""
  45. INSERT INTO article_match_videos
  46. (video_id, trace_id, content_id, request_time, video_status)
  47. values
  48. (%s, %s, %s, %s, %s);
  49. """
  50. connection = pymysql.connect(
  51. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
  52. port=3306,
  53. user="crawler",
  54. passwd="crawler123456@",
  55. db="piaoquan-crawler",
  56. charset="utf8mb4"
  57. )
  58. cursor = connection.cursor()
  59. cursor.execute(
  60. insert_sql,
  61. (
  62. video_id, trace_id, content_id, request_time, video_status
  63. )
  64. )
  65. connection.commit()
  66. else:
  67. continue
  68. tuple_list = get_data_list()
  69. print(len(tuple_list))
  70. with ThreadPoolExecutor(max_workers=20) as Pool:
  71. Pool.map(mig, tuple_list)