"""
save_to_db.py

Helpers that insert crawled video, account and recommendation records into
the database.

@author: luojunhui
"""
import traceback

from applications.aliyunLogApi import log


  6. def insert_into_single_video_source_table(db_client, video_item):
  7. """
  8. insert video into single video source table
  9. """
  10. insert_sql = f"""
  11. INSERT INTO publish_single_video_source
  12. (content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url, video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account)
  13. values
  14. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  15. """
  16. try:
  17. db_client.save(
  18. query=insert_sql,
  19. params=(
  20. video_item["content_trace_id"],
  21. video_item["article_title"],
  22. video_item["out_account_id"],
  23. video_item["out_account_name"],
  24. video_item["read_cnt"],
  25. video_item["like_cnt"],
  26. video_item["article_url"],
  27. video_item["cover_url"],
  28. video_item["video_oss_path"],
  29. video_item["publish_timestamp"],
  30. video_item["crawler_timestamp"],
  31. video_item["url_unique_md5"],
  32. video_item["category"],
  33. video_item["tags"],
  34. video_item["platform"],
  35. video_item["source_account"],
  36. ),
  37. )
  38. except Exception as e:
  39. log(
  40. task="{}_video_crawler".format(video_item["platform"]),
  41. function="save_each_video",
  42. message="save video failed",
  43. data={
  44. "error": str(e),
  45. "traceback": traceback.format_exc(),
  46. "video_id": video_item["url_unique_md5"],
  47. "oss_path": video_item["video_oss_path"],
  48. },
  49. )
  50. def insert_into_video_meta_accounts_table(db_client, account_item):
  51. """
  52. insert account into account meta table
  53. """
  54. insert_sql = f"""
  55. insert into video_meta_accounts
  56. (platform, account_id, account_name, max_cursor, account_init_date, status, priority)
  57. values
  58. (%s, %s, %s, %s, %s, %s, %s);
  59. """
  60. try:
  61. db_client.save(
  62. query=insert_sql,
  63. params=(
  64. account_item["platform"],
  65. account_item["account_id"],
  66. account_item["account_name"],
  67. account_item["max_cursor"],
  68. account_item["account_init_date"],
  69. account_item["status"],
  70. account_item["priority"],
  71. ),
  72. )
  73. except Exception as e:
  74. log(
  75. task="{}_account_crawler".format(account_item["platform"]),
  76. function="save_each_account",
  77. message="save account failed",
  78. data={
  79. "error": str(e),
  80. "traceback": traceback.format_exc(),
  81. "account_id": account_item["account_id"],
  82. },
  83. )
  84. def insert_into_associated_recommendation_table(db_client, associated_recommendation_item):
  85. """
  86. insert recommendation into recommendation table
  87. """
  88. # check whether duplicate video
  89. fetch_query = f"""
  90. select id from video_association
  91. where account_id = %s and platform = %s and recommend_video_id = %s;
  92. """
  93. duplicate_id = db_client.fetch(
  94. query=fetch_query, params=(
  95. associated_recommendation_item["account_id"],
  96. associated_recommendation_item["platform"],
  97. associated_recommendation_item["recommend_video_id"]
  98. )
  99. )
  100. if duplicate_id:
  101. return
  102. # insert into table
  103. insert_query = f"""
  104. insert into video_association
  105. (account_name, account_id, recommend_video_id, title, read_cnt, duration, seed_account, seed_title, recommend_date, platform)
  106. values
  107. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  108. """
  109. try:
  110. db_client.save(
  111. query=insert_query,
  112. params=(
  113. associated_recommendation_item["account_name"],
  114. associated_recommendation_item["account_id"],
  115. associated_recommendation_item["recommend_video_id"],
  116. associated_recommendation_item["title"],
  117. associated_recommendation_item["read_cnt"],
  118. associated_recommendation_item["duration"],
  119. associated_recommendation_item["seed_account"],
  120. associated_recommendation_item["seed_title"],
  121. associated_recommendation_item["recommend_date"],
  122. associated_recommendation_item["platform"]
  123. )
  124. )
  125. except Exception as e:
  126. log(
  127. task="{}_recommendation_crawler".format(associated_recommendation_item["platform"]),
  128. function="save_each_recommendation",
  129. message="save recommendation failed",
  130. data={
  131. "error": str(e),
  132. "traceback": traceback.format_exc(),
  133. "item": associated_recommendation_item
  134. }
  135. )