save_to_db.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. """
  2. @author: luojunhui
  3. """
  4. import traceback
  5. from applications.aliyunLogApi import log
  6. def insert_into_single_video_source_table(db_client, video_item):
  7. """
  8. insert video into single video source table
  9. """
  10. insert_sql = f"""
  11. INSERT INTO publish_single_video_source
  12. (content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url,
  13. video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account,
  14. category_status, audit_status, audit_video_id, mini_program_title)
  15. values
  16. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  17. """
  18. try:
  19. db_client.save(
  20. query=insert_sql,
  21. params=(
  22. video_item["content_trace_id"],
  23. video_item["article_title"],
  24. video_item["out_account_id"],
  25. video_item["out_account_name"],
  26. video_item["read_cnt"],
  27. video_item["like_cnt"],
  28. video_item["article_url"],
  29. video_item["cover_url"],
  30. video_item["video_oss_path"],
  31. video_item["publish_timestamp"],
  32. video_item["crawler_timestamp"],
  33. video_item["url_unique_md5"],
  34. video_item["category"],
  35. video_item["tags"],
  36. video_item["platform"],
  37. video_item["source_account"],
  38. video_item["category_status"],
  39. video_item["audit_status"],
  40. video_item["audit_video_id"],
  41. video_item["mini_program_title"],
  42. ),
  43. )
  44. except Exception as e:
  45. log(
  46. task="{}_video_crawler".format(video_item["platform"]),
  47. function="save_each_video",
  48. message="save video failed",
  49. data={
  50. "error": str(e),
  51. "traceback": traceback.format_exc(),
  52. "video_id": video_item["url_unique_md5"],
  53. "oss_path": video_item["video_oss_path"],
  54. },
  55. )
  56. def insert_into_video_meta_accounts_table(db_client, account_item):
  57. """
  58. insert account into account meta table
  59. """
  60. insert_sql = f"""
  61. insert into video_meta_accounts
  62. (platform, account_id, account_name, max_cursor, account_init_date, status, priority)
  63. values
  64. (%s, %s, %s, %s, %s, %s, %s);
  65. """
  66. try:
  67. db_client.save(
  68. query=insert_sql,
  69. params=(
  70. account_item["platform"],
  71. account_item["account_id"],
  72. account_item["account_name"],
  73. account_item["max_cursor"],
  74. account_item["account_init_date"],
  75. account_item["status"],
  76. account_item["priority"],
  77. ),
  78. )
  79. except Exception as e:
  80. log(
  81. task="{}_account_crawler".format(account_item["platform"]),
  82. function="save_each_account",
  83. message="save account failed",
  84. data={
  85. "error": str(e),
  86. "traceback": traceback.format_exc(),
  87. "account_id": account_item["account_id"],
  88. },
  89. )
  90. def insert_into_candidate_account_pool_table(db_client, account_item):
  91. """
  92. insert recommendation into recommendation table
  93. """
  94. # check whether duplicate video
  95. fetch_query = f"""
  96. select id from crawler_candidate_account_pool
  97. where account_id = %s and platform = %s;
  98. """
  99. duplicate_id = db_client.fetch(
  100. query=fetch_query, params=(
  101. account_item["account_id"],
  102. account_item["platform"]
  103. )
  104. )
  105. if duplicate_id:
  106. print("duplicate id: {}".format(duplicate_id))
  107. return
  108. # insert into table
  109. insert_query = f"""
  110. insert into crawler_candidate_account_pool
  111. (account_name, account_id, title_list, platform, crawler_date)
  112. values
  113. (%s, %s, %s, %s, %s)
  114. """
  115. try:
  116. db_client.save(
  117. query=insert_query,
  118. params=(
  119. account_item["account_name"],
  120. account_item["account_id"],
  121. account_item["title_list"],
  122. account_item["platform"],
  123. account_item["crawler_date"]
  124. )
  125. )
  126. except Exception as e:
  127. log(
  128. task="{}_account_crawler".format(account_item["platform"]),
  129. function="save_each_account",
  130. message="save account failed",
  131. data={
  132. "error": str(e),
  133. "traceback": traceback.format_exc(),
  134. "item": account_item
  135. }
  136. )