save_to_db.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. """
  2. @author: luojunhui
  3. """
  4. import traceback
  5. from applications.aliyunLogApi import log
  6. def insert_into_single_video_source_table(db_client, video_item):
  7. """
  8. insert video into single video source table
  9. """
  10. insert_sql = f"""
  11. INSERT INTO publish_single_video_source
  12. (content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url, video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account)
  13. values
  14. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  15. """
  16. try:
  17. db_client.save(
  18. query=insert_sql,
  19. params=(
  20. video_item["content_trace_id"],
  21. video_item["article_title"],
  22. video_item["out_account_id"],
  23. video_item["out_account_name"],
  24. video_item["read_cnt"],
  25. video_item["like_cnt"],
  26. video_item["article_url"],
  27. video_item["cover_url"],
  28. video_item["video_oss_path"],
  29. video_item["publish_timestamp"],
  30. video_item["crawler_timestamp"],
  31. video_item["url_unique_md5"],
  32. video_item["category"],
  33. video_item["tags"],
  34. video_item["platform"],
  35. video_item["source_account"],
  36. ),
  37. )
  38. except Exception as e:
  39. log(
  40. task="{}_video_crawler".format(video_item["platform"]),
  41. function="save_each_video",
  42. message="save video failed",
  43. data={
  44. "error": str(e),
  45. "traceback": traceback.format_exc(),
  46. "video_id": video_item["url_unique_md5"],
  47. "oss_path": video_item["video_oss_path"],
  48. },
  49. )
  50. def insert_into_video_meta_accounts_table(db_client, account_item):
  51. """
  52. insert account into account meta table
  53. """
  54. insert_sql = f"""
  55. insert into video_meta_accounts
  56. (platform, account_id, account_name, max_cursor, account_init_date, status, priority)
  57. values
  58. (%s, %s, %s, %s, %s, %s, %s);
  59. """
  60. try:
  61. db_client.save(
  62. query=insert_sql,
  63. params=(
  64. account_item["platform"],
  65. account_item["account_id"],
  66. account_item["account_name"],
  67. account_item["max_cursor"],
  68. account_item["account_init_date"],
  69. account_item["status"],
  70. account_item["priority"],
  71. ),
  72. )
  73. except Exception as e:
  74. log(
  75. task="{}_account_crawler".format(account_item["platform"]),
  76. function="save_each_account",
  77. message="save account failed",
  78. data={
  79. "error": str(e),
  80. "traceback": traceback.format_exc(),
  81. "account_id": account_item["account_id"],
  82. },
  83. )
  84. def insert_into_candidate_account_pool_table(db_client, account_item):
  85. """
  86. insert recommendation into recommendation table
  87. """
  88. # check whether duplicate video
  89. fetch_query = f"""
  90. select id from crawler_candidate_account_pool
  91. where account_id = %s and platform = %s;
  92. """
  93. duplicate_id = db_client.fetch(
  94. query=fetch_query, params=(
  95. account_item["account_id"],
  96. account_item["platform"]
  97. )
  98. )
  99. if duplicate_id:
  100. return
  101. # insert into table
  102. insert_query = f"""
  103. insert into crawler_candidate_account_pool
  104. (account_name, account_id, title_list, platform, crawler_date)
  105. values
  106. (%s, %s, %s, %s, %s)
  107. """
  108. try:
  109. db_client.save(
  110. query=insert_query,
  111. params=(
  112. account_item["account_name"],
  113. account_item["account_id"],
  114. account_item["title_list"],
  115. account_item["platform"],
  116. account_item["crawler_date"]
  117. )
  118. )
  119. except Exception as e:
  120. log(
  121. task="{}_account_crawler".format(account_item["platform"]),
  122. function="save_each_account",
  123. message="save account failed",
  124. data={
  125. "error": str(e),
  126. "traceback": traceback.format_exc(),
  127. "item": account_item
  128. }
  129. )