# save_to_db.py
  1. """
  2. @author: luojunhui
  3. """
  4. import traceback
  5. from applications.aliyunLogApi import log
  6. def insert_into_single_video_source_table(db_client, video_item):
  7. """
  8. insert video into single video source table
  9. """
  10. insert_sql = f"""
  11. INSERT INTO publish_single_video_source
  12. (content_trace_id, article_title, out_account_id, out_account_name, read_cnt, like_cnt, article_url, cover_url,
  13. video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, category, tags, platform, source_account,
  14. category_status, audit_status, audit_video_id, mini_program_title
  15. )
  16. values
  17. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  18. """
  19. try:
  20. db_client.save(
  21. query=insert_sql,
  22. params=(
  23. video_item["content_trace_id"],
  24. video_item["article_title"],
  25. video_item["out_account_id"],
  26. video_item["out_account_name"],
  27. video_item["read_cnt"],
  28. video_item["like_cnt"],
  29. video_item["article_url"],
  30. video_item["cover_url"],
  31. video_item["video_oss_path"],
  32. video_item["publish_timestamp"],
  33. video_item["crawler_timestamp"],
  34. video_item["url_unique_md5"],
  35. video_item["category"],
  36. video_item["tags"],
  37. video_item["platform"],
  38. video_item["source_account"],
  39. video_item["category_status"],
  40. video_item["audit_status"],
  41. video_item["audit_video_id"],
  42. video_item["mini_program_title"],
  43. ),
  44. )
  45. except Exception as e:
  46. log(
  47. task="{}_video_crawler".format(video_item["platform"]),
  48. function="save_each_video",
  49. message="save video failed",
  50. data={
  51. "error": str(e),
  52. "traceback": traceback.format_exc(),
  53. "video_id": video_item["url_unique_md5"],
  54. "oss_path": video_item["video_oss_path"],
  55. },
  56. )
  57. def insert_into_article_meta_table(db_client, article_item):
  58. """
  59. insert article meta table
  60. """
  61. insert_query = f"""
  62. insert into crawler_meta_article
  63. (
  64. platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt,
  65. description, publish_time, crawler_time, status, unique_index, llm_sensitivity, title_sensitivity
  66. )
  67. VALUES
  68. (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
  69. """
  70. try:
  71. db_client.save(
  72. query=insert_query,
  73. params=(
  74. article_item.platform,
  75. article_item.mode,
  76. article_item.category,
  77. article_item.out_account_id,
  78. article_item.article_index,
  79. article_item.title,
  80. article_item.link,
  81. article_item.read_cnt,
  82. article_item.like_cnt,
  83. article_item.description,
  84. article_item.publish_time,
  85. article_item.crawler_time,
  86. article_item.status,
  87. article_item.unique_index,
  88. article_item.llm_sensitivity,
  89. article_item.title_sensitivity
  90. ),
  91. )
  92. except Exception as e:
  93. log(
  94. task="{}_article_crawler".format(article_item.platform),
  95. function="save_each_article",
  96. message="save article failed",
  97. data={
  98. "error": str(e),
  99. "traceback": traceback.format_exc(),
  100. "link": article_item.link
  101. }
  102. )