basic.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. import os
  2. import time
  3. import datetime
  4. import requests
  5. def get_status_field_by_process(process):
  6. match process:
  7. case "upload":
  8. status = "upload_status"
  9. update_timestamp = "upload_status_ts"
  10. case "understanding":
  11. status = "understanding_status"
  12. update_timestamp = "understanding_status_ts"
  13. case "summary":
  14. status = "summary_status"
  15. update_timestamp = "summary_status_ts"
  16. case "rewrite":
  17. status = "rewrite_status"
  18. update_timestamp = "rewrite_status_ts"
  19. case _:
  20. raise ValueError(f"Unexpected task: {process}")
  21. return status, update_timestamp
  22. def roll_back_lock_tasks(
  23. db_client, process, max_process_time, init_status, processing_status
  24. ) -> int:
  25. """
  26. rollback tasks which have been locked for a long time
  27. """
  28. status, update_timestamp = get_status_field_by_process(process)
  29. now_timestamp = int(time.time())
  30. timestamp_threshold = now_timestamp - max_process_time
  31. update_query = f"""
  32. update video_content_understanding
  33. set {status} = %s
  34. where {status} = %s and {update_timestamp} < %s;
  35. """
  36. rollback_rows = db_client.save(
  37. query=update_query, params=(init_status, processing_status, timestamp_threshold)
  38. )
  39. return rollback_rows
  40. def download_file(task_id, oss_path):
  41. """
  42. 下载视频文件
  43. """
  44. video_url = "https://rescdn.yishihui.com/" + oss_path
  45. file_name = "static/{}.mp4".format(task_id)
  46. if os.path.exists(file_name):
  47. return file_name
  48. proxies = {"http": None, "https": None}
  49. with open(file_name, "wb") as f:
  50. response = requests.get(video_url, proxies=proxies)
  51. f.write(response.content)
  52. return file_name
  53. def generate_summary_prompt(text):
  54. prompt = f"""
  55. 你是1个优秀的公众号文章写作大师,我对你有以下要求
  56. 视频总结:{text}
  57. 第一个要求:请仔细阅读以上视频总结,挑选其中最吸引人的情节或话题,总结为100字左右文章精彩总结(字数计算包括标点符号),这部分内容为段落1。
  58. 句子段落之间以悬念承接,可以吸引读者往下读第二句。
  59. 第二个要求:在这100字内容的结尾处,增加1-2句话的引导,引导大家去观看上面的视频了解详情,可以加一些emoji表情。注意是点击上面的视频,不是下面的视频。这部分内容为段落2。
  60. 你最终输出一段总结内容,将第一段和第二段之间空格一行。不用加标题或者主题,也不用写第几段、多少字这样的话。整体的语言风格要口语化、直接点,要让60岁以上的老年人能看懂、能共情。人的名字尽量用全名,不用简称。
  61. """
  62. return prompt
  63. def update_task_queue_status(db_client, task_id, process, ori_status, new_status):
  64. """
  65. 回滚长时间处于处理中的任务
  66. """
  67. status, update_timestamp = get_status_field_by_process(process)
  68. update_query = f"""
  69. update video_content_understanding
  70. set {status} = %s, {update_timestamp} = %s
  71. where {status} = %s and id = %s;
  72. """
  73. roll_back_rows = db_client.save(
  74. query=update_query,
  75. params=(
  76. new_status,
  77. datetime.datetime.now(),
  78. ori_status,
  79. task_id,
  80. ),
  81. )
  82. return roll_back_rows
  83. def update_video_pool_status(db_client, content_trace_id, ori_status, new_status):
  84. """
  85. 回滚长时间处于处理中的任务
  86. """
  87. update_sql = f"""
  88. update publish_single_video_source
  89. set status = %s
  90. where content_trace_id = %s and status = %s;
  91. """
  92. # update publish_single_source_status
  93. update_query = f"""
  94. update publish_single_video_source
  95. set status = %s
  96. where content_trace_id = %s and status = %s
  97. """
  98. affected_rows = db_client.save(
  99. query=update_query, params=(new_status, content_trace_id, ori_status)
  100. )
  101. return affected_rows