pipeline.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. """
  2. @author: luojunhui
  3. """
  4. import requests
  5. from applications.functions import title_sim_v2_by_list
  6. from applications.functions import get_article_title_url_list
  7. class LongArticlesPipeline(object):
  8. """
  9. Long articles Pipeline
  10. """
  11. @classmethod
  12. def history_exists(cls, title, account_nickname, plan_name):
  13. """
  14. 判断是否存储
  15. :param plan_name:
  16. :param title:
  17. :param account_nickname:
  18. :return:
  19. """
  20. if "【1】" in plan_name or "【2】" in plan_name:
  21. index_list = [1, 2]
  22. else:
  23. index_list = [1, 2, 3, 4, 5, 6, 7, 8]
  24. account_title_list = get_article_title_url_list(
  25. account_nickname,
  26. index_list=index_list
  27. )
  28. sim_res = title_sim_v2_by_list(title, account_title_list)
  29. if sim_res:
  30. return True
  31. return False
  32. @classmethod
  33. def article_safe(cls, title):
  34. """
  35. 判断文章是否安全
  36. """
  37. url = "http://192.168.100.31:8177/sensitive/is_sensitive"
  38. body = {
  39. "text": title
  40. }
  41. print(body)
  42. response = requests.post(
  43. url=url,
  44. json=body,
  45. headers={"Content-Type": "application/json"}
  46. )
  47. return response.json()['is_sensitive']
  48. @classmethod
  49. def deal(cls, article_obj):
  50. """
  51. :param article_obj:
  52. :return:
  53. """
  54. history_exists_flag = cls.history_exists(
  55. title=article_obj['title'],
  56. account_nickname=article_obj['crawlerAccountName'],
  57. plan_name=article_obj['producePlanName']
  58. )
  59. if history_exists_flag:
  60. response = {
  61. "fileterReason": "历史已发布文章",
  62. "status": True
  63. }
  64. return response
  65. else:
  66. safe_flag = cls.article_safe(title=article_obj['title'])
  67. if safe_flag:
  68. response = {
  69. "fileterReason": "安全违规",
  70. "status": True
  71. }
  72. return response
  73. else:
  74. return False