pipeline.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. """
  2. @author: luojunhui
  3. """
  4. import requests
  5. from applications.functions import title_sim_v2_by_list
  6. from applications.functions import get_article_title_url_list
  7. class LongArticlesPipeline(object):
  8. """
  9. Long articles Pipeline
  10. """
  11. @classmethod
  12. def history_exists(cls, title, account_nickname, plan_name):
  13. """
  14. 判断是否存储
  15. :param plan_name:
  16. :param title:
  17. :param account_nickname:
  18. :return:
  19. """
  20. if "【1】" in plan_name or "【2】" in plan_name:
  21. index_list = [1, 2]
  22. else:
  23. index_list = [1, 2, 3, 4, 5, 6, 7, 8]
  24. account_title_list = get_article_title_url_list(
  25. account_nickname,
  26. index_list=index_list
  27. )
  28. sim_res = title_sim_v2_by_list(title, account_title_list)
  29. if sim_res:
  30. return True
  31. return False
  32. @classmethod
  33. def article_safe(cls, title):
  34. """
  35. 判断文章是否安全
  36. """
  37. url = "http://192.168.100.31:8177/sensitive/is_sensitive"
  38. body = {
  39. "text": title
  40. }
  41. response = requests.post(
  42. url=url,
  43. json=body,
  44. headers={"Content-Type": "application/json"}
  45. )
  46. return response.json()['is_sensitive']
  47. @classmethod
  48. def article_bad(cls, title, account_nickname):
  49. """
  50. 判断该文章是否为劣质文章
  51. :param title:
  52. :param account_nickname:
  53. :return:
  54. """
  55. url = "http://192.168.100.31:8176/bad/is_bad"
  56. headers = {
  57. "accept": "application/json",
  58. "Content-Type": "application/json"
  59. }
  60. body = {
  61. "account_nickname": account_nickname,
  62. "title": title
  63. }
  64. response = requests.request(
  65. "POST",
  66. url=url,
  67. headers=headers,
  68. json=body
  69. )
  70. return response.json()['is_bad']
  71. @classmethod
  72. def deal(cls, article_obj):
  73. """
  74. :param article_obj:
  75. :return:
  76. """
  77. article_bad_flag = cls.article_bad(
  78. title=article_obj['title'],
  79. account_nickname=article_obj['crawlerAccountName']
  80. )
  81. if article_bad_flag:
  82. response = {
  83. "filterReason": "历史表现差的文章",
  84. "status": True
  85. }
  86. return response
  87. else:
  88. history_exists_flag = cls.history_exists(
  89. title=article_obj['title'],
  90. account_nickname=article_obj['crawlerAccountName'],
  91. plan_name=article_obj['producePlanName']
  92. )
  93. if history_exists_flag:
  94. response = {
  95. "filterReason": "历史已发布文章",
  96. "status": True
  97. }
  98. return response
  99. else:
  100. safe_flag = cls.article_safe(title=article_obj['title'])
  101. if safe_flag:
  102. response = {
  103. "filterReason": "安全违规",
  104. "status": True
  105. }
  106. return response
  107. else:
  108. return False