pipeline.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. """
  2. @author: luojunhui
  3. """
  4. import requests
  5. from applications.functions import title_sim_v2_by_list
  6. from applications.functions import get_article_title_url_list
  7. class LongArticlesPipeline(object):
  8. """
  9. Long articles Pipeline
  10. """
  11. @classmethod
  12. def history_exists(cls, title, account_nickname, plan_name):
  13. """
  14. 判断是否存储
  15. :param plan_name:
  16. :param title:
  17. :param account_nickname:
  18. :return:
  19. """
  20. if "【1】" in plan_name or "【2】" in plan_name:
  21. index_list = [1, 2]
  22. else:
  23. index_list = [1, 2, 3, 4, 5, 6, 7, 8]
  24. account_title_list = get_article_title_url_list(
  25. account_nickname,
  26. index_list=index_list
  27. )
  28. sim_res = title_sim_v2_by_list(title, account_title_list)
  29. if sim_res:
  30. return True
  31. return False
  32. @classmethod
  33. def article_safe(cls, title):
  34. """
  35. 判断文章是否安全
  36. """
  37. url = "http://192.168.100.31:8177/sensitive/is_sensitive"
  38. body = {
  39. "text": title
  40. }
  41. response = requests.post(
  42. url=url,
  43. json=body,
  44. headers={"Content-Type": "application/json"}
  45. )
  46. return response.json()['is_sensitive']
  47. @classmethod
  48. def deal(cls, article_obj):
  49. """
  50. :param article_obj:
  51. :return:
  52. """
  53. history_exists_flag = cls.history_exists(
  54. title=article_obj['title'],
  55. account_nickname=article_obj['crawlerAccountName'],
  56. plan_name=article_obj['producePlanName']
  57. )
  58. if history_exists_flag:
  59. response = {
  60. "fileterReason": "历史已发布文章",
  61. "status": True
  62. }
  63. return response
  64. else:
  65. safe_flag = cls.article_safe(title=article_obj['title'])
  66. if safe_flag:
  67. response = {
  68. "fileterReason": "安全违规",
  69. "status": True
  70. }
  71. return response
  72. else:
  73. return False