pipeline.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. import requests
  6. from applications.functions import title_sim_v2_by_list
  7. from applications.functions import get_article_titles
  8. class LongArticlesPipeline(object):
  9. """
  10. Long articles Pipeline
  11. """
  12. @classmethod
  13. def history_title(cls, account_nickname):
  14. """
  15. 判断是否存储
  16. :param account_nickname:
  17. :return:
  18. """
  19. # if "【1】" in plan_name or "【2】" in plan_name:
  20. index_list_1 = [1, 2]
  21. index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
  22. account_title_list_1 = get_article_titles(
  23. account_nickname,
  24. index_list=index_list_1
  25. )
  26. account_title_list_2 = get_article_titles(
  27. account_nickname,
  28. index_list=index_list_2
  29. )
  30. res = {
  31. "rule_1": account_title_list_1,
  32. "rule_2": account_title_list_2
  33. }
  34. return res
  35. @classmethod
  36. def history_exists(cls, title, account_title_list):
  37. """
  38. 判断文章是否历史已发布
  39. :param title:
  40. :param account_title_list:
  41. :return:
  42. """
  43. sim_res = title_sim_v2_by_list(title, account_title_list)
  44. if sim_res:
  45. return True
  46. return False
  47. @classmethod
  48. def article_safe(cls, title):
  49. """
  50. 判断文章是否安全
  51. """
  52. url = "http://192.168.100.31:8177/sensitive/is_sensitive"
  53. body = {
  54. "text": title
  55. }
  56. response = requests.post(
  57. url=url,
  58. json=body,
  59. headers={"Content-Type": "application/json"}
  60. )
  61. return response.json()['is_sensitive']
  62. @classmethod
  63. def article_bad(cls, title, account_nickname):
  64. """
  65. 判断该文章是否为劣质文章
  66. :param title:
  67. :param account_nickname:
  68. :return:
  69. """
  70. url = "http://192.168.100.31:8176/bad/is_bad"
  71. headers = {
  72. "accept": "application/json",
  73. "Content-Type": "application/json"
  74. }
  75. body = {
  76. "account_nickname": account_nickname,
  77. "title": title
  78. }
  79. response = requests.request(
  80. "POST",
  81. url=url,
  82. headers=headers,
  83. json=body
  84. )
  85. return response.json()['is_bad']
  86. @classmethod
  87. def deal(cls, article_obj, account_name, history_title_dict):
  88. """
  89. :param history_title_dict:
  90. :param account_name:
  91. :param article_obj:
  92. :return:
  93. """
  94. a = time.time()
  95. article_bad_flag = cls.article_bad(
  96. title=article_obj['title'],
  97. account_nickname=account_name
  98. )
  99. b = time.time()
  100. print("历史低质量文章:", b - a)
  101. if article_bad_flag:
  102. response = {
  103. "filterReason": "历史表现差的文章",
  104. "status": True
  105. }
  106. return response
  107. else:
  108. c = time.time()
  109. plan_name = article_obj['producePlanName']
  110. if "【1】" in plan_name or "【2】" in plan_name:
  111. history_title_list = history_title_dict['rule_1']
  112. else:
  113. history_title_list = history_title_dict['rule_2']
  114. history_exists_flag = cls.history_exists(
  115. title=article_obj['title'],
  116. account_title_list=history_title_list
  117. )
  118. d = time.time()
  119. print("历史已经发布文章:", d - c)
  120. if history_exists_flag:
  121. response = {
  122. "filterReason": "历史已发布文章",
  123. "status": True
  124. }
  125. return response
  126. else:
  127. e = time.time()
  128. safe_flag = cls.article_safe(title=article_obj['title'])
  129. f = time.time()
  130. print("安全:", f - e)
  131. if safe_flag:
  132. response = {
  133. "filterReason": "安全违规",
  134. "status": True
  135. }
  136. return response
  137. else:
  138. return False