# pipeline.py
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. import requests
  6. from applications.functions import title_sim_v2_by_list, is_bad
  7. from applications.config import port
  8. class LongArticlesPipeline(object):
  9. """
  10. Long articles Pipeline
  11. """
  12. @classmethod
  13. def get_titles(cls, account_name, index_list):
  14. """
  15. :param account_name:
  16. :param index_list:
  17. :return:
  18. """
  19. print("开始请求")
  20. print(account_name)
  21. print(index_list)
  22. url = "http://localhost:{}/title_list".format(port)
  23. response = requests.request(
  24. "POST",
  25. url=url,
  26. headers={},
  27. json={
  28. "account_name": account_name,
  29. "index_list": index_list,
  30. "min_time": None,
  31. "max_time": None,
  32. "msg_type": "9"
  33. }
  34. )
  35. print(response.status_code)
  36. print(response.text)
  37. print(response.json())
  38. return response.json()['title_list']
  39. @classmethod
  40. def history_title(cls, account_nickname):
  41. """
  42. 判断是否存储
  43. :param account_nickname:
  44. :return:
  45. """
  46. # if "【1】" in plan_name or "【2】" in plan_name:
  47. index_list_1 = [1, 2]
  48. index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
  49. print("开始请求")
  50. account_title_list_1 = cls.get_titles(
  51. account_nickname,
  52. index_list=index_list_1
  53. )
  54. print(account_title_list_1)
  55. account_title_list_2 = cls.get_titles(
  56. account_nickname,
  57. index_list=index_list_2
  58. )
  59. res = {
  60. "rule_1": account_title_list_1,
  61. "rule_2": account_title_list_2
  62. }
  63. return res
  64. @classmethod
  65. def history_exists(cls, title, account_title_list):
  66. """
  67. 判断文章是否历史已发布
  68. :param title:
  69. :param account_title_list:
  70. :return:
  71. """
  72. sim_res = title_sim_v2_by_list(title, account_title_list)
  73. if sim_res:
  74. return True
  75. return False
  76. @classmethod
  77. def article_safe(cls, title):
  78. """
  79. 判断文章是否安全
  80. """
  81. url = "http://61.48.133.26:8177/sensitive/is_sensitive"
  82. body = {
  83. "text": title
  84. }
  85. response = requests.post(
  86. url=url,
  87. json=body,
  88. headers={"Content-Type": "application/json"}
  89. )
  90. return response.json()['is_sensitive']
  91. @classmethod
  92. def article_bad(cls, title, account_nickname):
  93. """
  94. 判断该文章是否为劣质文章
  95. :param title:
  96. :param account_nickname:
  97. :return:
  98. """
  99. return is_bad(title)
  100. @classmethod
  101. def deal(cls, article_obj, account_name, history_title_dict):
  102. """
  103. :param history_title_dict:
  104. :param account_name:
  105. :param article_obj:
  106. :return:
  107. """
  108. a = time.time()
  109. article_bad_flag = cls.article_bad(
  110. title=article_obj['title'],
  111. account_nickname=account_name
  112. )
  113. b = time.time()
  114. print("历史低质量文章:", b - a)
  115. if article_bad_flag:
  116. response = {
  117. "filterReason": "历史表现差的文章",
  118. "status": True
  119. }
  120. return response
  121. else:
  122. c = time.time()
  123. plan_name = article_obj['producePlanName']
  124. if "【1】" in plan_name or "【2】" in plan_name:
  125. history_title_list = history_title_dict['rule_1']
  126. else:
  127. history_title_list = history_title_dict['rule_2']
  128. history_exists_flag = cls.history_exists(
  129. title=article_obj['title'],
  130. account_title_list=history_title_list
  131. )
  132. d = time.time()
  133. print("历史已经发布文章:", d - c)
  134. if history_exists_flag:
  135. response = {
  136. "filterReason": "历史已发布文章",
  137. "status": True
  138. }
  139. return response
  140. else:
  141. e = time.time()
  142. safe_flag = cls.article_safe(title=article_obj['title'])
  143. f = time.time()
  144. print("安全:", f - e)
  145. if safe_flag:
  146. response = {
  147. "filterReason": "安全违规",
  148. "status": True
  149. }
  150. return response
  151. else:
  152. return False