pipeline.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. import requests
  6. from applications.functions import title_sim_v2_by_list, is_bad
  7. class LongArticlesPipeline(object):
  8. """
  9. Long articles Pipeline
  10. """
  11. @classmethod
  12. def get_titles(cls, account_name, index_list):
  13. """
  14. :param account_name:
  15. :param index_list:
  16. :return:
  17. """
  18. print("开始请求")
  19. url = "http://localhost:6060/title_list"
  20. response = requests.request(
  21. "POST",
  22. url=url,
  23. headers={},
  24. json={
  25. "account_name": account_name,
  26. "index_list": index_list,
  27. "min_time": None,
  28. "max_time": None,
  29. "msg_type": "9"
  30. }
  31. )
  32. print(response.status_code)
  33. print(response.json())
  34. return response.json()['title_list']
  35. @classmethod
  36. def history_title(cls, account_nickname):
  37. """
  38. 判断是否存储
  39. :param account_nickname:
  40. :return:
  41. """
  42. # if "【1】" in plan_name or "【2】" in plan_name:
  43. index_list_1 = [1, 2]
  44. index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
  45. print("开始请求")
  46. account_title_list_1 = cls.get_titles(
  47. account_nickname,
  48. index_list=index_list_1
  49. )
  50. print(account_title_list_1)
  51. account_title_list_2 = cls.get_titles(
  52. account_nickname,
  53. index_list=index_list_2
  54. )
  55. res = {
  56. "rule_1": account_title_list_1,
  57. "rule_2": account_title_list_2
  58. }
  59. return res
  60. @classmethod
  61. def history_exists(cls, title, account_title_list):
  62. """
  63. 判断文章是否历史已发布
  64. :param title:
  65. :param account_title_list:
  66. :return:
  67. """
  68. sim_res = title_sim_v2_by_list(title, account_title_list)
  69. if sim_res:
  70. return True
  71. return False
  72. @classmethod
  73. def article_safe(cls, title):
  74. """
  75. 判断文章是否安全
  76. """
  77. url = "http://61.48.133.26:8177/sensitive/is_sensitive"
  78. body = {
  79. "text": title
  80. }
  81. response = requests.post(
  82. url=url,
  83. json=body,
  84. headers={"Content-Type": "application/json"}
  85. )
  86. return response.json()['is_sensitive']
  87. @classmethod
  88. def article_bad(cls, title, account_nickname):
  89. """
  90. 判断该文章是否为劣质文章
  91. :param title:
  92. :param account_nickname:
  93. :return:
  94. """
  95. return is_bad(title)
  96. @classmethod
  97. def deal(cls, article_obj, account_name, history_title_dict):
  98. """
  99. :param history_title_dict:
  100. :param account_name:
  101. :param article_obj:
  102. :return:
  103. """
  104. a = time.time()
  105. article_bad_flag = cls.article_bad(
  106. title=article_obj['title'],
  107. account_nickname=account_name
  108. )
  109. b = time.time()
  110. print("历史低质量文章:", b - a)
  111. if article_bad_flag:
  112. response = {
  113. "filterReason": "历史表现差的文章",
  114. "status": True
  115. }
  116. return response
  117. else:
  118. c = time.time()
  119. plan_name = article_obj['producePlanName']
  120. if "【1】" in plan_name or "【2】" in plan_name:
  121. history_title_list = history_title_dict['rule_1']
  122. else:
  123. history_title_list = history_title_dict['rule_2']
  124. history_exists_flag = cls.history_exists(
  125. title=article_obj['title'],
  126. account_title_list=history_title_list
  127. )
  128. d = time.time()
  129. print("历史已经发布文章:", d - c)
  130. if history_exists_flag:
  131. response = {
  132. "filterReason": "历史已发布文章",
  133. "status": True
  134. }
  135. return response
  136. else:
  137. e = time.time()
  138. safe_flag = cls.article_safe(title=article_obj['title'])
  139. f = time.time()
  140. print("安全:", f - e)
  141. if safe_flag:
  142. response = {
  143. "filterReason": "安全违规",
  144. "status": True
  145. }
  146. return response
  147. else:
  148. return False