# pipeline.py
"""
@author: luojunhui
"""
import time

import requests

from applications.functions import title_sim_v2_by_list
  7. class LongArticlesPipeline(object):
  8. """
  9. Long articles Pipeline
  10. """
  11. @classmethod
  12. def get_titles(cls, account_name, index_list):
  13. """
  14. :param account_name:
  15. :param index_list:
  16. :return:
  17. """
  18. url = "http://192.168.100.31:6062/title_list"
  19. response = requests.request(
  20. "POST",
  21. url=url,
  22. headers={},
  23. json={
  24. "account_name": account_name,
  25. "index_list": index_list,
  26. "min_time": None,
  27. "max_time": None,
  28. "msg_type": "9"
  29. }
  30. )
  31. return response.json()['title_list']
  32. @classmethod
  33. def history_title(cls, account_nickname):
  34. """
  35. 判断是否存储
  36. :param account_nickname:
  37. :return:
  38. """
  39. # if "【1】" in plan_name or "【2】" in plan_name:
  40. index_list_1 = [1, 2]
  41. index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
  42. account_title_list_1 = cls.get_titles(
  43. account_nickname,
  44. index_list=index_list_1
  45. )
  46. account_title_list_2 = cls.get_titles(
  47. account_nickname,
  48. index_list=index_list_2
  49. )
  50. res = {
  51. "rule_1": account_title_list_1,
  52. "rule_2": account_title_list_2
  53. }
  54. return res
  55. @classmethod
  56. def history_exists(cls, title, account_title_list):
  57. """
  58. 判断文章是否历史已发布
  59. :param title:
  60. :param account_title_list:
  61. :return:
  62. """
  63. sim_res = title_sim_v2_by_list(title, account_title_list)
  64. if sim_res:
  65. return True
  66. return False
  67. @classmethod
  68. def article_safe(cls, title):
  69. """
  70. 判断文章是否安全
  71. """
  72. url = "http://61.48.133.26:8177/sensitive/is_sensitive"
  73. body = {
  74. "text": title
  75. }
  76. response = requests.post(
  77. url=url,
  78. json=body,
  79. headers={"Content-Type": "application/json"}
  80. )
  81. return response.json()['is_sensitive']
  82. @classmethod
  83. def article_bad(cls, title, account_nickname):
  84. """
  85. 判断该文章是否为劣质文章
  86. :param title:
  87. :param account_nickname:
  88. :return:
  89. """
  90. url = "http://192.168.100.31:8176/bad/is_bad"
  91. headers = {
  92. "accept": "application/json",
  93. "Content-Type": "application/json"
  94. }
  95. body = {
  96. "account_nickname": account_nickname,
  97. "title": title
  98. }
  99. response = requests.request(
  100. "POST",
  101. url=url,
  102. headers=headers,
  103. json=body
  104. )
  105. return response.json()['is_bad']
  106. @classmethod
  107. def deal(cls, article_obj, account_name, history_title_dict):
  108. """
  109. :param history_title_dict:
  110. :param account_name:
  111. :param article_obj:
  112. :return:
  113. """
  114. a = time.time()
  115. article_bad_flag = cls.article_bad(
  116. title=article_obj['title'],
  117. account_nickname=account_name
  118. )
  119. b = time.time()
  120. print("历史低质量文章:", b - a)
  121. if article_bad_flag:
  122. response = {
  123. "filterReason": "历史表现差的文章",
  124. "status": True
  125. }
  126. return response
  127. else:
  128. c = time.time()
  129. plan_name = article_obj['producePlanName']
  130. if "【1】" in plan_name or "【2】" in plan_name:
  131. history_title_list = history_title_dict['rule_1']
  132. else:
  133. history_title_list = history_title_dict['rule_2']
  134. history_exists_flag = cls.history_exists(
  135. title=article_obj['title'],
  136. account_title_list=history_title_list
  137. )
  138. d = time.time()
  139. print("历史已经发布文章:", d - c)
  140. if history_exists_flag:
  141. response = {
  142. "filterReason": "历史已发布文章",
  143. "status": True
  144. }
  145. return response
  146. else:
  147. e = time.time()
  148. safe_flag = cls.article_safe(title=article_obj['title'])
  149. f = time.time()
  150. print("安全:", f - e)
  151. if safe_flag:
  152. response = {
  153. "filterReason": "安全违规",
  154. "status": True
  155. }
  156. return response
  157. else:
  158. return False