pipeline.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. """
  2. @author: luojunhui
  3. """
  4. import time
  5. import requests
  6. from applications.functions import title_sim_v2_by_list, is_bad
  7. class LongArticlesPipeline(object):
  8. """
  9. Long articles Pipeline
  10. """
  11. @classmethod
  12. def get_titles(cls, account_name, index_list):
  13. """
  14. :param account_name:
  15. :param index_list:
  16. :return:
  17. """
  18. print("开始请求")
  19. print(account_name)
  20. print(index_list)
  21. url = "http://47.98.154.124:6060/title_list"
  22. response = requests.request(
  23. "POST",
  24. url=url,
  25. headers={},
  26. json={
  27. "account_name": account_name,
  28. "index_list": index_list,
  29. "min_time": None,
  30. "max_time": None,
  31. "msg_type": "9"
  32. }
  33. )
  34. print(response.status_code)
  35. print(response.text)
  36. print(response.json())
  37. return response.json()['title_list']
  38. @classmethod
  39. def history_title(cls, account_nickname):
  40. """
  41. 判断是否存储
  42. :param account_nickname:
  43. :return:
  44. """
  45. # if "【1】" in plan_name or "【2】" in plan_name:
  46. index_list_1 = [1, 2]
  47. index_list_2 = [1, 2, 3, 4, 5, 6, 7, 8]
  48. print("开始请求")
  49. account_title_list_1 = cls.get_titles(
  50. account_nickname,
  51. index_list=index_list_1
  52. )
  53. print(account_title_list_1)
  54. account_title_list_2 = cls.get_titles(
  55. account_nickname,
  56. index_list=index_list_2
  57. )
  58. res = {
  59. "rule_1": account_title_list_1,
  60. "rule_2": account_title_list_2
  61. }
  62. return res
  63. @classmethod
  64. def history_exists(cls, title, account_title_list):
  65. """
  66. 判断文章是否历史已发布
  67. :param title:
  68. :param account_title_list:
  69. :return:
  70. """
  71. sim_res = title_sim_v2_by_list(title, account_title_list)
  72. if sim_res:
  73. return True
  74. return False
  75. @classmethod
  76. def article_safe(cls, title):
  77. """
  78. 判断文章是否安全
  79. """
  80. url = "http://61.48.133.26:8177/sensitive/is_sensitive"
  81. body = {
  82. "text": title
  83. }
  84. response = requests.post(
  85. url=url,
  86. json=body,
  87. headers={"Content-Type": "application/json"}
  88. )
  89. return response.json()['is_sensitive']
  90. @classmethod
  91. def article_bad(cls, title, account_nickname):
  92. """
  93. 判断该文章是否为劣质文章
  94. :param title:
  95. :param account_nickname:
  96. :return:
  97. """
  98. return is_bad(title)
  99. @classmethod
  100. def deal(cls, article_obj, account_name, history_title_dict):
  101. """
  102. :param history_title_dict:
  103. :param account_name:
  104. :param article_obj:
  105. :return:
  106. """
  107. a = time.time()
  108. article_bad_flag = cls.article_bad(
  109. title=article_obj['title'],
  110. account_nickname=account_name
  111. )
  112. b = time.time()
  113. print("历史低质量文章:", b - a)
  114. if article_bad_flag:
  115. response = {
  116. "filterReason": "历史表现差的文章",
  117. "status": True
  118. }
  119. return response
  120. else:
  121. c = time.time()
  122. plan_name = article_obj['producePlanName']
  123. if "【1】" in plan_name or "【2】" in plan_name:
  124. history_title_list = history_title_dict['rule_1']
  125. else:
  126. history_title_list = history_title_dict['rule_2']
  127. history_exists_flag = cls.history_exists(
  128. title=article_obj['title'],
  129. account_title_list=history_title_list
  130. )
  131. d = time.time()
  132. print("历史已经发布文章:", d - c)
  133. if history_exists_flag:
  134. response = {
  135. "filterReason": "历史已发布文章",
  136. "status": True
  137. }
  138. return response
  139. else:
  140. e = time.time()
  141. safe_flag = cls.article_safe(title=article_obj['title'])
  142. f = time.time()
  143. print("安全:", f - e)
  144. if safe_flag:
  145. response = {
  146. "filterReason": "安全违规",
  147. "status": True
  148. }
  149. return response
  150. else:
  151. return False