common.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # encoding: utf-8
  2. """
  3. @author: luojunhui
  4. """
  5. import random
  6. import aiohttp
  7. import asyncio
  8. def shuffle_list(ori_list):
  9. """
  10. 随机打乱 list
  11. :param ori_list:
  12. :return:
  13. """
  14. shuffled_list = ori_list[:] # 使用切片操作创建副本
  15. random.shuffle(shuffled_list)
  16. return shuffled_list
  17. def clean_title(strings):
  18. """
  19. :param strings:
  20. :return:
  21. """
  22. return (
  23. strings.strip()
  24. .replace("\n", "")
  25. .replace("/", "")
  26. .replace("\r", "")
  27. .replace("#", "")
  28. .replace(".", "。")
  29. .replace("\\", "")
  30. .replace("&NBSP", "")
  31. .replace(":", "")
  32. .replace("*", "")
  33. .replace("?", "")
  34. .replace("?", "")
  35. .replace('"', "")
  36. .replace("<", "")
  37. .replace(">", "")
  38. .replace("|", "")
  39. .replace(" ", "")
  40. .replace('"', "")
  41. .replace("'", "")
  42. )
  43. def sensitive_flag(s_words, ori_title):
  44. """
  45. :param s_words:
  46. :param ori_title:
  47. :return:
  48. """
  49. for word in s_words:
  50. if str(word) in ori_title:
  51. return False
  52. return True
  53. async def request_etl(url, headers, json_data, retries=6):
  54. """
  55. :param url:
  56. :param headers:
  57. :param json_data:
  58. :param retries:
  59. :return:
  60. """
  61. async with aiohttp.ClientSession() as session:
  62. for attempt in range(retries):
  63. try:
  64. async with session.post(url, headers=headers, json=json_data, timeout=120) as response:
  65. return await response.json()
  66. except asyncio.TimeoutError:
  67. if attempt < retries - 1:
  68. await asyncio.sleep(2) # 等待一段时间后重试
  69. else:
  70. raise
  71. async def async_post(url, headers, payload):
  72. """
  73. :param url:
  74. :param headers:
  75. :param payload:
  76. :return:
  77. """
  78. retries = 3
  79. async with aiohttp.ClientSession() as session:
  80. for attempt in range(3):
  81. try:
  82. async with session.post(url, headers=headers, data=payload, timeout=60) as response:
  83. return await response.json()
  84. except asyncio.TimeoutError:
  85. if attempt < retries - 1:
  86. await asyncio.sleep(2) # 等待一段时间后重试
  87. else:
  88. raise