# test4.py
import json
import os

import pandas as pd
import pymysql
  4. connection = pymysql.connect(
  5. host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
  6. port=3306,
  7. user='crawler',
  8. password='crawler123456@',
  9. db='piaoquan-crawler',
  10. charset='utf8mb4'
  11. )
  12. columns = pd.read_csv("piaoquan_crawler_long_articles_video.csv").columns.tolist()
  13. def extract_desired_key(json_str):
  14. """
  15. ppp
  16. :param json_str:
  17. :return:
  18. """
  19. try:
  20. data = json.loads(json_str)
  21. return data['productionPath'].split("rootSourceId%3D")[1]
  22. except json.JSONDecodeError:
  23. return None
  24. sql = f"""
  25. select * from long_articles_video where account_name in ("小阳看天下", "小惠爱厨房") and update_time > "2024-07-04";
  26. """
  27. cursor = connection.cursor()
  28. cursor.execute(sql)
  29. result = cursor.fetchall()
  30. connection.close()
  31. L = []
  32. for line in result:
  33. temp = list(line)
  34. temp[-5] = json.loads(temp[-5])['productionPath'].split("rootSourceId%3D")[1] if temp[-5] else None
  35. temp[-6] = json.loads(temp[-6])['productionPath'].split("rootSourceId%3D")[1] if temp[-6] else None
  36. temp[-7] = json.loads(temp[-7])['productionPath'].split("rootSourceId%3D")[1] if temp[-7] else None
  37. print(temp)
  38. L.append(temp)
  39. df = pd.DataFrame(L, columns=columns)
  40. df.to_excel("root_source_id.xlsx", index=False)