odps_data.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
import datetime
import json
import os

import requests
from odps import ODPS
  5. # ODPS服务配置
  6. ODPS_CONFIG = {
  7. 'ENDPOINT': 'http://service.cn.maxcompute.aliyun.com/api',
  8. 'ACCESSID': 'LTAIWYUujJAm7CbH',
  9. 'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
  10. }
  11. class OdpsDataCount:
  12. @classmethod
  13. def get_data_count(cls, project, table, dt):
  14. odps = ODPS(
  15. access_id=ODPS_CONFIG['ACCESSID'],
  16. secret_access_key=ODPS_CONFIG['ACCESSKEY'],
  17. project=project,
  18. endpoint=ODPS_CONFIG['ENDPOINT']
  19. )
  20. data_values = []
  21. try:
  22. sql = f'SELECT videoid,title,video_path,type FROM {project}.{table} WHERE dt = "{dt}" '
  23. with odps.execute_sql(sql).open_reader() as reader:
  24. for row in reader:
  25. data_values.append(json.dumps( {"video_id": row[0], "title": row[1], "video_path": row[2], "type": row[3], "partition": str(dt)}, ensure_ascii=False ))
  26. except Exception as e:
  27. print(f"An error occurred: {e}")
  28. return data_values
  29. return data_values
  30. @classmethod
  31. def main(cls, table, dt):
  32. # dt = datetime.datetime.now().strftime('%Y%m%d%H')
  33. project = 'loghubods'
  34. # table = 'content_ai_tag_return_top'
  35. data_count = cls.get_data_count(project=project, table=table, dt= dt)
  36. print(len(data_count))
  37. return data_count
  38. if __name__ == '__main__':
  39. OdpsDataCount.main()