# odps_data.py
import datetime
import json
import os

from odps import ODPS
  4. # ODPS服务配置
  5. ODPS_CONFIG = {
  6. 'ENDPOINT': 'http://service.cn.maxcompute.aliyun.com/api',
  7. 'ACCESSID': 'LTAIWYUujJAm7CbH',
  8. 'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
  9. 'PROJECT': 'loghubods'
  10. }
  11. class OdpsDataCount:
  12. @classmethod
  13. def get_data_count(cls, dt):
  14. odps = ODPS(
  15. access_id=ODPS_CONFIG['ACCESSID'],
  16. secret_access_key=ODPS_CONFIG['ACCESSKEY'],
  17. project=ODPS_CONFIG['PROJECT'],
  18. endpoint=ODPS_CONFIG['ENDPOINT']
  19. )
  20. data_values = []
  21. try:
  22. sql = f'SELECT uid,videoid,return_uv,type,type_owner,channel,channel_owner,title FROM loghubods.all_apptype_top1000_return WHERE dt = "{dt}" and rank <= 100'
  23. with odps.execute_sql(sql).open_reader() as reader:
  24. for row in reader:
  25. data_values.append(json.dumps( {"uid": row[0], "videoid": row[1], "return_uv": row[2], "type": row[3], "type_owner": row[4], "channel": row[5], "channel_owner": row[6], "title": row[7], "dt": str(dt)}, ensure_ascii=False ))
  26. except Exception as e:
  27. print(f"An error occurred: {e}")
  28. return data_values
  29. return data_values
  30. @classmethod
  31. def main(cls):
  32. dt = (datetime.datetime.now() - datetime.timedelta(hours=1)).strftime('%Y%m%d%H')
  33. data_count = cls.get_data_count(dt= dt)
  34. print(len(data_count))
  35. return data_count
  36. if __name__ == '__main__':
  37. OdpsDataCount.main()