# odps_data.py

import datetime
import json
import math
import random

from odps import ODPS
  6. # ODPS服务配置
  7. ODPS_CONFIG = {
  8. 'ENDPOINT': 'http://service.cn.maxcompute.aliyun.com/api',
  9. 'ACCESSID': 'LTAIWYUujJAm7CbH',
  10. 'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
  11. 'PROJECT': 'loghubods'
  12. }
  13. class OdpsDataCount:
  14. @classmethod
  15. def get_data_count(cls, dt):
  16. odps = ODPS(
  17. access_id=ODPS_CONFIG['ACCESSID'],
  18. secret_access_key=ODPS_CONFIG['ACCESSKEY'],
  19. project=ODPS_CONFIG['PROJECT'],
  20. endpoint=ODPS_CONFIG['ENDPOINT']
  21. )
  22. data_values = []
  23. try:
  24. sql = f'SELECT videoid,time,type,channel FROM loghubods.transport_spider_recommend_video_hour WHERE dt = "{dt}" and channel = "搬运工具"'
  25. with odps.execute_sql(sql).open_reader() as reader:
  26. for row in reader:
  27. data_values.append(json.dumps( {"videoid": row[0], "time": row[1], "type": row[2], "channel": row[3], "dt": str(dt)}, ensure_ascii=False ))
  28. except Exception as e:
  29. print(f"An error occurred: {e}")
  30. return data_values
  31. return data_values
  32. @classmethod
  33. def main(cls):
  34. dt = (datetime.datetime.now() - datetime.timedelta(hours=1)).strftime('%Y%m%d%H')
  35. data_count = cls.get_data_count(dt= dt)
  36. sample_size = math.ceil(len(data_count) / 2)
  37. random_selection = random.sample(data_count, sample_size)
  38. print(len(random_selection))
  39. return random_selection
  40. if __name__ == '__main__':
  41. OdpsDataCount.main()