account_pipeline.py 1.0 KB

123456789101112131415161718192021222324252627282930313233343536
  1. """
  2. @author: luojunhui
  3. @description: account crawler pipeline
  4. """
  5. from applications.db import DatabaseConnector
  6. empty_dict = {}
  7. def whether_duplicate_account_id(account_id: str, platform: str, db_client: DatabaseConnector) -> bool:
  8. """
  9. whether duplicate account id
  10. """
  11. sql = f"""
  12. select id, status from video_meta_accounts
  13. where account_id = %s and platform = %s;
  14. """
  15. duplicate_id, status = db_client.fetch(query=sql, params=(account_id, platform))[0]
  16. if duplicate_id and status:
  17. return True
  18. return False
  19. def scrape_account_entities_process(account_item: dict, db_client: DatabaseConnector) -> dict:
  20. """
  21. scrape_account_entities_process,
  22. """
  23. account_id = account_item['account_id']
  24. platform = account_item['platform']
  25. # whether account exists
  26. if whether_duplicate_account_id(account_id, platform, db_client):
  27. print("duplicate account id: {}".format(account_id))
  28. return empty_dict
  29. # account analysis
  30. return account_item