account_pipeline.py 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. """
  2. @author: luojunhui
  3. @description: account crawler pipeline
  4. """
  5. from applications.db import DatabaseConnector
# Module-level empty dict. It is a single shared, mutable object, so any code
# that hands it out must treat it as read-only (never modify it in place).
empty_dict = {}
  7. def whether_duplicate_account_id(account_id: str, platform: str, db_client: DatabaseConnector) -> bool:
  8. """
  9. whether duplicate account id
  10. """
  11. sql = f"""
  12. select id, status from video_meta_accounts
  13. where account_id = %s and platform = %s;
  14. """
  15. fetch_response = db_client.fetch(query=sql, params=(account_id, platform))
  16. if fetch_response:
  17. duplicate_id, status = fetch_response[0]
  18. if duplicate_id and status:
  19. return True
  20. return False
  21. else:
  22. return False
  23. def scrape_account_entities_process(account_item: dict, db_client: DatabaseConnector) -> dict:
  24. """
  25. scrape_account_entities_process,
  26. """
  27. account_id = account_item['account_id']
  28. platform = account_item['platform']
  29. # whether account exists
  30. if whether_duplicate_account_id(account_id, platform, db_client):
  31. print("duplicate account id: {}".format(account_id))
  32. return empty_dict
  33. # account analysis
  34. return account_item