# account_pipeline.py
"""
@author: luojunhui
@description: account crawler pipeline
"""
from applications.db import DatabaseConnector
  6. empty_dict = {}
  7. def whether_duplicate_account_id(account_id: str, platform: str, db_client: DatabaseConnector) -> bool:
  8. """
  9. whether duplicate account id
  10. """
  11. sql = f"""
  12. select id from video_meta_accounts
  13. where account_id = %s and platform = %s;
  14. """
  15. duplicate_id = db_client.fetch(query=sql, params=(account_id, platform))
  16. if duplicate_id:
  17. return True
  18. return False
  19. def scrape_account_entities_process(account_item: dict, db_client: DatabaseConnector) -> dict:
  20. """
  21. scrape_account_entities_process,
  22. """
  23. account_id = account_item['account_id']
  24. platform = account_item['platform']
  25. # whether account exists
  26. if whether_duplicate_account_id(account_id, platform, db_client):
  27. return empty_dict
  28. # account analysis
  29. return account_item