crawler_channel_accounts.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. """
  2. @author: luojunhui
  3. @description: crawler channel accounts
  4. """
  5. from applications.api import WechatChannelAPI
  6. from applications.db import DatabaseConnector
  7. from applications.pipeline import scrape_account_entities_process
  8. from applications.utils import Item
  9. from applications.utils import insert_into_video_meta_accounts_table
  10. from config import long_articles_config
  11. class ChannelAccountCrawler:
  12. """
  13. crawler channel accounts
  14. strategy:
  15. 1. try to get search keys and titles from database
  16. 2. try to get hot_points from web
  17. 2. use search api to get accounts
  18. """
  19. def __init__(self):
  20. self.db_client = DatabaseConnector(db_config=long_articles_config)
  21. self.db_client.connect()
  22. def get_seed_keys_from_db(self):
  23. """
  24. get search keys from database
  25. """
  26. sql = "select * from datastat_sort_strategy limit 100;"
  27. result = self.db_client.fetch(sql)
  28. return result
  29. CA = ChannelAccountCrawler()
  30. result_list = CA.get_seed_keys_from_db()
  31. for item in result_list:
  32. print(item)