123456789101112131415161718192021222324252627282930313233343536373839404142 |
- """
- @author: luojunhui
- @description: crawler channel accounts
- """
- from applications.api import WechatChannelAPI
- from applications.db import DatabaseConnector
- from applications.pipeline import scrape_account_entities_process
- from applications.utils import Item
- from applications.utils import insert_into_video_meta_accounts_table
- from config import long_articles_config
class ChannelAccountCrawler:
    """
    Crawl channel accounts.

    Planned strategy:
        1. try to get search keys and titles from the database
        2. try to get hot_points from the web
        3. use the search api to get accounts

    Only the database read is implemented in this class as shown here.
    """

    def __init__(self):
        # Build the connector from the long-articles config, keep it on the
        # instance, and open the connection right away.
        client = DatabaseConnector(db_config=long_articles_config)
        self.db_client = client
        client.connect()

    def get_seed_keys_from_db(self):
        """
        Read seed search keys from the database.

        Runs a fixed query against ``datastat_sort_strategy`` (capped at 100
        rows by the SQL itself) and returns whatever ``self.db_client.fetch``
        returns, unmodified.
        """
        return self.db_client.fetch(
            "select * from datastat_sort_strategy limit 100;"
        )
# Ad-hoc driver: runs at module level, so it executes on import as well as
# when the file is run directly.
CA = ChannelAccountCrawler()

# Pull the seed rows once, then echo each one to stdout for inspection.
result_list = CA.get_seed_keys_from_db()
for item in result_list:
    print(item)
|