|
@@ -8,6 +8,7 @@ from pymysql.cursors import DictCursor
|
|
|
from tqdm import tqdm
|
|
from tqdm import tqdm
|
|
|
|
|
|
|
|
from applications import log
|
|
from applications import log
|
|
|
|
|
+from applications.api import ApolloApi
|
|
|
from applications.api import fetch_piaoquan_video_list_detail
|
|
from applications.api import fetch_piaoquan_video_list_detail
|
|
|
from applications.const.crawler_video_const import CrawlerPiaoQuanVideosConst
|
|
from applications.const.crawler_video_const import CrawlerPiaoQuanVideosConst
|
|
|
from applications.db import DatabaseConnector
|
|
from applications.db import DatabaseConnector
|
|
@@ -19,29 +20,8 @@ from applications.utils import insert_into_single_video_source_table
|
|
|
from config import long_articles_config
|
|
from config import long_articles_config
|
|
|
|
|
|
|
|
const = CrawlerPiaoQuanVideosConst()
|
|
const = CrawlerPiaoQuanVideosConst()
|
|
|
-
|
|
|
|
|
-category_map = {
|
|
|
|
|
- "知识科普": "知识科普",
|
|
|
|
|
- "生活技巧科普": "知识科普",
|
|
|
|
|
- "老年相关法律科普": "知识科普",
|
|
|
|
|
- "中国战争史": "军事历史",
|
|
|
|
|
- "中国历史影像": "军事历史",
|
|
|
|
|
- "正能量剧情": "家长里短",
|
|
|
|
|
- "人财诈骗": "社会法治",
|
|
|
|
|
- "贪污腐败": "社会法治",
|
|
|
|
|
- "罕见画面": "奇闻趣事",
|
|
|
|
|
- "惊奇事件": "奇闻趣事",
|
|
|
|
|
- "动物萌宠": "奇闻趣事",
|
|
|
|
|
- "老明星": "名人八卦",
|
|
|
|
|
- "健康知识": "健康养生",
|
|
|
|
|
- "饮食健康": "健康养生",
|
|
|
|
|
- "人生忠告": "情感故事",
|
|
|
|
|
- "老年生活": "情感故事",
|
|
|
|
|
- "国际军事": "政治新闻",
|
|
|
|
|
- "他国政策": "政治新闻",
|
|
|
|
|
- "国际时政": "政治新闻",
|
|
|
|
|
- "历史名人": "历史人物",
|
|
|
|
|
-}
|
|
|
|
|
|
|
+apollo_api = ApolloApi()
|
|
|
|
|
+pq_long_articles_category_mapping = json.loads(apollo_api.get_config_value("pq_long_articles_category_mapping"))
|
|
|
|
|
|
|
|
|
|
|
|
|
class CrawlerPiaoQuanVideos:
|
|
class CrawlerPiaoQuanVideos:
|
|
@@ -105,9 +85,12 @@ class CrawlerPiaoQuanVideos:
|
|
|
)
|
|
)
|
|
|
video_item.add("source_account", const.NO_SOURCE_ACCOUNT)
|
|
video_item.add("source_account", const.NO_SOURCE_ACCOUNT)
|
|
|
video_item.add("crawler_timestamp", int(time.time()))
|
|
video_item.add("crawler_timestamp", int(time.time()))
|
|
|
- video_item.add("oss_path", video_detail["ossVideoPath"])
|
|
|
|
|
|
|
+ video_item.add("video_oss_path", video_detail["ossVideoPath"])
|
|
|
video_item.add("audit_status", video_detail["auditStatus"])
|
|
video_item.add("audit_status", video_detail["auditStatus"])
|
|
|
- video_item.add("category", category_map.get(video_data["category"]))
|
|
|
|
|
|
|
+ category = pq_long_articles_category_mapping.get(video_data["category"])
|
|
|
|
|
+ if category:
|
|
|
|
|
+ video_item.add("category", category)
|
|
|
|
|
+ video_item.add("category_status", const.SUCCESS_STATUS)
|
|
|
|
|
|
|
|
# check item before insert
|
|
# check item before insert
|
|
|
video_item.check(source="video")
|
|
video_item.check(source="video")
|