罗俊辉 9 месяцев назад
Родитель
Сommit
b2ff6e3b26

+ 7 - 0
applications/__init__.py

@@ -0,0 +1,7 @@
+"""
+@author: luojunhui
+"""
+from .aidit_api import AIDTApi
+from .denet_mysql import DeNetMysql
+from .pq_mysql import PQMySQL
+from .functions import Functions

+ 109 - 0
applications/aidit_api.py

@@ -0,0 +1,109 @@
+"""
+@author: luojunhui
+"""
+import requests
+import json
+
+
+class AIDTApi(object):
+    """
+    自动操作
+    """
+    headers = {
+        'Accept': 'application/json',
+        'Accept-Language': 'zh,zh-CN;q=0.9',
+        'Content-Type': 'application/json',
+        'Origin': 'http://admin.cybertogether.net',
+        'Proxy-Connection': 'keep-alive',
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
+    }
+    person_info = {
+        "token": "af54cdc404c3464d896745df389b2dce",
+        "appType": 9,
+        "platform": "pc",
+        "appVersionCode": 1000,
+        "clientTimestamp": 1,
+        "fid": 1,
+        "loginUid": 1,
+        "pageSource": 1,
+        "requestId": 1,
+        "rid": 1,
+        "uid": 1
+    }
+
+    @classmethod
+    def getPlanArticleList(cls, page_index, plan_id):
+        """
+        获取抓取计划下的文章list
+        :param plan_id:
+        :param page_index:
+        :return:
+        """
+        url = "http://aigc-api.cybertogether.net/aigc/crawler/content/list"
+        payload = json.dumps({
+            "params": {
+                "filterItems": [
+                    {
+                        "itemName": "sourceCrawlerPlans",
+                        "selectValues": [plan_id]
+                    }
+                ],
+                "listFieldFormula": [],
+                "pageNum": page_index,
+                "pageSize": 50,
+                "contentModal": 3
+            },
+            "baseInfo": cls.person_info
+        })
+        response = requests.request("POST", url, headers=cls.headers, data=payload)
+        return response.json()
+
+    @classmethod
+    def updateArticleIntoCrawlerPlan(cls, plan_id, plan_name, plan_tag, url_list):
+        """
+        往抓取计划加文章
+        :return:
+        """
+        url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
+        payload = json.dumps({
+            "params": {
+                "contentFilters": [],
+                "accountFilters": [],
+                "filterAccountMatchMode": 1,
+                "filterContentMatchMode": 1,
+                "selectModeValues": [],
+                "searchModeValues": [],
+                "contentModal": 3,
+                "analyze": {},
+                "crawlerComment": 0,
+                "inputGroup": None,
+                "inputSourceGroups": [],
+                "modePublishTime": [],
+                "planType": 2,
+                "frequencyType": 2,
+                "planTag": plan_tag,
+                "tagPenetrateFlag": 0,
+                "id": plan_id,
+                "name": plan_name,
+                "channel": 5,
+                "crawlerMode": 5,
+                "inputModeValues": url_list,
+                "modePublishTimeStart": None,
+                "modePublishTimeEnd": None,
+                "executeRate": None,
+                "executeDate": None,
+                "executeWindowStart": None,
+                "executeWindowEnd": None,
+                "executeTimeInterval": None,
+                "executeNum": None,
+                "addModal": None,
+                "addChannel": None,
+                "fileUpload": None,
+                "prompt": None,
+                "acelFlag": None,
+                "tasks": []
+            },
+            "baseInfo": cls.person_info
+        })
+        response = requests.request("POST", url, headers=cls.headers, data=payload)
+        print(json.dumps(response.json(), ensure_ascii=False, indent=4))

+ 33 - 0
applications/denet_mysql.py

@@ -0,0 +1,33 @@
+"""
+@author: luojunhui
+"""
+import pymysql
+from config import planConfigDict
+
+
+class DeNetMysql(object):
+    """
+     Mysql Server
+    """
+    connection = pymysql.connect(
+        host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # 数据库IP地址,内网地址
+        port=3306,  # 端口号
+        user="crawler_readonly",  # mysql用户名
+        passwd="cyber#crawler_2023",  # mysql用户登录密码
+        db="aigc-admin-prod",  # 数据库名
+        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
+    )
+
+    @classmethod
+    def getUnEmptyPlan(cls):
+        """
+        :return:
+        """
+        sql = "select id, name from crawler_plan where name like '%腾讯互选-%' and crawler_total_num > 0;"
+        cursor = cls.connection.cursor()
+        cursor.execute(sql)
+        data = cursor.fetchall()
+        result = [list(line)[0] for line in data]
+        # print(result)
+        result = [i for i in result if planConfigDict.get(i)]
+        return result

+ 30 - 0
applications/functions.py

@@ -0,0 +1,30 @@
+"""
+@author: luojunhui
+"""
+import requests
+
+
+class Functions(object):
+    """
+    functions class
+    """
+    @classmethod
+    def getTitleScore(cls, title_list, account_name):
+        """
+        标题打分
+        :param title_list:
+        :param account_name:
+        :return:
+        """
+        url = "http://192.168.100.31:8179/score_list"
+        body = {
+            "account_nickname_list": [account_name],
+            "text_list": title_list,
+            "max_time": None,
+            "min_time": None,
+            "interest_type": "by_avg",
+            "sim_type": "mean",
+            "rate": 0.1
+        }
+        response = requests.post(url=url, headers={}, json=body).json()
+        return response

+ 42 - 0
applications/pq_mysql.py

@@ -0,0 +1,42 @@
+"""
+@author: luojunhui
+"""
+import pymysql
+
+
+class PQMySQL(object):
+    """
+    PQ Mysql
+    """
+    connection = pymysql.connect(
+        host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
+        port=3306,
+        user='crawler',
+        password='crawler123456@',
+        db='piaoquan-crawler',
+        charset='utf8mb4'
+    )
+
+    @classmethod
+    def update(cls, sql, params):
+        """
+        更新
+        :return:
+        """
+        cursor = cls.connection.cursor()
+        cursor.execute(sql, params)
+        cls.connection.commit()
+
+    @classmethod
+    def select(cls, sql):
+        """
+        查询
+        :param sql:
+        :return:
+        """
+        cursor = cls.connection.cursor()
+        cursor.execute(sql)
+        result = cursor.fetchall()
+        return result
+
+

+ 332 - 0
config/__init__.py

@@ -0,0 +1,332 @@
+"""
+@author: luojunhui
+"""
+
+# Crawler plan id -> category name.
+# DeNetMysql.getUnEmptyPlan keeps only plan ids that have an entry here,
+# so a commented-out line below excludes that plan from its result.
+planConfigDict = {
+    # "20240721063854162433630": "动漫二次元",
+    # "20240721064001535851572": "游戏",
+    "20240721064012570747614": "装修设计",
+    "20240721064027033693901": "家居生活",
+    "20240721064037856692958": "创意美学",
+    "20240721064054346901161": "时尚潮流",
+    "20240721065139955704312": "美容美妆",
+    "20240721065144690822816": "服饰穿搭",
+    "20240721065204600967818": "好物种草",
+    "20240721065216328783909": "影视影评",
+    "20240721065222773392809": "娱乐八卦",
+    "20240721065313016877129": "情感生活",
+    "20240721065418085156676": "文化文学",
+    "20240721065444407933304": "生活方式",
+    # "20240721065448295911613": "宗教历史",
+    # "20240721065456074517643": "军事政法",
+    "20240721065536790163449": "金融财经",
+    "20240721065541597979774": "房产楼市",
+    "20240721065548525430079": "科学科普",
+    "20240721065636638920221": "互联网",
+    "20240721065642988552526": "数码3C",
+    "20240721065757870627696": "新闻媒体",
+    "20240721065858360363927": "行业资讯",
+    "20240721065936394197711": "区域生活",
+    "20240721070021406546573": "健康养生",
+    "20240721070027562219631": "体育赛事",
+    "20240721070031464491910": "运动健身",
+    # "20240721070120535645091": "母婴育儿",
+    "20240721070316597585132": "教育培训",
+    "20240721070328794991834": "校园生活",
+    "20240721070341572360754": "职场管理",
+    "20240721070358450257397": "餐饮美食",
+    "20240721070400833119871": "星座命理",
+    "20240721070405096586304": "搞笑幽默",
+    "20240721070427794574827": "旅游出行",
+    # "20240721070443436973433": "汽车",
+    # "20240721070450497257695": "音乐",
+    "20240723131249517316218": "军事政法-年龄56-66",
+    "20240723131151203388473": "宗教历史-年龄56-66"
+}
+
+# Pool level key ("1" .. "5") -> list of entries written as "<name>--<n>".
+# tasks/task2.py reads level "4" into ColdStartTask.pool4.
+# NOTE(review): presumably <name> is an account name and <n> a slot index
+# within that account - confirm with the owner of this config.
+poolLevelConfig = {
+    "1": [
+        "指尖奇文--1",
+        "老友闲谈--1",
+        "老友欢聚地--1",
+        "趣味晚年--1",
+        "情为老友--1",
+        "退休老年圈--1"
+    ],
+    "2": [
+        "指尖奇文--2",
+        "老友闲谈--2",
+        "老友欢聚地--2",
+        "情为老友--2",
+        "多彩妙生活--1",
+        "多彩妙生活--2",
+        "家家生活指南--1",
+        "老友快乐谈--1",
+        "人生百事观--1",
+        "农耕趣时刻--1",
+        "幸福启示--1",
+        "路边闲聊社--1",
+        "快乐精选集--1",
+        "生活百态观--1",
+        "生活百态观--2",
+        "日常巧思集--1",
+        "趣史论--1",
+        "畅聊奇闻--1",
+        "生活指示录--1",
+        "奇趣百味生活--1",
+        "无忧自在生活--1",
+        "喜乐生活派--1",
+        "态度说--1",
+        "缘来养心厅--1",
+        "便捷生活好方法--1",
+        "看不够妙招--1"
+    ],
+    "3": [
+        "趣味晚年--2",
+        "退休老年圈--2",
+        "家家生活指南--2",
+        "老友快乐谈--2",
+        "农耕趣时刻--2",
+        "幸福启示--2",
+        "路边闲聊社--2",
+        "快乐精选集--2",
+        "日常巧思集--2",
+        "趣史论--2",
+        "畅聊奇闻--2",
+        "生活指示录--2",
+        "奇趣百味生活--2",
+        "无忧自在生活--2",
+        "喜乐生活派--2",
+        "态度说--2",
+        "缘来养心厅--2",
+        "便捷生活好方法--2",
+        "看不够妙招--2",
+        "日常生活小技巧集--1",
+        "日常生活小技巧集--2",
+        "都市镜头--1",
+        "都市镜头--2",
+        "退休无忧生活--1",
+        "退休无忧生活--2",
+        "心灵情感驿站--1",
+        "心灵情感驿站--2",
+        "票圈极速版--1",
+        "票圈极速版--2",
+        "生活良读--1",
+        "生活良读--2",
+        "史记有言--1",
+        "史记有言--2",
+        "幸福妙招合集--1",
+        "幸福妙招合集--2",
+        "妙招持家帮手--1",
+        "妙招持家帮手--2",
+        "乐享生活小窍门--1",
+        "乐享生活小窍门--2",
+        "天天学生活技巧--1",
+        "天天学生活技巧--2",
+        "音药金曲厅--1",
+        "日常小妙招秘籍--1",
+        "日常小妙招秘籍--2",
+        "窦都事说--1",
+        "巷尾风声--1",
+        "趣谈史记--1",
+        "趣谈史记--2",
+        "趣味生活达人--1",
+        "趣味生活达人--2",
+        "异闻趣事多--1",
+        "异闻趣事多--2",
+        "无忧生活小妙招--1",
+        "幸福晚年知音--1",
+        "麒阁史记--1",
+        "老来生活家--1",
+        "那些历史--1",
+        "祝福养心厅--1",
+        "音药养心馆--1",
+        "史记趣言--1",
+        "生活晓常识--1",
+        "生活情感叁读--1",
+        "心海情澜起--1",
+        "繁花史阁--1",
+        "小贝生活课堂--1"
+    ],
+    "4": [
+        "指尖奇文--3",
+        "指尖奇文--4",
+        "指尖奇文--5",
+        "指尖奇文--6",
+        "老友闲谈--3",
+        "老友闲谈--4",
+        "老友闲谈--5",
+        "老友闲谈--6",
+        "老友欢聚地--3",
+        "老友欢聚地--4",
+        "老友欢聚地--5",
+        "老友欢聚地--6",
+        "老友欢聚地--7",
+        "老友欢聚地--8",
+        "趣味晚年--3",
+        "趣味晚年--4",
+        "趣味晚年--5",
+        "趣味晚年--6",
+        "情为老友--3",
+        "情为老友--4",
+        "情为老友--5",
+        "情为老友--6",
+        "退休老年圈--3",
+        "退休老年圈--4",
+        "退休老年圈--5",
+        "退休老年圈--6",
+        "多彩妙生活--3",
+        "多彩妙生活--4",
+        "多彩妙生活--5",
+        "多彩妙生活--6",
+        "多彩妙生活--7",
+        "多彩妙生活--8",
+        "家家生活指南--3",
+        "家家生活指南--4",
+        "家家生活指南--5",
+        "家家生活指南--6",
+        "家家生活指南--7",
+        "家家生活指南--8",
+        "老友快乐谈--3",
+        "人生百事观--2",
+        "人生百事观--3",
+        "农耕趣时刻--3",
+        "农耕趣时刻--4",
+        "农耕趣时刻--5",
+        "农耕趣时刻--6",
+        "路边闲聊社--3",
+        "路边闲聊社--4",
+        "路边闲聊社--5",
+        "路边闲聊社--6",
+        "生活百态观--3",
+        "生活百态观--4",
+        "生活百态观--5",
+        "生活百态观--6",
+        "日常巧思集--3",
+        "日常巧思集--4",
+        "日常巧思集--5",
+        "日常巧思集--6",
+        "趣史论--3",
+        "趣史论--4",
+        "趣史论--5",
+        "趣史论--6",
+        "缘来养心厅--3",
+        "缘来养心厅--4",
+        "缘来养心厅--5",
+        "缘来养心厅--6",
+        "心灵情感驿站--3",
+        "票圈极速版--3",
+        "史记有言--3",
+        "史记有言--4",
+        "史记有言--5",
+        "史记有言--6",
+        "音药金曲厅--2",
+        "日常小妙招秘籍--3",
+        "日常小妙招秘籍--4",
+        "日常小妙招秘籍--5",
+        "日常小妙招秘籍--6",
+        "窦都事说--2",
+        "巷尾风声--2",
+        "趣谈史记--3",
+        "趣谈史记--4",
+        "趣谈史记--5",
+        "趣谈史记--6",
+        "无忧生活小妙招--2",
+        "幸福晚年知音--2",
+        "麒阁史记--2",
+        "老来生活家--2",
+        "那些历史--2",
+        "祝福养心厅--2",
+        "音药养心馆--2",
+        "史记趣言--2",
+        "生活晓常识--2",
+        "生活情感叁读--2",
+        "心海情澜起--2",
+        "繁花史阁--2",
+        "小贝生活课堂--2",
+        "无忧潮生活--1",
+        "无忧潮生活--2",
+        "实用妙招800个--1",
+        "实用妙招800个--2",
+        "票圈美文速递--1",
+        "票圈美文速递--2",
+        "票圈美文速递--3",
+        "小惠爱厨房--1",
+        "小惠爱厨房--2",
+        "小惠爱厨房--3",
+        "小惠爱厨房--4",
+        "小惠爱厨房--5",
+        "小惠爱厨房--6",
+        "小惠爱厨房--7",
+        "小惠爱厨房--8",
+        "小阳看天下--1",
+        "小阳看天下--2",
+        "票圈正能量--1",
+        "票圈正能量--2",
+        "票圈大事件--1",
+        "票圈大事件--2"
+    ],
+    "5": [
+        "老友快乐谈--4",
+        "人生百事观--4",
+        "幸福启示--3",
+        "幸福启示--4",
+        "快乐精选集--3",
+        "快乐精选集--4",
+        "畅聊奇闻--3",
+        "畅聊奇闻--4",
+        "生活指示录--3",
+        "生活指示录--4",
+        "奇趣百味生活--3",
+        "奇趣百味生活--4",
+        "无忧自在生活--3",
+        "无忧自在生活--4",
+        "喜乐生活派--3",
+        "喜乐生活派--4",
+        "态度说--3",
+        "态度说--4",
+        "便捷生活好方法--3",
+        "便捷生活好方法--4",
+        "看不够妙招--3",
+        "日常生活小技巧集--3",
+        "日常生活小技巧集--4",
+        "都市镜头--3",
+        "都市镜头--4",
+        "退休无忧生活--3",
+        "退休无忧生活--4",
+        "票圈极速版--4",
+        "幸福妙招合集--3",
+        "妙招持家帮手--3",
+        "乐享生活小窍门--3",
+        "天天学生活技巧--3",
+        "天天学生活技巧--4",
+        "音药金曲厅--3",
+        "音药金曲厅--4",
+        "巷尾风声--3",
+        "巷尾风声--4",
+        "趣味生活达人--3",
+        "异闻趣事多--3",
+        "异闻趣事多--4",
+        "无忧生活小妙招--3",
+        "麒阁史记--3",
+        "麒阁史记--4",
+        "那些历史--3",
+        "那些历史--4",
+        "祝福养心厅--3",
+        "音药养心馆--3",
+        "音药养心馆--4",
+        "史记趣言--3",
+        "心海情澜起--3",
+        "小阳看天下--3"
+    ]
+}
+
+# Category name -> fractional weight; the values add up to 1.0.
+# "其他" ("other") is the catch-all bucket.
+# NOTE(review): presumably the share of articles taken from each category -
+# nothing in this commit reads the mapping, so confirm the intended use.
+cateMap = {
+    "军事政法": 0.2,
+    "宗教历史": 0.15,
+    "新闻媒体": 0.15,
+    "情感生活": 0.1,
+    "健康养生": 0.1,
+    "娱乐八卦": 0.1,
+    "其他": 0.2
+}

+ 41 - 0
config/crawler_plan_config.json

@@ -0,0 +1,41 @@
+{
+    "20240721063854162433630": "动漫二次元",
+    "20240721064001535851572": "游戏",
+    "20240721064012570747614": "装修设计",
+    "20240721064027033693901": "家居生活",
+    "20240721064037856692958": "创意美学",
+    "20240721064054346901161": "时尚潮流",
+    "20240721065139955704312": "美容美妆",
+    "20240721065144690822816": "服饰穿搭",
+    "20240721065204600967818": "好物种草",
+    "20240721065216328783909": "影视影评",
+    "20240721065222773392809": "娱乐八卦",
+    "20240721065313016877129": "情感生活",
+    "20240721065418085156676": "文化文学",
+    "20240721065444407933304": "生活方式",
+    "20240721065448295911613": "宗教历史",
+    "20240721065456074517643": "军事政法",
+    "20240721065536790163449": "金融财经",
+    "20240721065541597979774": "房产楼市",
+    "20240721065548525430079": "科学科普",
+    "20240721065636638920221": "互联网",
+    "20240721065642988552526": "数码3C",
+    "20240721065757870627696": "新闻媒体",
+    "20240721065858360363927": "行业资讯",
+    "20240721065936394197711": "区域生活",
+    "20240721070021406546573": "健康养生",
+    "20240721070027562219631": "体育赛事",
+    "20240721070031464491910": "运动健身",
+    "20240721070120535645091": "母婴育儿",
+    "20240721070316597585132": "教育培训",
+    "20240721070328794991834": "校园生活",
+    "20240721070341572360754": "职场管理",
+    "20240721070358450257397": "餐饮美食",
+    "20240721070400833119871": "星座命理",
+    "20240721070405096586304": "搞笑幽默",
+    "20240721070427794574827": "旅游出行",
+    "20240721070443436973433": "汽车",
+    "20240721070450497257695": "音乐",
+    "20240723131249517316218": "军事政法-年龄56-66",
+    "20240723131151203388473": "宗教历史-年龄56-66"
+}

+ 276 - 0
config/pool_config.json

@@ -0,0 +1,276 @@
+{
+    "1": [
+        "指尖奇文--1",
+        "老友闲谈--1",
+        "老友欢聚地--1",
+        "趣味晚年--1",
+        "情为老友--1",
+        "退休老年圈--1"
+    ],
+    "2": [
+        "指尖奇文--2",
+        "老友闲谈--2",
+        "老友欢聚地--2",
+        "情为老友--2",
+        "多彩妙生活--1",
+        "多彩妙生活--2",
+        "家家生活指南--1",
+        "老友快乐谈--1",
+        "人生百事观--1",
+        "农耕趣时刻--1",
+        "幸福启示--1",
+        "路边闲聊社--1",
+        "快乐精选集--1",
+        "生活百态观--1",
+        "生活百态观--2",
+        "日常巧思集--1",
+        "趣史论--1",
+        "畅聊奇闻--1",
+        "生活指示录--1",
+        "奇趣百味生活--1",
+        "无忧自在生活--1",
+        "喜乐生活派--1",
+        "态度说--1",
+        "缘来养心厅--1",
+        "便捷生活好方法--1",
+        "看不够妙招--1"
+    ],
+    "3": [
+        "趣味晚年--2",
+        "退休老年圈--2",
+        "家家生活指南--2",
+        "老友快乐谈--2",
+        "农耕趣时刻--2",
+        "幸福启示--2",
+        "路边闲聊社--2",
+        "快乐精选集--2",
+        "日常巧思集--2",
+        "趣史论--2",
+        "畅聊奇闻--2",
+        "生活指示录--2",
+        "奇趣百味生活--2",
+        "无忧自在生活--2",
+        "喜乐生活派--2",
+        "态度说--2",
+        "缘来养心厅--2",
+        "便捷生活好方法--2",
+        "看不够妙招--2",
+        "日常生活小技巧集--1",
+        "日常生活小技巧集--2",
+        "都市镜头--1",
+        "都市镜头--2",
+        "退休无忧生活--1",
+        "退休无忧生活--2",
+        "心灵情感驿站--1",
+        "心灵情感驿站--2",
+        "票圈极速版--1",
+        "票圈极速版--2",
+        "生活良读--1",
+        "生活良读--2",
+        "史记有言--1",
+        "史记有言--2",
+        "幸福妙招合集--1",
+        "幸福妙招合集--2",
+        "妙招持家帮手--1",
+        "妙招持家帮手--2",
+        "乐享生活小窍门--1",
+        "乐享生活小窍门--2",
+        "天天学生活技巧--1",
+        "天天学生活技巧--2",
+        "音药金曲厅--1",
+        "日常小妙招秘籍--1",
+        "日常小妙招秘籍--2",
+        "窦都事说--1",
+        "巷尾风声--1",
+        "趣谈史记--1",
+        "趣谈史记--2",
+        "趣味生活达人--1",
+        "趣味生活达人--2",
+        "异闻趣事多--1",
+        "异闻趣事多--2",
+        "无忧生活小妙招--1",
+        "幸福晚年知音--1",
+        "麒阁史记--1",
+        "老来生活家--1",
+        "那些历史--1",
+        "祝福养心厅--1",
+        "音药养心馆--1",
+        "史记趣言--1",
+        "生活晓常识--1",
+        "生活情感叁读--1",
+        "心海情澜起--1",
+        "繁花史阁--1",
+        "小贝生活课堂--1"
+    ],
+    "4": [
+        "指尖奇文--3",
+        "指尖奇文--4",
+        "指尖奇文--5",
+        "指尖奇文--6",
+        "老友闲谈--3",
+        "老友闲谈--4",
+        "老友闲谈--5",
+        "老友闲谈--6",
+        "老友欢聚地--3",
+        "老友欢聚地--4",
+        "老友欢聚地--5",
+        "老友欢聚地--6",
+        "老友欢聚地--7",
+        "老友欢聚地--8",
+        "趣味晚年--3",
+        "趣味晚年--4",
+        "趣味晚年--5",
+        "趣味晚年--6",
+        "情为老友--3",
+        "情为老友--4",
+        "情为老友--5",
+        "情为老友--6",
+        "退休老年圈--3",
+        "退休老年圈--4",
+        "退休老年圈--5",
+        "退休老年圈--6",
+        "多彩妙生活--3",
+        "多彩妙生活--4",
+        "多彩妙生活--5",
+        "多彩妙生活--6",
+        "多彩妙生活--7",
+        "多彩妙生活--8",
+        "家家生活指南--3",
+        "家家生活指南--4",
+        "家家生活指南--5",
+        "家家生活指南--6",
+        "家家生活指南--7",
+        "家家生活指南--8",
+        "老友快乐谈--3",
+        "人生百事观--2",
+        "人生百事观--3",
+        "农耕趣时刻--3",
+        "农耕趣时刻--4",
+        "农耕趣时刻--5",
+        "农耕趣时刻--6",
+        "路边闲聊社--3",
+        "路边闲聊社--4",
+        "路边闲聊社--5",
+        "路边闲聊社--6",
+        "生活百态观--3",
+        "生活百态观--4",
+        "生活百态观--5",
+        "生活百态观--6",
+        "日常巧思集--3",
+        "日常巧思集--4",
+        "日常巧思集--5",
+        "日常巧思集--6",
+        "趣史论--3",
+        "趣史论--4",
+        "趣史论--5",
+        "趣史论--6",
+        "缘来养心厅--3",
+        "缘来养心厅--4",
+        "缘来养心厅--5",
+        "缘来养心厅--6",
+        "心灵情感驿站--3",
+        "票圈极速版--3",
+        "史记有言--3",
+        "史记有言--4",
+        "史记有言--5",
+        "史记有言--6",
+        "音药金曲厅--2",
+        "日常小妙招秘籍--3",
+        "日常小妙招秘籍--4",
+        "日常小妙招秘籍--5",
+        "日常小妙招秘籍--6",
+        "窦都事说--2",
+        "巷尾风声--2",
+        "趣谈史记--3",
+        "趣谈史记--4",
+        "趣谈史记--5",
+        "趣谈史记--6",
+        "无忧生活小妙招--2",
+        "幸福晚年知音--2",
+        "麒阁史记--2",
+        "老来生活家--2",
+        "那些历史--2",
+        "祝福养心厅--2",
+        "音药养心馆--2",
+        "史记趣言--2",
+        "生活晓常识--2",
+        "生活情感叁读--2",
+        "心海情澜起--2",
+        "繁花史阁--2",
+        "小贝生活课堂--2",
+        "无忧潮生活--1",
+        "无忧潮生活--2",
+        "实用妙招800个--1",
+        "实用妙招800个--2",
+        "票圈美文速递--1",
+        "票圈美文速递--2",
+        "票圈美文速递--3",
+        "小惠爱厨房--1",
+        "小惠爱厨房--2",
+        "小惠爱厨房--3",
+        "小惠爱厨房--4",
+        "小惠爱厨房--5",
+        "小惠爱厨房--6",
+        "小惠爱厨房--7",
+        "小惠爱厨房--8",
+        "小阳看天下--1",
+        "小阳看天下--2",
+        "票圈正能量--1",
+        "票圈正能量--2",
+        "票圈大事件--1",
+        "票圈大事件--2"
+    ],
+    "5": [
+        "老友快乐谈--4",
+        "人生百事观--4",
+        "幸福启示--3",
+        "幸福启示--4",
+        "快乐精选集--3",
+        "快乐精选集--4",
+        "畅聊奇闻--3",
+        "畅聊奇闻--4",
+        "生活指示录--3",
+        "生活指示录--4",
+        "奇趣百味生活--3",
+        "奇趣百味生活--4",
+        "无忧自在生活--3",
+        "无忧自在生活--4",
+        "喜乐生活派--3",
+        "喜乐生活派--4",
+        "态度说--3",
+        "态度说--4",
+        "便捷生活好方法--3",
+        "便捷生活好方法--4",
+        "看不够妙招--3",
+        "日常生活小技巧集--3",
+        "日常生活小技巧集--4",
+        "都市镜头--3",
+        "都市镜头--4",
+        "退休无忧生活--3",
+        "退休无忧生活--4",
+        "票圈极速版--4",
+        "幸福妙招合集--3",
+        "妙招持家帮手--3",
+        "乐享生活小窍门--3",
+        "天天学生活技巧--3",
+        "天天学生活技巧--4",
+        "音药金曲厅--3",
+        "音药金曲厅--4",
+        "巷尾风声--3",
+        "巷尾风声--4",
+        "趣味生活达人--3",
+        "异闻趣事多--3",
+        "异闻趣事多--4",
+        "无忧生活小妙招--3",
+        "麒阁史记--3",
+        "麒阁史记--4",
+        "那些历史--3",
+        "那些历史--4",
+        "祝福养心厅--3",
+        "音药养心馆--3",
+        "音药养心馆--4",
+        "史记趣言--3",
+        "心海情澜起--3",
+        "小阳看天下--3"
+    ]
+}

+ 38 - 0
dev/config.txt

@@ -0,0 +1,38 @@
+20240721061740680945250,腾讯互选-测试
+20240721063854162433630,腾讯互选-动漫二次元
+20240721064001535851572,腾讯互选-游戏
+20240721064012570747614,腾讯互选-装修设计
+20240721064027033693901,腾讯互选-家居生活
+20240721064037856692958,腾讯互选-创意美学
+20240721064054346901161,腾讯互选-时尚潮流
+20240721065139955704312,腾讯互选-美容美妆
+20240721065144690822816,腾讯互选-服饰穿搭
+20240721065204600967818,腾讯互选-好物种草
+20240721065216328783909,腾讯互选-影视影评
+20240721065222773392809,腾讯互选-娱乐八卦
+20240721065313016877129,腾讯互选-情感生活
+20240721065418085156676,腾讯互选-文化文学
+20240721065444407933304,腾讯互选-生活方式
+20240721065448295911613,腾讯互选-宗教历史
+20240721065456074517643,腾讯互选-军事政法
+20240721065536790163449,腾讯互选-金融财经
+20240721065541597979774,腾讯互选-房产楼市
+20240721065548525430079,腾讯互选-科学科普
+20240721065636638920221,腾讯互选-互联网
+20240721065642988552526,腾讯互选-数码3C
+20240721065757870627696,腾讯互选-新闻媒体
+20240721065858360363927,腾讯互选-行业资讯
+20240721065936394197711,腾讯互选-区域生活
+20240721070021406546573,腾讯互选-健康养生
+20240721070027562219631,腾讯互选-体育赛事
+20240721070031464491910,腾讯互选-运动健身
+20240721070120535645091,腾讯互选-母婴育儿
+20240721070316597585132,腾讯互选-教育培训
+20240721070328794991834,腾讯互选-校园生活
+20240721070341572360754,腾讯互选-职场管理
+20240721070358450257397,腾讯互选-餐饮美食
+20240721070400833119871,腾讯互选-星座命理
+20240721070405096586304,腾讯互选-搞笑幽默
+20240721070427794574827,腾讯互选-旅游出行
+20240721070443436973433,腾讯互选-汽车
+20240721070450497257695,腾讯互选-音乐

+ 21 - 0
dev/test.py

@@ -0,0 +1,21 @@
+"""
+@author: luojunhui
+"""
+import json
+
+with open("config.txt", encoding="utf-8") as f:
+    data = f.readlines()
+
+L = {}
+for line in data:
+    ll = line.strip().split(",")
+    plan_id = ll[0]
+    cate = ll[1].split("-")[1]
+    print(plan_id, cate)
+    L[plan_id] = cate
+
+with open("crawler_plan_config.json", "w", encoding="utf-8") as f:
+    f.write(json.dumps(L, ensure_ascii=False, indent=4))
+
+
+

+ 93 - 0
tasks/task1.py

@@ -0,0 +1,93 @@
+"""
+@author: luojunhui
+"""
+import json
+
+from tqdm import tqdm
+
+from applications import AIDTApi, DeNetMysql, PQMySQL
+
+
+class ColdStartPool(object):
+    """
+    冷启动任务
+    """
+    AidApi = AIDTApi()
+    DeMysql = DeNetMysql()
+    PqMysql = PQMySQL()
+
+    @classmethod
+    def getPlanAllArticles(cls, plan_id):
+        """
+        获取一个计划的所有内容
+        :param plan_id:
+        :return:
+        """
+        page = 1
+        response = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page).get("data", {})
+        data_list = response['data']
+        all_articles_count = response['totalCount']
+        while len(data_list) + 50 * (page - 1) < all_articles_count:
+            page += 1
+            response_next_page = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page).get("data", {})
+            data_list += response_next_page['data']
+        return data_list
+
+    @classmethod
+    def updateToPool(cls, plan_id):
+        """
+        获取计划内容并且写入冷启池
+        :param plan_id:
+        :return:
+        """
+        each_plan_articles = cls.getPlanAllArticles(plan_id)
+        for article in tqdm(each_plan_articles):
+            try:
+                cls.updateEachArticle(article)
+            except Exception as e:
+                print(e)
+
+        # with ThreadPoolExecutor(max_workers=10) as Pool:
+        #     Pool.map(cls.updateEachArticle, each_plan_articles)
+
+    @classmethod
+    def updateEachArticle(cls, article_obj):
+        """
+        update each article to db
+        :param article_obj:
+        :return:
+        """
+        sql = f"""
+        INSERT INTO cold_start_article_pool
+            (content_id, content_link, title, cover, view_count, like_count, looking_count, publish_time_stamp, plan_id, category, content_channel_id)
+        VALUES
+            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+        """
+        params = (
+            article_obj.get("id"),
+            article_obj.get("contentLink"),
+            article_obj.get("title"),
+            article_obj.get("coverImageUrl"),
+            article_obj.get("viewCount"),
+            article_obj.get("likeCount"),
+            article_obj.get("lookingCount"),
+            article_obj.get("publishTimestamp"),
+            article_obj.get("sourceCrawlerPlans")[0].get("id"),
+            article_obj.get("sourceCrawlerPlans")[0].get("name").split("-")[1],
+            article_obj.get("channelContentId")
+        )
+        cls.PqMysql.update(sql=sql, params=params)
+
+    @classmethod
+    def deal(cls):
+        """
+        获取非空抓取计划id
+        :return:
+        """
+        plan_id_list = cls.DeMysql.getUnEmptyPlan()
+        for plan_id in tqdm(plan_id_list):
+            cls.updateToPool(plan_id)
+
+
+# Script entry: the task runs as soon as this module is executed.
+# NOTE(review): there is no `if __name__ == "__main__":` guard, so merely
+# importing this module also runs the whole task.
+CST = ColdStartPool()
+CST.deal()

+ 79 - 0
tasks/task2.py

@@ -0,0 +1,79 @@
+"""
+@author: luojunhui
+"""
+from concurrent.futures.thread import ThreadPoolExecutor
+
+from tqdm import tqdm
+
+from applications import AIDTApi, DeNetMysql, PQMySQL, Functions
+from config import poolLevelConfig
+
+
+class ColdStartTask(object):
+    """
+    冷启分配任务
+    """
+    AidApi = AIDTApi()
+    DeMysql = DeNetMysql()
+    PqMysql = PQMySQL()
+    Fun = Functions()
+    pool4 = poolLevelConfig['4']
+
+    @classmethod
+    def getTopArticles(cls, category, limit_count):
+        """
+        获取高分享的文章list
+        :return:
+        """
+        sql = f"""
+        select content_id, content_link, title 
+        from cold_start_article_pool
+        where category = '{category}'
+        order by view_count DESC, publish_time_stamp DESC
+        limit {limit_count};
+        """
+        result = cls.PqMysql.select(sql)
+        return result
+
+    @classmethod
+    def computeScore(cls):
+        """
+        和每个账号计算相关性分数
+        :return:
+        """
+        # category_list = ["军事政法", "健康养生", "搞笑幽默"]
+        category_list = ["军事政法", "健康养生"]
+        L = []
+        for category in category_list:
+            article_tuple = cls.getTopArticles(category)
+            title_list = [article[2] for article in article_tuple]
+            score_list = cls.Fun.getTitleScore(title_list, "指尖奇文")['指尖奇文']['score_list']
+            for index, score in enumerate(score_list):
+                obj = {
+                    "id": article_tuple[index][0],
+                    "url": article_tuple[index][1],
+                    "title": article_tuple[index][2],
+                    "cate": category,
+                    "score": score
+                }
+                L.append(obj)
+
+        result = [i for i in L if i['score'] >= 0.4]
+        return result
+
+    @classmethod
+    def sendToColdPool(cls):
+        """
+        把文章send至第四层
+        :return:
+        """
+        result = cls.computeScore()
+        army = [i for i in result if i['cate'] == '军事政法']
+        healthy = [i for i in result if i['cate'] == '健康养生']
+        print(len(army))
+        print(len(healthy))
+
+
+# Script entry / manual check: print the 20 top articles of one category.
+# NOTE(review): there is no `if __name__ == "__main__":` guard, so this
+# query also runs whenever the module is imported.
+cst = ColdStartTask()
+res = cst.getTopArticles(category="军事政法", limit_count=20)
+print(res)