1 год назад · 8a4982ba23
--- a/applications/ai.py
+++ b/applications/ai.py
@@ -0,0 +1,48 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import json
			
 
				+import requests
			
 
				+from openai import OpenAI
			
 
				+
			
 
				+
			
 
				+def kimi_ai(prompt):
			
 
				+    """
			
 
				+    kimi extract text
			
 
				+    :param prompt:
			
 
				+    :return:
			
 
				+    """
			
 
				+    client = OpenAI(
			
 
				+        api_key='sk-5DqYCa88kche6nwIWjLE1p4oMm8nXrR9kQMKbBolNAWERu7q',
			
 
				+        base_url="https://api.moonshot.cn/v1"
			
 
				+    )
			
 
				+    chat_completion = client.chat.completions.create(
			
 
				+        messages=[
			
 
				+            {
			
 
				+                "role": "user",
			
 
				+                "content": prompt,
			
 
				+            }
			
 
				+        ],
			
 
				+        model="moonshot-v1-8k",
			
 
				+    )
			
 
				+    response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
			
 
				+    return response
			
 
				+
			
 
				+
			
 
				+def metaSo(prompt, mode="深入"):
			
 
				+    """
			
 
				+    meta
			
 
				+    :param prompt:
			
 
				+    :param mode:
			
 
				+    :return:
			
 
				+    """
			
 
				+    url = "http://8.217.190.241:8888/crawler/metaso/meta_ai"
			
 
				+    body = {
			
 
				+        "prompt": prompt,
			
 
				+        "mode": mode
			
 
				+    }
			
 
				+    header = {
			
 
				+        "Content-Type": "application/json"
			
 
				+    }
			
 
				+    response = requests.post(url=url, json=body, headers=header)
			
 
				+    return response.json()['data']
			
--- a/applications/functions.py
+++ b/applications/functions.py
@@ -4,6 +4,9 @@
 
				 from datetime import datetime, timedelta
			
 
				 
			
 
				 import requests
			
 
				+import json
			
 
				+import oss2
			
 
				+from uuid import uuid4
			
 
				 
			
 
				 
			
 
				 def generate_daily_strings(start_date, end_date):
			
@@ -62,4 +65,130 @@ def get_text(video_id):
 
				         json=body,
			
 
				         headers=header
			
 
				     )
			
 
				-    return response.json()
			
 
				+    return response.json()
			
 
				+
			
 
				+
			
 
				+def auto_upload_aigc():
			
 
				+    """
			
 
				+    auto publish
			
 
				+    :return:
			
 
				+    """
			
 
				+    url = "http://aigc-api.cybertogether.net/aigc/crawler/plan/save"
			
 
				+
			
 
				+    payload = json.dumps({
			
 
				+        "params": {
			
 
				+            "contentFilters": [],
			
 
				+            "accountFilters": [],
			
 
				+            "filterAccountMatchMode": 1,
			
 
				+            "filterContentMatchMode": 1,
			
 
				+            "selectModeValues": [],
			
 
				+            "imageSearchModeValues": [],
			
 
				+            "contentModal": 3,
			
 
				+            "analyze": {},
			
 
				+            "crawlerComment": 0,
			
 
				+            "inputGroup": [
			
 
				+                [
			
 
				+                    {
			
 
				+                        "inputValue": [
			
 
				+                            {
			
 
				+                                "fileName": "pqzf.png",
			
 
				+                                "ossKey": "upload/03bf695277827c2387133a1ac9290fd2.png",
			
 
				+                                "type": "image/png",
			
 
				+                                "size": 2978
			
 
				+                            }
			
 
				+                        ],
			
 
				+                        "fieldName": "cover",
			
 
				+                        "fieldType": 1,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28"
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "fieldName": "title",
			
 
				+                        "fieldType": 0,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28",
			
 
				+                        "inputValue": "412412412"
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "fieldName": "content",
			
 
				+                        "fieldType": 0,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28",
			
 
				+                        "inputValue": "12312442"
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "fieldName": "image",
			
 
				+                        "fieldType": 1,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28",
			
 
				+                        "inputValue": [
			
 
				+                            {
			
 
				+                                "fileName": "lehuo.png",
			
 
				+                                "ossKey": "upload/4bf6db57ccd1629909e070833aab8878.png",
			
 
				+                                "type": "image/png",
			
 
				+                                "size": 5085
			
 
				+                            }
			
 
				+                        ]
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "fieldName": "video",
			
 
				+                        "fieldType": 2,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28"
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "fieldName": "audio",
			
 
				+                        "fieldType": 3,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28"
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "fieldName": "tag",
			
 
				+                        "fieldType": 0,
			
 
				+                        "groupId": "fa9557a13208975a893777188f9e4b28"
			
 
				+                    }
			
 
				+                ]
			
 
				+            ],
			
 
				+            "inputSourceGroups": [],
			
 
				+            "modePublishTime": [],
			
 
				+            "name": "junhui测试自动上传_by_python",
			
 
				+            "frequencyType": 3,
			
 
				+            "planType": 2
			
 
				+        },
			
 
				+        "baseInfo": {
			
 
				+            "token": "af54cdc404c3464d896745df389b2dce",
			
 
				+            "appType": 9,
			
 
				+            "platform": "pc",
			
 
				+            "appVersionCode": 1000,
			
 
				+            "clientTimestamp": 1,
			
 
				+            "fid": 1,
			
 
				+            "loginUid": 1,
			
 
				+            "pageSource": 1,
			
 
				+            "requestId": 1,
			
 
				+            "rid": 1,
			
 
				+            "uid": 1
			
 
				+        }
			
 
				+    })
			
 
				+    headers = {
			
 
				+        'Accept': 'application/json',
			
 
				+        'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
			
 
				+        'Content-Type': 'application/json',
			
 
				+        'Origin': 'http://aigc-admin.cybertogether.net',
			
 
				+        'Proxy-Connection': 'keep-alive',
			
 
				+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
			
 
				+    }
			
 
				+
			
 
				+    response = requests.request("POST", url, headers=headers, data=payload)
			
 
				+
			
 
				+    print(response.text)
			
 
				+
			
 
				+
			
 
				+def upload_to_oss(local_path):
			
 
				+    """
			
 
				+    上传到oss
			
 
				+    :return:
			
 
				+    """
			
 
				+    oss_video_key = str(uuid4())
			
 
				+    access_key_id = "LTAIP6x1l3DXfSxm"
			
 
				+    access_key_secret = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
			
 
				+    endpoint = "oss-cn-hangzhou.aliyuncs.com"
			
 
				+    bucket_name = "art-pubbucket"
			
 
				+    bucket = oss2.Bucket(
			
 
				+        oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name
			
 
				+    )
			
 
				+    bucket.put_object_from_file(key=oss_video_key, filename=local_path)
			
 
				+    return oss_video_key
			
--- a/spider/__init__.py
+++ b/spider/__init__.py
--- a/spider/toutiao.py
+++ b/spider/toutiao.py
@@ -0,0 +1,96 @@
 
				+"""
			
 
				+@author: luojunhui
			
 
				+"""
			
 
				+import urllib.parse
			
 
				+import requests
			
 
				+
			
 
				+from fake_useragent import FakeUserAgent
			
 
				+from lxml import etree
			
 
				+
			
 
				+
			
 
				+def tunnel_proxies():
			
 
				+    """
			
 
				+        快代理方法
			
 
				+        :return:
			
 
				+        """
			
 
				+    tunnel = "q796.kdltps.com:15818"
			
 
				+    username = "t17772369458618"
			
 
				+    password = "5zqcjkmy"
			
 
				+    proxies = {
			
 
				+        "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
			
 
				+        "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
			
 
				+    }
			
 
				+    return proxies
			
 
				+
			
 
				+
			
 
				+def search_article(title):
			
 
				+    """
			
 
				+    通过标题搜索文章
			
 
				+    :param title:
			
 
				+    :return:
			
 
				+    """
			
 
				+    url = "https://so.toutiao.com/search"
			
 
				+    params = {
			
 
				+        "dvpf": "pc",
			
 
				+        "source": "search_subtab_switch",
			
 
				+        "keyword": title,
			
 
				+        "page_num": 0,
			
 
				+        "pd": "information",
			
 
				+        "action_type": "search_subtab_switch",
			
 
				+        "search_id": "",
			
 
				+        "from": "news",
			
 
				+        "cur_tab_title": "news"
			
 
				+    }
			
 
				+    headers = {
			
 
				+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
			
 
				+        'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
			
 
				+        'Cache-Control': 'max-age=0',
			
 
				+        'Connection': 'keep-alive',
			
 
				+        'Cookie': '__ac_nonce=06656db2c0044df592020; __ac_signature=_02B4Z6wo00f01f7SgiAAAIDA3giITC7VZDn-8oaAABnk69; msToken=2BrMcxLg3_PsS1iKcjuLpU1GS9iZsZ-51KSQTgUSRRLhGQqsQV3zKuJR49smQ7f8hQ8fahWtYCKC6TKJO3kR8022S-NsNfdHXu7X7mPM; ttwid=1%7C9b5sTIuwZxZKt0wFsvE-2t5OoFxH_Q5VIpVNWEREbAo%7C1716968863%7C3459387c0d736a3410ba2c5dbdaeb61a7f85f161bbf6cb1a4f646c2a2edc9aa5;',
			
 
				+        'Sec-Fetch-Dest': 'document',
			
 
				+        'Sec-Fetch-Mode': 'navigate',
			
 
				+        'Sec-Fetch-Site': 'none',
			
 
				+        'Sec-Fetch-User': '?1',
			
 
				+        'Upgrade-Insecure-Requests': '1',
			
 
				+        'User-Agent': FakeUserAgent().random,
			
 
				+    }
			
 
				+    response = requests.get(url=url, headers=headers, params=params)
			
 
				+    html = etree.HTML(response.text)
			
 
				+    xpath = '//div[@class="cs-view cs-view-block cs-card-content"]//a/@href'
			
 
				+    result_list = html.xpath(xpath)
			
 
				+    urls = [urllib.parse.unquote(i.replace("/search/jump?url=", "")) for i in result_list]
			
 
				+    article_urls = [url.replace("http://www.toutiao.com/a", "https://www.toutiao.com/article/") for url in urls]
			
 
				+    return article_urls
			
 
				+
			
 
				+
			
 
				+def parse_detail(url):
			
 
				+    """
			
 
				+
			
 
				+    :param url:
			
 
				+    :return:
			
 
				+    """
			
 
				+    payload = {}
			
 
				+    headers = {
			
 
				+        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
			
 
				+        'accept-language': 'en',
			
 
				+        'cookie': '__ac_signature=_02B4Z6wo00f01uL1Y5QAAIDDwi9p-RyULmbi1WcAAN8B5b; tt_webid=7371293454351697471; ttcid=a7499fc4f17243e1a6f1d47fc054799e16; _ga=GA1.1.1771235425.1716434457; s_v_web_id=verify_lwiorbpi_iEdehlbX_70wd_4ldg_Bctq_iI8TdJIzXVnI; csrftoken=ee756af695a449eeb73b5a3fc78978b2; _S_DPR=2.200000047683716; _S_IPAD=0; notRedShot=1; local_city_cache=%E5%8C%97%E4%BA%AC; gfkadpd=24,6457; _S_WIN_WH=1336_726; ttwid=1%7C9b5sTIuwZxZKt0wFsvE-2t5OoFxH_Q5VIpVNWEREbAo%7C1717060308%7C4203558278ecaad696c5c5d3ff2bbab682c3ae0f89b876ab618b4646f04c81ef; tt_scid=GNVu.6vOxpT8JE14FSVyGcUl7IItTWcIP.qGQ.gRujo02xjV1BqV1tKznH7NlULMc251; msToken=dG4Y0x3XtlJGBx5WEqYilfjCRWJq629eYRg-hLWdfiTavnI5szx-h9KwUUAggGcG2i03AksGmCfQ034JCbnfWnTnhuguXU1yISZMr6YE; _ga_QEHZPBE5HH=GS1.1.1717057829.7.1.1717060394.0.0.0',
			
 
				+        'priority': 'u=0, i',
			
 
				+        'referer': url,
			
 
				+        'upgrade-insecure-requests': '1',
			
 
				+        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
			
 
				+    }
			
 
				+    response = requests.request("GET", url, headers=headers, data=payload, proxies=tunnel_proxies())
			
 
				+    html = etree.HTML(response.text)
			
 
				+    text_xpath = '//div[@class="article-content"]//p/text()'
			
 
				+    img_xpath = '//div[@class="pgc-img"]/img/@src'
			
 
				+    title_xpath = '//div[@class="article-content"]/h1/text()'
			
 
				+    result_list = html.xpath(text_xpath)
			
 
				+    img_list = html.xpath(img_xpath)
			
 
				+    title = html.xpath(title_xpath)
			
 
				+    obj = {
			
 
				+        "article_url": url,
			
 
				+        "text": "\n".join(result_list),
			
 
				+        "img_urls": img_list,
			
 
				+        "title": title
			
 
				+    }
			
 
				+    return obj