1 year ago · 0ceffe7d78
--- a/dev/dev_script/get_img.py
+++ b/dev/dev_script/get_img.py
@@ -0,0 +1,107 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2023/9/11
			
 
				+from urllib.parse import urlencode
			
 
				+from urllib.request import urlretrieve
			
 
				+
			
 
				+import requests
			
 
				+import time
			
 
				+
			
 
				+
			
 
				+def getPage(offset):
			
 
				+    """获取网页信息"""
			
 
				+    data = {
			
 
				+        'tn': 'resultjson_com',
			
 
				+        'ipn': 'rj',
			
 
				+        'ct': '201326592',
			
 
				+        'is': '',
			
 
				+        'fp': 'result',
			
 
				+        'queryWord': '街拍',
			
 
				+        'cl': '2',
			
 
				+        'lm': '-1',
			
 
				+        'ie': 'utf - 8',
			
 
				+        'oe': 'utf - 8',
			
 
				+        'adpicid': '',
			
 
				+        'st': '-1',
			
 
				+        'z': '',
			
 
				+        'ic': '0',
			
 
				+        'hd': '',
			
 
				+        'latest': '',
			
 
				+        'copyright': '',
			
 
				+        'word': '街拍',
			
 
				+        's': '',
			
 
				+        'se': '',
			
 
				+        'tab': '',
			
 
				+        'width': '',
			
 
				+        'height': '',
			
 
				+        'face': '0',
			
 
				+        'istype': '2',
			
 
				+        'qc': '',
			
 
				+        'nc': '1',
			
 
				+        'fr': '',
			
 
				+        'expermode': '',
			
 
				+        'force': '',
			
 
				+        'pn': offset,
			
 
				+        'rn': '30',
			
 
				+        'gsm': '1e',
			
 
				+        '1551789143500': '',
			
 
				+    }
			
 
				+    headers = {
			
 
				+        'Accept': 'text/plain, */*; q=0.01',
			
 
				+        'Accept-Encoding': 'deflate, br',
			
 
				+        'Accept-Language': 'Accept-Language',
			
 
				+        'Connection': 'keep-alive',
			
 
				+        'Cookie': 'BDqhfp=%E8%A1%97%E6%8B%8D%26%260-10-1undefined%26%260%26%261; BIDUPSID=7CA5F033CA22949F5FB6110DBC5DC1EE; BAIDUID=6DDE5BAA44763FD6C7CA84401CB19F36:FG=1; indexPageSugList=%5B%22%E8%A1%97%E6%8B%8D%22%5D; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; uploadTime=1551768107224; userFrom=null; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; firstShowTip=1; cleanHistoryStatus=0',
			
 
				+        'Host': 'image.baidu.com',
			
 
				+        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%A1%97%E6%8B%8D&oq=%E8%A1%97%E6%8B%8D&rsp=-1',
			
 
				+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6735.400 QQBrowser/10.2.2328.400',
			
 
				+        'X-Requested-With': 'XMLHttpRequest',
			
 
				+    }
			
 
				+    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
			
 
				+    try:
			
 
				+        res = requests.get(url, data=data, headers=headers)
			
 
				+        res.encoding = 'utf-8'  # 网页信息编码
			
 
				+        if res.status_code == 200:
			
 
				+            return res.json()
			
 
				+    except requests.ConnectionError:
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def getImage(json):
			
 
				+    """解析网页数据并爬取所需的信息"""
			
 
				+    try:
			
 
				+        data = json.get('data')
			
 
				+        if data:
			
 
				+            for item in data:
			
 
				+                yield {
			
 
				+                    'image': item.get('hoverURL'),
			
 
				+                    'title': item.get('fromPageTitleEnc'),
			
 
				+                }
			
 
				+    except:
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def saveImage(item):
			
 
				+    """把获取的图片与标题封装并存储"""
			
 
				+    try:
			
 
				+        m = item.get('title')
			
 
				+        local_image = item.get('image')  # 获取图片的url
			
 
				+        image_url = local_image
			
 
				+        urlretrieve(image_url, './pic/' + str(m) + '.jpg')
			
 
				+        # print('p'+str(m) + '.jpg')
			
 
				+    except:
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def main(offset):
			
 
				+    """调度爬取函数和存储"""
			
 
				+    json = getPage(offset)
			
 
				+    for item in getImage(json):
			
 
				+        print(item)
			
 
				+        saveImage(item)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    for i in range(5):  # 此处循环遍历五次是不可行的  每次data值中的gsm在变化
			
 
				+        main(offset=i * 30)
			
 
				+        time.sleep(1)
			
--- a/dev/dev_script/youdaofanyi.py
+++ b/dev/dev_script/youdaofanyi.py
@@ -0,0 +1,84 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2023/9/11
			
 
				+import requests
			
 
				+import hashlib
			
 
				+import time
			
 
				+import random
			
 
				+
			
 
				+"""
			
 
				+有道
			
 
				+py3.6
			
 
				+ning
			
 
				+"""
			
 
				+"""
			
 
				+ var r = function(e) {
			
 
				+    var t = n.md5(navigator.appVersion)
			
 
				+      , r = "" + (new Date).getTime()
			
 
				+      , i = r + parseInt(10 * Math.random(), 10);
			
 
				+    return {
			
 
				+        ts: r,
			
 
				+        bv: t,
			
 
				+        salt: i,
			
 
				+        sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5")
			
 
				+    }
			
 
				+};
			
 
				+
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+def p(text):
			
 
				+    print(text)
			
 
				+    t = int(time.time() * 1000)
			
 
				+    hl = hashlib.md5()
			
 
				+    i = str(t) + str(random.randint(0, 9))
			
 
				+    str_c = "fanyideskweb" + text + i + "Ygy_4c=r#e#4EX^NUGUc5"
			
 
				+    hl.update(str_c.encode(encoding='utf-8'))
			
 
				+    sign = hl.hexdigest()
			
 
				+    bv = hashlib.md5()
			
 
				+    bv.update(
			
 
				+        '5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
			
 
				+            .encode(encoding='utf-8'))
			
 
				+    bv = bv.hexdigest()
			
 
				+    data = {
			
 
				+        'i': text,
			
 
				+        'from': 'AUTO',
			
 
				+        'to': 'AUTO',
			
 
				+        'smartresult': 'dict',
			
 
				+        'client': 'fanyideskweb',
			
 
				+        'salt': i,
			
 
				+        'sign': sign,
			
 
				+        'lts': t,
			
 
				+        'bv': bv,
			
 
				+        'doctype': 'json',
			
 
				+        'version': '2.1',
			
 
				+        'keyfrom': 'fanyi.web',
			
 
				+        'action': 'FY_BY_REALTlME'
			
 
				+    }
			
 
				+    print(data)
			
 
				+    return data
			
 
				+
			
 
				+
			
 
				+def get_p(text):
			
 
				+    cookie = {
			
 
				+        'OUTFOX_SEARCH_USER_ID': '-412303195@10.112.57.88',
			
 
				+        'OUTFOX_SEARCH_USER_ID_NCOO': '257278283.1462334',
			
 
				+        '___rl__test__cookies': '{}'.format(int(time.time() * 1000))
			
 
				+    }
			
 
				+    header = {
			
 
				+        'User-Agent': 'Mozilla / 5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 105.0.0.0Safari / 537.36',
			
 
				+        'Referer': 'https://fanyi.youdao.com/'
			
 
				+
			
 
				+    }
			
 
				+    my_json = requests.post('https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule',
			
 
				+                       data=p(text), headers=header, cookies=cookie).json()
			
 
				+    print(my_json['translateResult'][0][0]['tgt'])
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    while True:
			
 
				+        str1 = input('请输入翻译的内容：')
			
 
				+        if str1 == "exit":
			
 
				+            break
			
 
				+        get_p(str1)
			
--- a/ganggangdouchuan/ganggangdouchuan_recommend/ganggangdouchuan_recommend.py
+++ b/ganggangdouchuan/ganggangdouchuan_recommend/ganggangdouchuan_recommend.py
@@ -166,7 +166,7 @@ class GanggangdouchuanRecommend:
 
				 
			
 
				     @classmethod
			
 
				     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
			
 
				-        sql = f""" select * from crawler_video where platform in ("{crawler}", "{cls.platform}") and out_video_id="{out_video_id}"; """
			
 
				+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
			
 
				         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
			
 
				         return len(repeat_video)
			
 
				 
			
--- a/jixiangxingfu/jixiangxingfu_recommend/jixiangxingfu_recommend.py
+++ b/jixiangxingfu/jixiangxingfu_recommend/jixiangxingfu_recommend.py
@@ -163,7 +163,7 @@ class JixiangxingfuRecommend:
 
				                 pass
			
 
				     @classmethod
			
 
				     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
			
 
				-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
			
 
				+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
			
 
				         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
			
 
				         return len(repeat_video)
			
 
				 
			
--- a/zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqingtiantiankan_recommend.py
+++ b/zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqingtiantiankan_recommend.py
@@ -139,7 +139,7 @@ class ZhiqingtiantiankanRecommend:
 
				 
			
 
				     @classmethod
			
 
				     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
			
 
				-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
			
 
				+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
			
 
				         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
			
 
				         return len(repeat_video)
			
 
				 
			
--- a/zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend.py
+++ b/zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend.py
@@ -159,7 +159,7 @@ class ZhongmiaoyinxinRecommend:
 
				 
			
 
				     @classmethod
			
 
				     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
			
 
				-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
			
 
				+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
			
 
				         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
			
 
				         return len(repeat_video)
			
 
				 
			
--- a/zhufuquanzi/zhufuquanzi_recommend/zhufuquanzi_recommend.py
+++ b/zhufuquanzi/zhufuquanzi_recommend/zhufuquanzi_recommend.py
@@ -124,7 +124,7 @@ class ZFQZRecommend:
 
				 
			
 
				     @classmethod
			
 
				     def repeat_video(cls, log_type, crawler, video_id, env):
			
 
				-        sql = f""" select * from crawler_video where platform in ("{crawler}","{cls.platform}") and out_video_id="{video_id}"; """
			
 
				+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{video_id}"; """
			
 
				         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
			
 
				         return len(repeat_video)