wangkun 1 year ago
parent
commit
0ceffe7d78

+ 107 - 0
dev/dev_script/get_img.py

@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/9/11
+from urllib.parse import urlencode
+from urllib.request import urlretrieve
+
+import requests
+import time
+
+
def getPage(offset):
    """Fetch one page (30 items) of Baidu image-search JSON results.

    Args:
        offset: result offset passed as the 'pn' parameter; pages step by 30.

    Returns:
        The parsed JSON dict on HTTP 200, otherwise None (non-200 status,
        or any requests-level failure).
    """
    data = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': '街拍',
        'cl': '2',
        'lm': '-1',
        # Fixed: the original sent 'utf - 8' (with spaces), which is not a
        # valid encoding token for the ie/oe parameters.
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',
        'hd': '',
        'latest': '',
        'copyright': '',
        'word': '街拍',
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': offset,
        'rn': '30',
        'gsm': '1e',
        # Millisecond-timestamp cache-buster key copied from the browser
        # request; the value is intentionally empty.
        '1551789143500': '',
    }
    headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Encoding': 'deflate, br',
        # Fixed: the original had the header NAME as its own value.
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Cookie': 'BDqhfp=%E8%A1%97%E6%8B%8D%26%260-10-1undefined%26%260%26%261; BIDUPSID=7CA5F033CA22949F5FB6110DBC5DC1EE; BAIDUID=6DDE5BAA44763FD6C7CA84401CB19F36:FG=1; indexPageSugList=%5B%22%E8%A1%97%E6%8B%8D%22%5D; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; uploadTime=1551768107224; userFrom=null; BDRCVFR[X_XKQks0S63]=mk3SLVN4HKm; firstShowTip=1; cleanHistoryStatus=0',
        'Host': 'image.baidu.com',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E8%A1%97%E6%8B%8D&oq=%E8%A1%97%E6%8B%8D&rsp=-1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6735.400 QQBrowser/10.2.2328.400',
        'X-Requested-With': 'XMLHttpRequest',
    }
    url = 'https://image.baidu.com/search/acjson?' + urlencode(data)
    try:
        # Fixed: the original also passed data= on a GET, sending the query
        # dict a second time as a request body; the parameters are already
        # urlencoded into the URL. A timeout prevents hanging forever.
        res = requests.get(url, headers=headers, timeout=10)
        res.encoding = 'utf-8'  # response text encoding
        if res.status_code == 200:
            return res.json()
        return None
    except requests.RequestException:
        return None
+
+
def getImage(json):
    """Yield image records parsed from a Baidu image-search response.

    Args:
        json: the parsed response dict from getPage(), or None on failure.

    Yields:
        dicts with 'image' (the item's hoverURL, may be None) and
        'title' (fromPageTitleEnc, may be None). Yields nothing when
        json is falsy or carries no 'data' list.
    """
    # Fixed: a bare `except:` wrapped the whole body and silently swallowed
    # every error (even KeyboardInterrupt). Explicit guards are sufficient:
    # the only failure mode was json being None / not a dict.
    if not json:
        return
    for item in json.get('data') or []:
        yield {
            'image': item.get('hoverURL'),
            'title': item.get('fromPageTitleEnc'),
        }
+
+
def saveImage(item):
    """Download one image to ./pic/<title>.jpg.

    Args:
        item: a dict with 'image' (source URL or None) and 'title' keys,
            as produced by getImage().

    Returns:
        The saved file path on success, otherwise None (no URL, or the
        download/write failed).
    """
    import os  # local import: keeps the published file's import block intact

    title = item.get('title')
    image_url = item.get('image')  # source URL of the picture
    # Items without a hoverURL cannot be fetched; skip them instead of
    # letting urlretrieve raise on a None URL (previously hidden by except).
    if not image_url:
        return None
    # Fixed: the original assumed ./pic already existed and a bare
    # `except:` silently hid the resulting failure.
    os.makedirs('./pic', exist_ok=True)
    # Replace path separators so an odd title cannot escape the pic dir.
    safe_title = str(title).replace('/', '_').replace('\\', '_')
    path = './pic/' + safe_title + '.jpg'
    try:
        urlretrieve(image_url, path)
        return path
    except OSError:
        return None
+
+
def main(offset):
    """Drive one crawl step: fetch page `offset`, then print and save each item."""
    for entry in getImage(getPage(offset)):
        print(entry)
        saveImage(entry)
+
+
if __name__ == '__main__':
    # NOTE (original author): simply looping five pages is unreliable —
    # the 'gsm' value in the request data changes from page to page.
    for page in range(5):
        main(offset=page * 30)
        time.sleep(1)

+ 84 - 0
dev/dev_script/youdaofanyi.py

@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/9/11
+import requests
+import hashlib
+import time
+import random
+
+"""
+有道
+py3.6
+ning
+"""
+"""
+ var r = function(e) {
+    var t = n.md5(navigator.appVersion)
+      , r = "" + (new Date).getTime()
+      , i = r + parseInt(10 * Math.random(), 10);
+    return {
+        ts: r,
+        bv: t,
+        salt: i,
+        sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5")
+    }
+};
+
+
+"""
+
+
def p(text):
    """Build the signed form payload for Youdao's translate_o endpoint.

    Mirrors the site's JS (see module comment above):
    salt = ts + one random digit, sign = md5('fanyideskweb' + text + salt
    + secret), bv = md5(navigator.appVersion).

    Args:
        text: the source text to translate.

    Returns:
        dict of POST form fields ('i', 'salt', 'sign', 'lts', 'bv', ...).
    """
    # Fixed: removed leftover debug print(text)/print(data) calls that
    # spammed stdout from a pure payload-builder.
    ts = int(time.time() * 1000)
    salt = str(ts) + str(random.randint(0, 9))
    sign = hashlib.md5(
        ('fanyideskweb' + text + salt + 'Ygy_4c=r#e#4EX^NUGUc5').encode('utf-8')
    ).hexdigest()
    # bv is md5 of the browser's appVersion string (UA minus 'Mozilla/').
    bv = hashlib.md5(
        '5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/105.0.0.0 Safari/537.36'.encode(encoding='utf-8')
    ).hexdigest()
    return {
        'i': text,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt,
        'sign': sign,
        'lts': ts,
        'bv': bv,
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTlME'
    }
+
+
def get_p(text):
    """POST `text` to Youdao's web translator; print and return the result.

    Args:
        text: the source text to translate.

    Returns:
        The translated string, or None when the response carries no
        translation (e.g. an errorCode body when the sign is rejected).
    """
    cookie = {
        'OUTFOX_SEARCH_USER_ID': '-412303195@10.112.57.88',
        'OUTFOX_SEARCH_USER_ID_NCOO': '257278283.1462334',
        # the site checks this anti-bot cookie against the request time
        '___rl__test__cookies': '{}'.format(int(time.time() * 1000))
    }
    header = {
        'User-Agent': 'Mozilla / 5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 105.0.0.0Safari / 537.36',
        'Referer': 'https://fanyi.youdao.com/'

    }
    # Fixed: added a timeout (the original could hang forever).
    my_json = requests.post('https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule',
                       data=p(text), headers=header, cookies=cookie, timeout=10).json()
    # Fixed: the original crashed with KeyError on error responses such as
    # {'errorCode': 50}, and printed the result without returning it.
    try:
        result = my_json['translateResult'][0][0]['tgt']
    except (KeyError, IndexError, TypeError):
        return None
    print(result)
    return result
+
+
if __name__ == '__main__':
    # Translate each line the user enters, stopping at the "exit" sentinel.
    for query in iter(lambda: input('请输入翻译的内容:'), 'exit'):
        get_p(query)

+ 1 - 1
ganggangdouchuan/ganggangdouchuan_recommend/ganggangdouchuan_recommend.py

@@ -166,7 +166,7 @@ class GanggangdouchuanRecommend:
 
     @classmethod
     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
-        sql = f""" select * from crawler_video where platform in ("{crawler}", "{cls.platform}") and out_video_id="{out_video_id}"; """
+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)
 

+ 1 - 1
jixiangxingfu/jixiangxingfu_recommend/jixiangxingfu_recommend.py

@@ -163,7 +163,7 @@ class JixiangxingfuRecommend:
                 pass
     @classmethod
     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)
 

+ 1 - 1
zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqingtiantiankan_recommend.py

@@ -139,7 +139,7 @@ class ZhiqingtiantiankanRecommend:
 
     @classmethod
     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)
 

+ 1 - 1
zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend.py

@@ -159,7 +159,7 @@ class ZhongmiaoyinxinRecommend:
 
     @classmethod
     def repeat_out_video_id(cls, log_type, crawler, out_video_id, env):
-        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{out_video_id}"; """
+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{out_video_id}"; """
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)
 

+ 1 - 1
zhufuquanzi/zhufuquanzi_recommend/zhufuquanzi_recommend.py

@@ -124,7 +124,7 @@ class ZFQZRecommend:
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
-        sql = f""" select * from crawler_video where platform in ("{crawler}","{cls.platform}") and out_video_id="{video_id}"; """
+        sql = f""" select * from crawler_video where platform in ("众妙音信", "刚刚都传", "吉祥幸福", "知青天天看", "zhufuquanzi", "haitunzhufu") and out_video_id="{video_id}"; """
         repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
         return len(repeat_video)