
first commit

kk committed 183ea5fa1f · 2 years ago

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+venv
+.idea

+ 0 - 0
common/__init__.py


BIN
common/__pycache__/__init__.cpython-310.pyc


+ 0 - 0
common/db/__init__.py


BIN
common/db/__pycache__/__init__.cpython-310.pyc


BIN
common/db/__pycache__/mysql_help.cpython-310.pyc


+ 102 - 0
common/db/mysql_help.py

@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/2
+"""
+Database connection and query helpers.
+"""
+import logging
+
+import pymysql
+
+
+class MysqlHelper:
+    @classmethod
+    def connect_mysql(cls, env='', machine=''):
+        if machine == 'aliyun_hk':
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                host="rm-j6cz4c6pt96000xi3.mysql.rds.aliyuncs.com",  # database host, internal address
+                # host="rm-j6cz4c6pt96000xi3lo.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,  # port
+                user="crawler",  # MySQL username
+                passwd="crawler123456@",  # MySQL login password
+                db="piaoquan-crawler",  # database name
+                # text in the database is utf8-encoded, so specify the utf8 charset
+                charset="utf8")
+        elif env == 'prod':
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host, internal address
+                # host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,  # port
+                user="crawler",  # MySQL username
+                passwd="crawler123456@",  # MySQL login password
+                db="piaoquan-crawler",  # database name
+                # text in the database is utf8-encoded, so specify the utf8 charset
+                charset="utf8")
+        else:
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host, internal address
+                # host="rm-bp1k5853td1r25g3ndo.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,  # port
+                user="crawler",  # MySQL username
+                passwd="crawler123456@",  # MySQL login password
+                db="piaoquan-crawler",  # database name
+                # text in the database is utf8-encoded, so specify the utf8 charset
+                charset="utf8")
+
+        return connection
+
+    @classmethod
+    def get_values(cls, sql, env='', machine=''):
+        try:
+            # connect to the database
+            connect = cls.connect_mysql(env, machine)
+            # get a Cursor object
+            mysql = connect.cursor()
+
+            # execute the SQL statement
+            mysql.execute(sql)
+
+            # fetchall returns a tuple of tuples, one inner tuple per row
+            data = mysql.fetchall()
+
+            # close the database connection
+            connect.close()
+
+            # return the query result (a tuple)
+            return data
+        except Exception as e:
+            logging.error(f"get_values exception: {e}\n")
+
+    @classmethod
+    def update_values(cls, sql, env='', machine=''):
+        # connect to the database
+        connect = cls.connect_mysql(env, machine)
+        # get a Cursor object
+        mysql = connect.cursor()
+
+        try:
+            # execute the SQL statement
+            res = mysql.execute(sql)
+            # commit is required, otherwise the inserted/updated data does not take effect
+            connect.commit()
+            return res
+        except Exception as e:
+            logging.error(f"update_values exception, rolling back: {e}\n")
+            # roll back on error
+            connect.rollback()
+        finally:
+            # always close the connection (previously it stayed open on the success path)
+            connect.close()
+
+
+if __name__ == "__main__":
+    # sql_statement = f"INSERT INTO crawler_user ( user_id, out_user_id, out_user_name, out_avatar_url, platform, tag) " \
+    #       f"VALUES ('6282398', 'out_uid_003', 'out_user_name', '', 'xiaoniangao', 'xiaoniangao_play')"
+    # edit_data = MysqlHelper.edit_data(sql=sql_statement)
+    # print(edit_data)
+
+    get_data = MysqlHelper.get_values("select * from crawler_user", "dev", "local")
+    print(get_data)
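
A minimal usage sketch of MysqlHelper (not part of the commit, for orientation only); the crawler_user table, its columns, and the 'dev'/'local' arguments mirror the examples above, the inserted values are placeholders:

from common.db.mysql_help import MysqlHelper

# read: returns a tuple of row tuples, or None if the query raised an exception
rows = MysqlHelper.get_values("select user_id, out_user_name from crawler_user", env="dev", machine="local")
for user_id, out_user_name in rows or ():
    print(user_id, out_user_name)

# write: returns the affected row count on success, None after a rollback
sql = "INSERT INTO crawler_user (user_id, out_user_name, platform) VALUES ('6282398', 'demo', 'xiaoniangao')"
affected = MysqlHelper.update_values(sql, env="dev", machine="local")
print(affected)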

+ 0 - 0
common/logg/__init__.py


+ 84 - 0
common/logg/logger_help.py

@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+Common helpers, including: log creation / log cleanup / downloads / deleting weixinzhishu_chlsfiles / word filtering / saving video info to a local txt / translation / ffmpeg
+"""
+from datetime import date, timedelta
+from loguru import logger
+import datetime
+import os
+import time
+
+
+class Common:
+    # current time, <class 'datetime.datetime'>, e.g. 2022-04-14 20:13:51.244472
+    now = datetime.datetime.now()
+    # yesterday, <class 'str'>, e.g. 2022/04/13
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y/%m/%d")
+    # today, <class 'datetime.date'>, e.g. 2022-04-14
+    today = date.today()
+    # tomorrow, <class 'str'>, e.g. 2022/04/15
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y/%m/%d")
+
+    # generate log files via the loguru logger module
+    @staticmethod
+    def logger(log_type, crawler):
+        """
+        使用 logger 模块生成日志
+        """
+        # 日志路径
+        log_dir = f"./{crawler}/logs/"
+        log_path = os.getcwd() + os.sep + log_dir
+        if not os.path.isdir(log_path):
+            os.makedirs(log_path)
+
+        # 日志文件名
+        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + f'-{crawler}-{log_type}.log'
+
+        # 日志不打印到控制台
+        logger.remove(handler_id=None)
+
+        # rotation="500 MB",实现每 500MB 存储一个文件
+        # rotation="12:00",实现每天 12:00 创建一个文件
+        # rotation="1 week",每周创建一个文件
+        # retention="10 days",每隔10天之后就会清理旧的日志
+        # 初始化日志
+        logger.add(log_dir + log_name, level="INFO", rotation='00:00')
+
+        return logger
+
+    # clean up logs, keeping the 10 most recent files
+    @classmethod
+    def del_logs(cls, log_type, crawler):
+        """
+        Delete redundant log files.
+        :return: keeps only the 10 most recent logs
+        """
+        log_dir = f"./{crawler}/logs/"
+        all_files = sorted(os.listdir(log_dir))
+        all_logs = []
+        for log in all_files:
+            name = os.path.splitext(log)[-1]
+            if name == ".log":
+                all_logs.append(log)
+
+        if len(all_logs) <= 10:
+            pass
+        else:
+            for file in all_logs[:len(all_logs) - 10]:
+                os.remove(log_dir + file)
+        cls.logger(log_type, crawler).info("清除日志成功\n")
+
+    # # delete Charles cache files, keeping only the most recent ones
+    # @classmethod
+    # def del_charles_files(cls, log_type, crawler):
+    #     # all files in the target folder
+    #     all_file = sorted(os.listdir(f"./{crawler}/{crawler}_chlsfiles/"))
+    #     for file in all_file[0:-3]:
+    #         os.remove(f"./{crawler}/{crawler}_chlsfiles/{file}")
+    #     cls.logger(log_type, crawler).info("Charles cache files deleted\n")
+
+
+if __name__ == "__main__":
+    pass
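
A short usage sketch of Common.logger and Common.del_logs (not part of the commit); the "follow" log_type and "xiaoniangao" crawler name are placeholder values:

from common.logg.logger_help import Common

# writes to ./xiaoniangao/logs/<YYYY-MM-DD>-xiaoniangao-follow.log, rotated daily at 00:00
log = Common.logger("follow", "xiaoniangao")
log.info("crawler started")

# keep only the 10 most recent .log files in that directory
Common.del_logs("follow", "xiaoniangao")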

+ 16 - 0
main.py

@@ -0,0 +1,16 @@
+# This is a sample Python script.
+
+# Press ⌃R to execute it or replace it with your code.
+# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
+
+
+def print_hi(name):
+    # Use a breakpoint in the code line below to debug your script.
+    print(f'Hi, {name}')  # Press ⌘F8 to toggle the breakpoint.
+
+
+# Press the green button in the gutter to run the script.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# See PyCharm help at https://www.jetbrains.com/help/pycharm/

+ 0 - 0
server/__init__.py


+ 70 - 0
server/conf_task.py

@@ -0,0 +1,70 @@
+import json
+from flask import Flask, request
+from flask import jsonify
+from common.db.mysql_help import MysqlHelper
+
+app = Flask(__name__)
+app.config['JSON_AS_ASCII'] = False
+
+
+# only accepts GET requests
+@app.route("/v1/source/getinfo", methods=["GET"])
+def getSource():
+    # read the query-string parameters
+    get_data = request.args.to_dict()
+    fields = get_data.get('fields')
+    # # build the query from the parameters (fields is currently unused)
+    sql = 'select source, task_type, spider_name from crawler_source'
+
+    result = MysqlHelper.get_values(sql)
+    if not result:
+        return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
+    source_list = list()
+    for source, task_type, spider_name in result:
+        data = dict(
+            source=source,
+            task_type=task_type,
+            spider_name=spider_name
+        )
+        source_list.append(data)
+    return jsonify({'return_code': '200', 'result': source_list})
+
+
+@app.route("/v1/task/insert", methods=["POST"])
+def insertTask():
+    # not implemented yet; read the incoming parameters here
+    pass
+    # get_data = request.args.to_dict()
+    # the request body arrives as bytes and needs to be converted to json
+
+    # return json.dumps(return_dict, ensure_ascii=False)
+
+
+@app.route("/v1/task/gettask", methods=["GET"])
+def getTask():
+    get_data = request.args.to_dict()
+
+    sql = 'select task_id, task_name from crawler_task'
+    result = MysqlHelper.get_values(sql)
+    if not result:
+        return jsonify({'return_code': '200', 'result': [], 'message': 'no data'})
+    source_list = list()
+    for task_id, task_name in result:
+        data = dict(
+            task_id=task_id,
+            task_name=task_name,
+        )
+        source_list.append(data)
+    return jsonify({'return_code': '200', 'result': source_list})
+
+
+@app.route("/v1/author/getuser", methods=["POST"])
+def createUser():
+    data = request.form.get('author')
+    author_list = json.loads(data)  # parse the JSON payload instead of eval-ing untrusted input
+    for i in author_list:
+        print(i)
+    return jsonify({'data': data})
+
+if __name__ == "__main__":
+    app.run(debug=True)
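
For reference, a sketch of how the endpoints could be exercised once conf_task.py is running (assumes Flask's default http://127.0.0.1:5000 address; the author payload fields are made up, and /v1/task/insert is still a stub so it is not shown):

import requests

base = "http://127.0.0.1:5000"

# list crawler sources
print(requests.get(f"{base}/v1/source/getinfo", params={"fields": "source"}).json())

# list crawler tasks
print(requests.get(f"{base}/v1/task/gettask").json())

# post an author list as a JSON string in the 'author' form field
payload = '[{"uid": "001", "nick_name": "demo"}]'
print(requests.post(f"{base}/v1/author/getuser", data={"author": payload}).json())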