jihuaqiang 1 week ago
parent
commit
cf72f98115
3 changed files with 2 additions and 400 deletions
  1. 2 94
      requirements.txt
  2. 0 132
      utils/container.py
  3. 0 174
      utils/fei_shu.py

+ 2 - 94
requirements.txt

@@ -1,99 +1,7 @@
-# This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml -o requirements.txt
-annotated-types==0.7.0
-    # via pydantic
-anyio==4.10.0
-    # via
-    #   google-genai
-    #   httpx
-cachetools==5.5.2
-    # via google-auth
-certifi==2025.8.3
-    # via
-    #   httpcore
-    #   httpx
-    #   requests
-charset-normalizer==3.4.2
-    # via requests
+# 清理后的依赖包,只保留实际使用的包
 dotenv==0.9.9
-    # via knowledge (pyproject.toml)
-google-auth==2.40.3
-    # via google-genai
 google-genai==1.29.0
-    # via knowledge (pyproject.toml)
-grpcio==1.74.0
-    # via
-    #   knowledge (pyproject.toml)
-    #   grpcio-tools
-grpcio-tools==1.74.0
-    # via knowledge (pyproject.toml)
-h11==0.16.0
-    # via httpcore
-httpcore==1.0.9
-    # via httpx
-httpx==0.28.1
-    # via
-    #   google-genai
-    #   lark-oapi
-idna==3.10
-    # via
-    #   anyio
-    #   httpx
-    #   requests
-lark-oapi==1.4.20
-    # via knowledge (pyproject.toml)
 loguru==0.7.3
-    # via knowledge (pyproject.toml)
-protobuf==6.31.1
-    # via
-    #   knowledge (pyproject.toml)
-    #   grpcio-tools
-pyasn1==0.6.1
-    # via
-    #   pyasn1-modules
-    #   rsa
-pyasn1-modules==0.4.2
-    # via google-auth
-pycryptodome==3.23.0
-    # via lark-oapi
-pydantic==2.11.7
-    # via google-genai
-pydantic-core==2.33.2
-    # via pydantic
-python-dotenv==1.1.1
-    # via dotenv
-requests==2.32.4
-    # via
-    #   knowledge (pyproject.toml)
-    #   google-genai
-    #   lark-oapi
-    #   requests-toolbelt
-requests-toolbelt==1.0.0
-    # via lark-oapi
-rsa==4.9.1
-    # via google-auth
-setuptools==80.9.0
-    # via
-    #   knowledge (pyproject.toml)
-    #   grpcio-tools
-sniffio==1.3.1
-    # via anyio
-tenacity==9.1.2
-    # via google-genai
-typing-extensions==4.14.1
-    # via
-    #   anyio
-    #   google-genai
-    #   pydantic
-    #   pydantic-core
-    #   typing-inspection
-typing-inspection==0.4.1
-    # via pydantic
-urllib3==2.5.0
-    # via requests
-websockets==15.0.1
-    # via
-    #   google-genai
-    #   lark-oapi
 pymysql==1.0.2
 Pillow==10.4.0
+requests==2.32.4

+ 0 - 132
utils/container.py

@@ -1,132 +0,0 @@
-import logging
-import os
-from hashlib import sha256
-from typing import List, Tuple
-
-import grpc
-from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2
-
-from protos import container_pb2, container_pb2_grpc
-
-
-class Container(object):
-    _channel: grpc.Channel = None
-    _stub: container_pb2_grpc.ContainerServiceStub = None
-
-    def __new__(cls, *args, **kwargs):
-        if cls._channel is None:
-            cls._channel = grpc.insecure_channel(
-                target=f'{os.getenv("CONTAINER_GRPC_HOST")}:{os.getenv("CONTAINER_GRPC_PORT")}',
-                options=[
-                    ('grpc.keepalive_time_ms', 10000),
-                    ('grpc.keepalive_timeout_ms', 5000),
-                    ('grpc.keepalive_permit_without_calls', True),
-                    ('grpc.http2.max_pings_without_data', 0),
-                ],
-            )
-        cls._stub = container_pb2_grpc.ContainerServiceStub(channel=cls._channel)
-        return super().__new__(cls, *args, **kwargs)
-
-    def __init__(self):
-        self.container_id = None
-
-    # def close_channel(self):
-    #     """关闭通道"""
-    #     if self._channel is not None:
-    #         self._channel.close()
-
-    def start_container(self):
-        """启动一个容器,其最大运行时间为1小时,到期自动停止销毁"""
-        response = self._stub.StartContainer(request=google_dot_protobuf_dot_empty__pb2.Empty())
-        container_id = response.container_id
-        if not container_id:
-            raise RuntimeError('创建容器失败')
-        self.container_id = container_id
-
-    def stop_container(self) -> bool:
-        """关闭一个容器"""
-        if self.container_id:
-            request = container_pb2.StopContainerRequest(container_id=self.container_id)
-            response = self._stub.StopContainer(request=request)
-            return response.success
-        return True
-
-    def run_command(self, command: List[str], show_log: bool = False) -> Tuple[int, str, List[str]]:
-        """在容器内执行一条命令,可用的命令为: ffprobe | ffmpeg"""
-        exit_code, msg = -999, ''
-        request = container_pb2.RunCommandRequest(container_id=self.container_id, command=command)
-        for response in self._stub.RunCommand(request=request):
-            if show_log:
-                logging.info(response.msg)
-            msg += response.msg
-            if response.exit_code != -999:
-                exit_code = response.exit_code
-        return exit_code, msg, command
-
-    def file_exists(self, file_path: str) -> bool:
-        """判断容器内指定路径的文件是否存在"""
-        request = container_pb2.FileExistsRequest(container_id=self.container_id, path=file_path)
-        response = self._stub.FileExists(request=request)
-        return response.exists
-
-    def get_file(self, container_file_path: str, host_file_path: str) -> bool:
-        """从容器内获取文件"""
-        hasher, tmp, sha256sum, length = sha256(), dict(), None, 0
-        request = container_pb2.GetFileRequest(container_id=self.container_id, path=container_file_path)
-        with open(host_file_path, 'wb') as f:
-            for response in self._stub.GetFile(request=request):
-                if response.sha256sum:
-                    sha256sum = response.sha256sum
-                    continue
-                if response.payload:
-                    hasher.update(response.payload)
-                    f.seek(response.offset)
-                    f.write(response.payload)
-                    length += len(response.payload)
-        return hasher.hexdigest() == sha256sum
-
-    def put_file(self, host_file_path: str, container_file_path: str) -> bool:
-        """将宿主机上的文件复制到容器内"""
-        total_size = os.path.getsize(host_file_path)
-        hasher, chunk_size, offset = sha256(), 1024 * 1024, 0
-        with open(host_file_path, 'rb') as f:
-            while offset < total_size:
-                f.seek(offset)
-                chunk = f.read(min(chunk_size, total_size - offset))
-                if not chunk:
-                    break
-                hasher.update(chunk)
-                offset += len(chunk)
-        sha256sum = hasher.hexdigest()
-
-        def chunk_generator():
-            yield container_pb2.ReusableChunk(container_id=self.container_id, path=container_file_path, sha256sum=sha256sum)
-
-            _offset = 0
-            with open(host_file_path, 'rb') as _f:
-                while _offset < total_size:
-                    _f.seek(_offset)
-                    _chunk = _f.read(min(chunk_size, total_size - _offset))
-                    if not _chunk:
-                        break
-                    yield container_pb2.ReusableChunk(container_id=self.container_id, offset=_offset, payload=_chunk)
-                    _offset += len(_chunk)
-
-        response = self._stub.PutFile(chunk_generator())
-        return response.success
-
-    def download_oss(self, bucket_name: str, object_key: str) -> str:
-        """将OSS文件下载到容器"""
-        request = container_pb2.DownloadOssRequest(container_id=self.container_id, bucket_name=bucket_name, object_key=object_key)
-        response = self._stub.DownloadOss(request=request)
-        return response.save_path
-
-    def upload_oss(self, bucket_name: str, object_key: str, container_file_path: str, media_type: str):
-        """将容器内文件上传到OSS"""
-        request = container_pb2.UploadOssRequest(container_id=self.container_id,
-                                                 bucket_name=bucket_name,
-                                                 object_key=object_key,
-                                                 file_path=container_file_path,
-                                                 media_type=media_type)
-        response = self._stub.UploadOss(request=request)
-        return response.object_key

+ 0 - 174
utils/fei_shu.py

@@ -1,174 +0,0 @@
-import os
-import uuid
-from typing import Any, Dict, Optional
-
-import lark_oapi as lark
-from lark_oapi.core.enum import LogLevel
-import config
-
-
-class FeiShu:
-
-    def __init__(self, file_token:str = None):
-        app_id = os.getenv('FEISHU_APP_ID', config.FEISHU_APP_ID)
-        app_secret = os.getenv('FEISHU_APP_SECRET', config.FEISHU_APP_SECRET)
-        self.file_token = file_token if file_token is not None else os.getenv('FEISHU_FILE_TOKEN')
-        self.client = (lark.client.Client().builder()
-                       .app_id(app_id)
-                       .app_secret(app_secret)
-                       .log_level(LogLevel.INFO)
-                       .build())
-
-    def create_table(self, name: str) -> str:
-        request = (lark.bitable.v1.CreateAppTableRequest.builder()
-                   .app_token(self.file_token)
-                   .request_body(lark.bitable.v1.CreateAppTableRequestBody.builder()
-                                 .table(lark.bitable.v1.ReqTable.builder()
-                                        .name(name)
-                                        .fields([
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('原文链接').type(15).build(),
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('抓取结果').type(1).build(),
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('标题').type(1).build(),
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('用户链接').type(15).build(),
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('识别结果').type(1).build(),
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('初步理解').type(1).build(),
-                                            lark.bitable.v1.AppTableCreateHeader.builder().field_name('物理聚合').type(1).build(),
-                                        ])
-                                        .build())
-                                 .build())
-                   .build())
-        response = self.client.bitable.v1.app_table.create(request)
-        if not response.success():
-            raise RuntimeError(f'多维表格创建新数据表失败: {lark.JSON.marshal(response.error)}')
-        return response.data.table_id
-
-    def get_all_records(self, table_id: str, page_token: Optional[str] = None):
-        request = (lark.bitable.v1.SearchAppTableRecordRequest.builder()
-                   .app_token(self.file_token)
-                   .table_id(table_id)
-                   .user_id_type('open_id')
-                   .page_size(500)
-                   .request_body(lark.bitable.v1.SearchAppTableRecordRequestBody.builder()
-                                 .automatic_fields(True)
-                                 .build()))
-        if page_token:
-            request = request.page_token(page_token)
-        request = request.build()
-        response = self.client.bitable.v1.app_table_record.search(request)
-        if not response.success():
-            raise RuntimeError(f'获取多维表格记录失败: {lark.JSON.marshal(response.error)}')
-        return response.data
-
-    def get_record(self, table_id: str, *record_ids: str):
-        request = (lark.bitable.v1.BatchGetAppTableRecordRequest.builder()
-                   .app_token(self.file_token)
-                   .table_id(table_id)
-                   .request_body(lark.bitable.v1.BatchGetAppTableRecordRequestBody.builder()
-                                 .record_ids(list(record_ids))
-                                 .user_id_type('open_id')
-                                 .with_shared_url(True)
-                                 .automatic_fields(True)
-                                 .build())
-                   .build())
-        response = self.client.bitable.v1.app_table_record.batch_get(request)
-        if not response.success():
-            raise RuntimeError(f'获取多维表格指定记录失败: {lark.JSON.marshal(response.error)}')
-        return response.data
-
-    def create_record(self, table_id: str, *fields: Dict[str, Any]):
-        request = (lark.bitable.v1.BatchCreateAppTableRecordRequest.builder()
-                   .app_token(self.file_token)
-                   .table_id(table_id)
-                   .user_id_type('open_id')
-                   .client_token(str(uuid.uuid4()))
-                   .ignore_consistency_check(False)
-                   .request_body(lark.bitable.v1.BatchCreateAppTableRecordRequestBody.builder()
-                                 .records([lark.bitable.v1.AppTableRecord.builder()
-                                          .fields(item)
-                                          .build() for item in fields])
-                                 .build())
-                   .build())
-        response = self.client.bitable.v1.app_table_record.batch_create(request)
-        if not response.success():
-            raise RuntimeError(f'向多维表格添加记录失败: {lark.JSON.marshal(response.error)}')
-        return response.data
-
-    def update_record(self, table_id: str, *records: lark.bitable.v1.AppTableRecord):
-        request = (lark.bitable.v1.BatchUpdateAppTableRecordRequest.builder()
-                   .app_token(self.file_token)
-                   .table_id(table_id)
-                   .user_id_type('open_id')
-                   .ignore_consistency_check(False)
-                   .request_body(lark.bitable.v1.BatchUpdateAppTableRecordRequestBody.builder()
-                                 .records(list(records))
-                                 .build())
-                   .build())
-        response = self.client.bitable.v1.app_table_record.batch_update(request)
-        if not response.success():
-            raise RuntimeError(f'更新多维表格指定记录失败: {lark.JSON.marshal(response.error)}')
-        return response.data
-
-    def delete_record(self, table_id: str, *record_ids: str):
-        request = (lark.bitable.v1.BatchDeleteAppTableRecordRequest.builder()
-                   .app_token(self.file_token)
-                   .table_id(table_id)
-                   .request_body(lark.bitable.v1.BatchDeleteAppTableRecordRequestBody.builder()
-                                 .records(list(record_ids))
-                                 .build())
-                   .build())
-        response = self.client.bitable.v1.app_table_record.batch_delete(request)
-        if not response.success():
-            raise RuntimeError(f'删除多维表格指定记录失败: {lark.JSON.marshal(response.error)}')
-        return response.data
-
-
-if __name__ == '__main__':
-    from dotenv import load_dotenv
-
-    try:
-        load_dotenv()
-    except ImportError:
-        raise RuntimeError('导入环境变量失败')
-
-    feishu = FeiShu()
-    # 创建数据表
-    new_table_id = feishu.create_table('测试数据表')
-    # 新增记录
-    new_fields = [
-        {
-            '原文链接': {
-                'link': 'https://www.baidu.com',
-                'text': 'https://www.baidu.com',
-            },
-            '抓取结果': '这是抓取结果1',
-        },
-        {
-            '原文链接': {
-                'link': 'https://www.qq.com',
-                'text': 'https://www.qq.com',
-            },
-            '抓取结果': '这是抓取结果2',
-        }
-    ]
-    feishu.create_record(new_table_id, *new_fields)
-    # 获取全部记录
-    get_result = feishu.get_all_records(new_table_id)
-    has_more = get_result.has_more  # 是否有下一页
-    next_page_token = get_result.page_token  # 下一页token
-    new_record_ids = []
-    for record in get_result.items:
-        new_record_ids.append(record.record_id)
-        print(record.fields)
-    # 更新记录
-    new_record = (lark.bitable.v1.AppTableRecord.builder()
-                  .record_id(new_record_ids[0])
-                  .fields({'识别结果': '这是识别结果'})
-                  .build())
-    feishu.update_record(new_table_id, new_record)
-    # 获取指定ID记录
-    get_result = feishu.get_record(new_table_id, *new_record_ids)
-    for record in get_result.records:
-        new_record_ids.append(record.record_id)
-        print(record.fields)
-    # 删除指定ID记录
-    feishu.delete_record(new_table_id, new_record_ids[1])