import configparser
import os
import shutil
import tarfile
import time
import uuid

from confluent_kafka import Consumer
from loguru import logger
from minio import Minio
from minio.error import S3Error

class KafkaConsumer:
    """
    Consume object names from a Kafka topic, download them from MinIO and
    bundle the downloads into tar.gz archives.

    Every Kafka message value is interpreted as the name of an object in the
    configured MinIO bucket. Downloaded files accumulate in ``self.files``;
    once their count reaches ``batch_size`` or their combined size in bytes
    reaches ``size_threshold`` they are packed into one archive, which is
    then moved into ``remote_folder``.
    """

    # Consumer group used when the [kafka] section has no ``group_id`` option.
    DEFAULT_GROUP_ID = 'my_group'

    def __init__(self, config_path):
        """
        Initialize the Kafka consumer, the MinIO client and the batch state.

        :param config_path: path to the INI configuration file
        :raises FileNotFoundError: if the configuration file cannot be read
        """
        self.config = configparser.ConfigParser()
        # ConfigParser.read() silently skips unreadable files; fail here with
        # a clear message instead of an opaque KeyError a few lines below.
        if not self.config.read(config_path):
            raise FileNotFoundError(
                f"Cannot read configuration file: {config_path}"
            )

        kafka_cfg = self.config['kafka']
        self.consumer = Consumer({
            'bootstrap.servers': kafka_cfg['bootstrap_servers'],
            # Optional override; the fallback keeps the previous fixed group.
            'group.id': kafka_cfg.get('group_id', fallback=self.DEFAULT_GROUP_ID),
            'auto.offset.reset': 'earliest',
        })
        self.consumer.subscribe([kafka_cfg['topic']])

        minio_cfg = self.config['minio']
        self.minio_client = Minio(
            minio_cfg['endpoint'],
            access_key=minio_cfg['access_key'],
            secret_key=minio_cfg['secret_key'],
            # Optional override; True is the client library's own default.
            secure=minio_cfg.getboolean('secure', fallback=True),
        )

        self.download_folder = self.config['local']['download_folder']
        self.remote_folder = self.config['local']['remote_folder']
        self.batch_size = int(self.config['packing']['batch_size'])
        self.size_threshold = int(self.config['packing']['size_threshold'])
        self.files = []      # local paths downloaded since the last archive
        self.total_size = 0  # combined size in bytes of the paths in self.files

    def _resolve_local_path(self, object_name):
        """
        Map an object name to an absolute path inside the download folder.

        :param object_name: object name received from Kafka (untrusted)
        :return: absolute local path for the object
        :raises ValueError: if the name is empty or would resolve to a
            location outside the download folder (absolute path, ``..``)
        """
        base = os.path.abspath(self.download_folder)
        candidate = os.path.abspath(os.path.join(base, object_name))
        # commonpath() itself raises ValueError for paths on different drives.
        if candidate == base or os.path.commonpath([base, candidate]) != base:
            raise ValueError("object name resolves outside the download folder")
        return candidate

    @staticmethod
    def _archive_name(file_count):
        """
        Build an archive file name that is unique for every call.

        :param file_count: number of files that go into the archive
        :return: name of the form ``batch_<count>_<timestamp>_<random>.tar.gz``
        """
        stamp = time.strftime("%Y%m%dT%H%M%S")
        return f"batch_{file_count}_{stamp}_{uuid.uuid4().hex[:8]}.tar.gz"

    def download_file(self, object_name):
        """
        Download one object from MinIO into the download folder.

        Failures are logged and swallowed so that a single bad object does
        not stop the consumer. The file is added to the current batch only
        after both the download and the size lookup have succeeded.

        :param object_name: name of the object in the MinIO bucket
        """
        try:
            local_path = self._resolve_local_path(object_name)
        except ValueError as e:
            logger.error(f"Refusing to download {object_name!r}: {e}")
            return

        try:
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            self.minio_client.fget_object(
                self.config['minio']['bucket'],
                object_name,
                local_path
            )
            size = os.path.getsize(local_path)
        except S3Error as e:
            logger.error(f"Failed to download {object_name}: {e}")
            return
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return

        # Update both fields together so they can never drift apart.
        self.files.append(local_path)
        self.total_size += size
        logger.info(f"Downloaded {object_name} to {local_path}")

    def pack_files(self):
        """
        Pack the files of the current batch into a tar.gz archive.

        The archive is written into the download folder first and then moved
        into the remote folder, which is created if missing. Members are
        stored under their path relative to the download folder, so objects
        that share a base name do not collide. Does nothing when the batch
        is empty. The batch state is reset only after the move succeeded.
        """
        if not self.files:
            return

        archive_name = self._archive_name(len(self.files))
        staging_path = os.path.join(self.download_folder, archive_name)
        final_path = os.path.join(self.remote_folder, archive_name)
        os.makedirs(self.remote_folder, exist_ok=True)

        try:
            with tarfile.open(staging_path, "w:gz") as tar:
                for path in self.files:
                    tar.add(
                        path,
                        arcname=os.path.relpath(path, self.download_folder),
                    )
            shutil.move(staging_path, final_path)
        except Exception:
            # Do not leave a half-written archive behind.
            if os.path.exists(staging_path):
                os.remove(staging_path)
            raise

        # NOTE(review): the downloaded source files are kept on disk, as
        # before; confirm whether they should be deleted once archived.
        self.files = []
        self.total_size = 0
        logger.info(f"Packed files into {final_path}")

    def run(self):
        """
        Poll Kafka forever and process every message as an object name.

        Messages with an error, an empty payload or a payload that is not
        valid UTF-8 are logged and skipped. On exit (Ctrl+C or an unexpected
        error) the partially filled batch is packed before the consumer is
        closed, so files that were already downloaded are not left unpacked.
        """
        try:
            while True:
                msg = self.consumer.poll(1.0)
                if msg is None:
                    continue
                if msg.error():
                    logger.error(f"Consumer error: {msg.error()}")
                    continue

                payload = msg.value()
                if payload is None:
                    logger.warning("Skipping message without a payload")
                    continue
                try:
                    object_name = payload.decode('utf-8')
                except UnicodeDecodeError as e:
                    logger.error(f"Skipping message with non UTF-8 payload: {e}")
                    continue

                self.download_file(object_name)
                if len(self.files) >= self.batch_size or self.total_size >= self.size_threshold:
                    self.pack_files()
        except KeyboardInterrupt:
            logger.info("Shutting down consumer...")
        finally:
            try:
                self.pack_files()
            except Exception as e:
                logger.error(f"Failed to pack remaining files on shutdown: {e}")
            finally:
                self.consumer.close()

if __name__ == "__main__":
    # Script entry point: build the worker from the config file (path is
    # relative to the current working directory) and consume until stopped.
    config_file = "../config/config.ini"
    KafkaConsumer(config_file).run()