// UserLevelThrottledTranslationClient.java

package com.yumu.noveltranslator.adapter.out.translate;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.yumu.noveltranslator.domain.model.Glossary;
import com.yumu.noveltranslator.properties.TranslationLimitProperties;
import com.yumu.noveltranslator.util.SecurityUtil;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.net.Proxy;
import java.net.ProxySelector;
import java.net.SocketAddress;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import jakarta.annotation.PreDestroy;

/**
 * 基于用户级别的限流翻译客户端
 * 为不同用户级别提供不同的并发限制
 * 支持基于优秀率概率的动态轮询翻译服务
 *
 * 轮询策略:
 * 1. 统计每个引擎的「优秀翻译次数」(成功 + 快速响应)
 * 2. 计算优秀率 = 优秀次数 / 总请求数
 * 3. 根据优秀率分配请求概率
 * 4. 计数器每分钟重置,保持时效性
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class UserLevelThrottledTranslationClient implements com.yumu.noveltranslator.port.out.TranslationClientPort {

    // Timeout configuration
    private static final int CONNECT_TIMEOUT_MS = 5000;    // HTTP connect timeout: 5 seconds
    private static final int READ_TIMEOUT_MS = 30000;      // per-request timeout applied to Python-service calls: 30 seconds
    private static final long SEMAPHORE_TIMEOUT_SECONDS = 30; // max wait for a per-user semaphore permit: 30 seconds

    // Endpoint of the Python translation service; overridable via the translation.python.url property.
    @Value("${translation.python.url:http://llm-engine:8000/translate}")
    private String pythonTranslateUrl;

    // Optional service-to-service key; when non-empty it is sent as the X-Service-Key header. Defaults to null.
    @Value("${translation.python.api-key:#{null}}")
    private String pythonServiceApiKey;

    // NOTE(review): this flag is not referenced anywhere in the visible part of this class.
    private static final boolean ENABLE_ROUND_ROBIN_TRANSLATION = true;

    // ========== Routing configuration ==========

    // "Excellent" translation criterion: a successful call whose response time is <= 1000 ms
    private static final long EXCELLENT_RESPONSE_TIME_MS = 1000;

    // Statistics counters are reset once they are older than 60 seconds
    private static final long STATS_RESET_INTERVAL_SECONDS = 60;

    // Minimum number of requests per engine before its excellence rate is trusted (cold-start protection)
    private static final int MIN_REQUESTS_FOR_STATS = 5;

    // ========== Routing counters ==========

    // Python service statistics (requests, excellent responses, time of last reset in epoch millis)
    private final AtomicInteger pythonRequestCount = new AtomicInteger(0);
    private final AtomicInteger pythonExcellentCount = new AtomicInteger(0);
    private final AtomicLong pythonLastResetTime = new AtomicLong(System.currentTimeMillis());

    // MTran service statistics (requests, excellent responses, time of last reset in epoch millis)
    private final AtomicInteger mTranRequestCount = new AtomicInteger(0);
    private final AtomicInteger mTranExcellentCount = new AtomicInteger(0);
    private final AtomicLong mTranLastResetTime = new AtomicLong(System.currentTimeMillis());

    // Collaborators injected through the Lombok-generated constructor (@RequiredArgsConstructor)
    private final ExternalTranslationService externalTranslationService;
    private final TranslationLimitProperties limitProperties;
    private final TokenAwareRateLimiter tokenAwareRateLimiter;

    // Per-user concurrency semaphores, keyed by the normalized user id
    private final ConcurrentHashMap<String, Semaphore> userSemaphores = new ConcurrentHashMap<>();
    // System.nanoTime() of the most recent semaphore lookup per user; read by the idle cleanup job
    private final ConcurrentHashMap<String, Long> semaphoreLastAccessTime = new ConcurrentHashMap<>();

    /**
     * ProxySelector that always answers {@link Proxy#NO_PROXY}, so requests made through
     * {@link #client} connect directly (per the original note: internal Docker services).
     */
    private static final ProxySelector NO_PROXY_SELECTOR = new ProxySelector() {
        @Override
        public List<Proxy> select(URI uri) {
            return List.of(Proxy.NO_PROXY);
        }
        @Override
        public void connectFailed(URI uri, SocketAddress sa, IOException ioe) {
            // Intentionally a no-op: connection failures are left to the HttpClient caller
        }
    };

    // Shared HTTP client for the Python service: 5 s connect timeout, virtual-thread executor, no proxy
    private final HttpClient client = HttpClient.newBuilder()
            .connectTimeout(Duration.ofMillis(CONNECT_TIMEOUT_MS))
            .executor(Executors.newVirtualThreadPerTaskExecutor())
            .proxy(NO_PROXY_SELECTOR)
            .build();

    /**
     * Resolves the semaphore of the caller identified by the current security context.
     *
     * @return the per-user semaphore for the current user id and level
     */
    private Semaphore getUserSemaphore() {
        return getUserSemaphore(getCurrentUserId(), getCurrentUserLevel());
    }

    /**
     * Returns the semaphore for the given user, creating it on first use.
     *
     * <p>The key is normalized: {@code null} / {@code "anonymous"} share the single
     * {@code "anonymous"} semaphore; ids that already start with {@code "user_"} or
     * {@code "anonymous"} are used as-is; any other id is prefixed with {@code "user_"}.
     * The permit count is chosen from the user level only when the semaphore is created.
     * Every call also refreshes the user's last-access timestamp.
     *
     * @param userId    raw user id, may be {@code null}
     * @param userLevel user level (e.g. max / pro / premium); ignored for the anonymous user
     * @return the semaphore bound to the normalized user id
     */
    private Semaphore getUserSemaphore(String userId, String userLevel) {
        final boolean anonymousCaller = userId == null || "anonymous".equals(userId);

        final String semaphoreKey;
        if (anonymousCaller) {
            semaphoreKey = "anonymous";
        } else if (userId.startsWith("user_") || userId.startsWith("anonymous")) {
            semaphoreKey = userId;
        } else {
            semaphoreKey = "user_" + userId;
        }
        final String effectiveLevel = anonymousCaller ? "anonymous" : userLevel;

        // Touch the timestamp first so the cleanup job sees this user as active
        semaphoreLastAccessTime.put(semaphoreKey, System.nanoTime());

        return userSemaphores.computeIfAbsent(semaphoreKey, key -> {
            final boolean isMax = "max".equalsIgnoreCase(effectiveLevel);
            final boolean isPro = "pro".equalsIgnoreCase(effectiveLevel)
                    || "premium".equalsIgnoreCase(effectiveLevel);

            final int permits;
            if (isMax) {
                permits = limitProperties.getMaxConcurrencyLimit();
            } else if (isPro) {
                permits = limitProperties.getProConcurrencyLimit();
            } else if ("anonymous".equals(semaphoreKey)) {
                permits = limitProperties.getAnonymousConcurrencyLimit();
            } else {
                permits = limitProperties.getFreeConcurrencyLimit();
            }
            log.info("为用户 {} (level={}) 创建信号量,permits={}", key, effectiveLevel, permits);
            return new Semaphore(permits);
        });
    }

    /**
     * Looks up the id of the current user via {@code SecurityUtil}.
     *
     * @return whatever {@code SecurityUtil.getCurrentUserIdOrAnonymous()} yields
     */
    private String getCurrentUserId() {
        final String currentUserId = SecurityUtil.getCurrentUserIdOrAnonymous();
        return currentUserId;
    }

    /**
     * Looks up the level of the current user via {@code SecurityUtil}.
     *
     * @return whatever {@code SecurityUtil.getCurrentUserLevelOrDefault()} yields
     */
    private String getCurrentUserLevel() {
        final String currentUserLevel = SecurityUtil.getCurrentUserLevelOrDefault();
        return currentUserLevel;
    }

    /**
     * Scheduled sweep (every 30 minutes) that evicts semaphores whose last access is more
     * than 30 minutes old, so the per-user maps do not grow without bound.
     *
     * <p>An entry is evicted only if its timestamp is still the one that was judged idle:
     * the conditional {@code remove(key, value)} fails when a concurrent request refreshed
     * the timestamp after the idle check, and in that case the semaphore is kept.
     */
    @Scheduled(cron = "0 */30 * * * *")
    public void cleanupIdleSemaphores() {
        long now = System.nanoTime();
        long idleThreshold = TimeUnit.MINUTES.toNanos(30);
        int cleanedCount = 0;

        for (Map.Entry<String, Long> entry : semaphoreLastAccessTime.entrySet()) {
            String userId = entry.getKey();
            Long lastAccess = entry.getValue();
            if (now - lastAccess <= idleThreshold) {
                continue;
            }
            // Atomically drop the timestamp only if nobody touched it since we read it;
            // a refreshed entry means the user is active again and must keep its semaphore.
            if (!semaphoreLastAccessTime.remove(userId, lastAccess)) {
                continue;
            }
            userSemaphores.remove(userId);
            cleanedCount++;
            log.info("清理空闲信号量:userId={}", userId);
        }

        if (cleanedCount > 0) {
            log.info("信号量清理完成:共清理 {} 个空闲信号量", cleanedCount);
        }
    }


    /**
     * Translates {@code text} with fast mode switched off; delegates to the five-argument
     * overload.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param engine     translation engine
     * @param html       whether to use HTML translation mode (routes to MTranServer)
     * @return translation result as a JSON string
     */
    public String translate(String text, String targetLang, String engine, boolean html) {
        final boolean fastMode = false;
        return translate(text, targetLang, engine, html, fastMode);
    }

    /**
     * Expert mode: translates through the Python service first, using the user id and level
     * taken from the current security context.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param engine     translation engine name (e.g. google / deepl)
     * @return translation result as a JSON string
     */
    public String translateWithPython(String text, String targetLang, String engine) {
        return translateWithPython(text, targetLang, engine, getCurrentUserId(), getCurrentUserLevel());
    }

    /**
     * Expert mode with an explicit caller identity: reserves tokens, takes a per-user
     * concurrency permit, then translates via the Python service and falls back to
     * MTranServer when the Python call fails.
     *
     * <p>Interruption is never treated as an engine failure: if the thread is interrupted
     * while waiting for a permit or during the Python call, the interrupt flag is restored,
     * the reserved tokens are refunded and the request is aborted without trying the
     * fallback engine. Tokens are also refunded on every other failure path.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param engine     translation engine name passed on to the Python request builder
     * @param userId     caller id used for rate limiting and semaphore lookup
     * @param userLevel  caller level used for rate limiting and semaphore sizing
     * @return translation result as a JSON string
     * @throws RuntimeException when the token quota is exceeded, no permit is obtained in
     *                          time, the thread is interrupted, or both engines fail
     */
    public String translateWithPython(String text, String targetLang, String engine, String userId, String userLevel) {
        Semaphore userSemaphore = getUserSemaphore(userId, userLevel);
        int estimatedTokens = TokenAwareRateLimiter.estimateTokens(text);

        if (!tokenAwareRateLimiter.tryConsume(userId, userLevel, estimatedTokens)) {
            log.warn("[TPM限流] userId={}, level={}, tokens={}, 超过 TPM 配额", userId, userLevel, estimatedTokens);
            throw new RuntimeException("翻译频率过高,请稍后重试");
        }

        try {
            if (!userSemaphore.tryAcquire(SEMAPHORE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                String errorMsg = "并发请求过多,请稍后重试";
                log.warn("限流:{}", errorMsg);
                throw new RuntimeException(errorMsg);
            }
            try {
                return doTranslateRequest(text, targetLang, engine, List.of());
            } catch (InterruptedException e) {
                // An interrupt is a cancellation, not an engine failure: skip the fallback
                // and let the outer handler restore the flag and refund the tokens.
                throw e;
            } catch (Exception e) {
                // Python service unreachable or failed: degrade to MTranServer
                log.warn("专家模式 Python 翻译失败,降级到 MTranServer: {}", e.getMessage());
                try {
                    return doExternalTranslationRequest(text, targetLang, false);
                } catch (Exception e2) {
                    log.error("MTranServer 也失败: {}", e2.getMessage());
                    RuntimeException allFailed = new RuntimeException(
                            "所有翻译引擎均失败: " + e.getMessage() + "; " + e2.getMessage(), e2);
                    // Keep the Python failure's stack trace reachable as well
                    allFailed.addSuppressed(e);
                    throw allFailed;
                }
            } finally {
                userSemaphore.release();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            tokenAwareRateLimiter.refund(userId, estimatedTokens);
            throw new RuntimeException("请求被中断", e);
        } catch (Exception e) {
            tokenAwareRateLimiter.refund(userId, estimatedTokens);
            String errorMsg = "翻译失败:" + e.getMessage();
            log.error("Python 翻译失败:{}", errorMsg);
            throw new RuntimeException(errorMsg, e);
        }
    }

    /**
     * Translates {@code text} without a glossary; delegates to the six-argument overload
     * with an empty glossary list.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param engine     translation engine
     * @param html       whether to use HTML translation mode
     * @param fastMode   {@code true} caps the Python (LLM) routing probability at 30%,
     *                   {@code false} caps it at 70%
     * @return translation result as a JSON string
     */
    public String translate(String text, String targetLang, String engine, boolean html, boolean fastMode) {
        final List<Glossary> noGlossary = List.of();
        return translate(text, targetLang, engine, html, fastMode, noGlossary);
    }

    /**
     * Translates {@code text} with an optional glossary, using the user id and level taken
     * from the current security context.
     *
     * @param text          text to translate
     * @param targetLang    target language
     * @param engine        translation engine
     * @param html          whether to use HTML translation mode
     * @param fastMode      {@code true} caps the Python (LLM) routing probability at 30%,
     *                      {@code false} caps it at 70%
     * @param glossaryTerms glossary entries to apply
     * @return translation result as a JSON string
     */
    public String translate(String text, String targetLang, String engine, boolean html, boolean fastMode, List<Glossary> glossaryTerms) {
        return translate(text, targetLang, engine, html, fastMode, glossaryTerms,
                getCurrentUserId(), getCurrentUserLevel());
    }

    /**
     * Translates {@code text} on behalf of an explicitly supplied user (for callers such as
     * virtual threads where the security context is not propagated).
     *
     * <p>Flow: reserve tokens with the rate limiter, take a per-user concurrency permit, then
     * route the request:
     * <ol>
     *   <li>{@code html == true}: MTranServer directly;</li>
     *   <li>non-empty glossary: Python service, falling back to MTranServer (without the
     *       glossary) when Python fails;</li>
     *   <li>otherwise: statistics-based routing between the two engines.</li>
     * </ol>
     *
     * <p>The reserved tokens are refunded exactly once on every failure path, including the
     * case where no permit could be obtained within the timeout. An interrupt is treated as
     * a cancellation: the flag is restored and no fallback engine is tried.
     *
     * @param text          text to translate
     * @param targetLang    target language
     * @param engine        translation engine name passed on to the Python request builder
     * @param html          whether to use HTML translation mode
     * @param fastMode      {@code true} caps the Python routing probability at 30%,
     *                      {@code false} at 70%
     * @param glossaryTerms glossary entries, may be {@code null} or empty
     * @param userId        caller id used for rate limiting and semaphore lookup
     * @param userLevel     caller level used for rate limiting and semaphore sizing
     * @return translation result as a JSON string
     * @throws RuntimeException when the token quota is exceeded, no permit is obtained in
     *                          time, the thread is interrupted, or all engines fail
     */
    public String translate(String text, String targetLang, String engine, boolean html, boolean fastMode, List<Glossary> glossaryTerms, String userId, String userLevel) {
        Semaphore userSemaphore = getUserSemaphore(userId, userLevel);
        int estimatedTokens = TokenAwareRateLimiter.estimateTokens(text);

        if (!tokenAwareRateLimiter.tryConsume(userId, userLevel, estimatedTokens)) {
            log.warn("[TPM限流] userId={}, level={}, tokens={}, 超过 TPM 配额", userId, userLevel, estimatedTokens);
            throw new RuntimeException("翻译频率过高,请稍后重试");
        }

        try {
            // Wait for a permit, bounded by the semaphore timeout
            if (!userSemaphore.tryAcquire(SEMAPHORE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                String errorMsg = "并发请求过多,请稍后重试";
                log.warn("限流:{}", errorMsg);
                // Refunded by the RuntimeException handler below
                throw new RuntimeException(errorMsg);
            }
            try {
                // HTML mode (reader): go straight to MTranServer
                if (html) {
                    return doExternalTranslationRequest(text, targetLang, html);
                }
                // A glossary forces the Python service (MTranServer has no glossary support)
                if (glossaryTerms != null && !glossaryTerms.isEmpty()) {
                    log.info("[术语表路由] 强制使用 Python 服务(术语表大小={})", glossaryTerms.size());
                    try {
                        return doTranslateRequest(text, targetLang, engine, glossaryTerms);
                    } catch (InterruptedException e) {
                        // Cancellation, not an engine failure: do not fall back
                        throw e;
                    } catch (Exception e) {
                        // Python unreachable: degrade to MTranServer (glossary is not applied)
                        log.warn("术语表模式 Python 翻译失败,降级到 MTranServer(术语表将不生效): {}", e.getMessage());
                        try {
                            return doExternalTranslationRequest(text, targetLang);
                        } catch (Exception e2) {
                            log.error("MTranServer 也失败: {}", e2.getMessage());
                            RuntimeException allFailed = new RuntimeException(
                                    "所有翻译引擎均失败: " + e.getMessage() + "; " + e2.getMessage(), e2);
                            // Keep the Python failure's stack trace reachable as well
                            allFailed.addSuppressed(e);
                            throw allFailed;
                        }
                    }
                }
                // Statistics-based routing (fast: <=30% Python, expert: <=70% Python)
                return translateWithRoundRobin(text, targetLang, engine, fastMode, glossaryTerms);
            } finally {
                userSemaphore.release();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            tokenAwareRateLimiter.refund(userId, estimatedTokens);
            String errorMsg = "请求被中断";
            log.warn("中断:{}", errorMsg);
            throw new RuntimeException(errorMsg, e);
        } catch (RuntimeException e) {
            // Single refund point for permit timeouts and engine failures alike
            tokenAwareRateLimiter.refund(userId, estimatedTokens);
            throw e;
        } catch (Exception e) {
            tokenAwareRateLimiter.refund(userId, estimatedTokens);
            String errorMsg = "翻译请求失败:" + e.getMessage();
            log.error("错误:{}", errorMsg);
            throw new RuntimeException(errorMsg, e);
        }
    }

    /**
     * Routes one request to either the Python service or MTranServer based on the current
     * excellence statistics, with fallback in both directions.
     *
     * <p>Fallback strategy:
     * <ol>
     *   <li>Python selected: Python fails, try MTranServer; if that fails too, throw.</li>
     *   <li>MTranServer selected: MTranServer fails, try Python; if that fails too, throw.</li>
     * </ol>
     *
     * <p>Probability cap for the Python service: 30% when {@code fastMode} is true, 70%
     * otherwise. An interrupt during a Python call is treated as a cancellation: the
     * interrupt flag is restored and the request is aborted instead of trying another engine.
     *
     * @param text          text to translate
     * @param targetLang    target language
     * @param engine        translation engine name passed on to the Python request builder
     * @param fastMode      selects the Python probability cap
     * @param glossaryTerms glossary entries forwarded to the Python service, may be
     *                      {@code null} or empty
     * @return translation result as a JSON string
     * @throws RuntimeException when both engines fail or the thread is interrupted
     */
    private String translateWithRoundRobin(String text, String targetLang, String engine, boolean fastMode, List<Glossary> glossaryTerms) {
        // Drop statistics that are older than the reset interval
        resetStatsIfNeeded();

        // Decide which service handles this request
        boolean usePythonService = shouldUsePythonService(fastMode);

        if (usePythonService) {
            // Case 1: Python service selected
            log.info("[轮询路由] 选择 Python 大模型(fastMode={}, pythonCount={}, mTranCount={}, textLength={})",
                    fastMode, pythonRequestCount.get(), mTranRequestCount.get(), text.length());
            try {
                return doTranslateRequest(text, targetLang, engine, glossaryTerms);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("请求被中断", ie);
            } catch (Exception e) {
                // Python service failed: degrade to MTranServer
                log.warn("Python 服务翻译失败,降级到 MTranServer: {}", e.getMessage());
                try {
                    return doExternalTranslationRequest(text, targetLang);
                } catch (Exception e2) {
                    // MTranServer failed as well
                    log.error("MTranServer 翻译也失败:{} (根本原因:{})", e2.getMessage(),
                             e2.getCause() != null ? e2.getCause().getMessage() : "未知", e2);
                    RuntimeException allFailed = new RuntimeException(
                            "所有翻译引擎均失败 (Python → MTranServer): " + e.getMessage() + "; " + e2.getMessage(), e2);
                    // Keep the primary (Python) failure reachable as well
                    allFailed.addSuppressed(e);
                    throw allFailed;
                }
            }
        } else {
            // Case 2: MTranServer selected
            log.info("[轮询路由] 选择 MTranServer(fastMode={}, pythonCount={}, mTranCount={}, textLength={})",
                    fastMode, pythonRequestCount.get(), mTranRequestCount.get(), text.length());
            try {
                return doExternalTranslationRequest(text, targetLang);
            } catch (Exception e) {
                // MTranServer failed: degrade to the Python service
                log.warn("MTranServer 翻译失败,降级到 Python 服务:{}", e.getMessage());
                try {
                    // Forward the glossary here too, matching the Python-first branch
                    return doTranslateRequest(text, targetLang, engine, glossaryTerms);
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("请求被中断", ie);
                } catch (Exception e2) {
                    // Python service failed as well
                    log.error("Python 服务翻译也失败:{}", e2.getMessage());
                    RuntimeException allFailed = new RuntimeException(
                            "所有翻译引擎均失败 (MTranServer → Python): " + e.getMessage() + "; " + e2.getMessage(), e2);
                    // Keep the primary (MTranServer) failure reachable as well
                    allFailed.addSuppressed(e);
                    throw allFailed;
                }
            }
        }
    }

    /**
     * Translates without statistics-based routing: Python service first, MTranServer as the
     * fallback, and an exception when both fail.
     *
     * <p>An interrupt during the Python call is treated as a cancellation: the interrupt
     * flag is restored and the request is aborted without trying MTranServer.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param engine     translation engine name passed on to the Python request builder
     * @return translation result as a JSON string
     * @throws RuntimeException when both engines fail or the thread is interrupted
     */
    private String translateWithFallback(String text, String targetLang, String engine) {
        try {
            log.debug("尝试 Python 服务翻译 [engine={}]", engine);
            return doTranslateRequest(text, targetLang, engine, List.of());
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("请求被中断", ie);
        } catch (Exception e) {
            // Python service failed: degrade to MTranServer
            log.warn("Python 服务翻译失败,降级到 MTranServer: {}", e.getMessage());
            try {
                return doExternalTranslationRequest(text, targetLang);
            } catch (Exception e2) {
                // MTranServer failed as well
                log.error("MTranServer 翻译也失败:{}", e2.getMessage());
                RuntimeException allFailed = new RuntimeException(
                        "所有翻译引擎均失败: " + e.getMessage() + "; " + e2.getMessage(), e2);
                // Keep the Python failure reachable as well
                allFailed.addSuppressed(e);
                throw allFailed;
            }
        }
    }

    /**
     * Decides whether the next request goes to the Python service.
     *
     * <p>While both engines have fewer than {@code MIN_REQUESTS_FOR_STATS} samples the choice
     * is deterministic (alternating; fast mode sends the first three requests to MTran).
     * If only one engine is short of samples, that engine is chosen. Otherwise Python is
     * picked with probability {@code pythonRate / (pythonRate + mTranRate)}, capped at 30%
     * in fast mode and 70% otherwise, where a rate is excellent responses divided by requests.
     *
     * @param fastMode {@code true} caps the Python probability at 30%, {@code false} at 70%
     * @return {@code true} to use the Python service, {@code false} to use MTran
     */
    private boolean shouldUsePythonService(boolean fastMode) {
        int pythonCount = pythonRequestCount.get();
        int mTranCount = mTranRequestCount.get();

        // Cold start: fast mode prefers MTran, expert mode alternates to collect samples
        if (pythonCount < MIN_REQUESTS_FOR_STATS && mTranCount < MIN_REQUESTS_FOR_STATS) {
            int totalCount = pythonCount + mTranCount;
            if (fastMode) {
                // Fast mode: MTran first, start alternating with Python from the third request on
                return totalCount >= 3 && totalCount % 2 == 0;
            }
            // Expert mode: plain alternation
            return totalCount % 2 == 0;
        }
        if (pythonCount < MIN_REQUESTS_FOR_STATS) {
            return true;  // Python lacks samples, prefer it
        }
        if (mTranCount < MIN_REQUESTS_FOR_STATS) {
            return false; // MTran lacks samples, prefer it
        }

        // Enough samples on both sides: derive the probability from the excellence rates
        int pythonExcellent = pythonExcellentCount.get();
        int mTranExcellent = mTranExcellentCount.get();

        double pythonExcellentRate = (double) pythonExcellent / pythonCount;
        double mTranExcellentRate = (double) mTranExcellent / mTranCount;

        // Mode-dependent cap on the Python probability
        double maxPythonProbability = fastMode ? 0.3 : 0.7;

        double totalExcellent = pythonExcellentRate + mTranExcellentRate;
        if (totalExcellent == 0) {
            // Neither engine has an excellent record: fall back to the cap as the probability
            return Math.random() < maxPythonProbability;
        }

        double pythonProbability = pythonExcellentRate / totalExcellent;
        double cappedProbability = Math.min(pythonProbability, maxPythonProbability);
        double random = Math.random();

        // SLF4J only understands "{}" placeholders, so percentages are pre-formatted here;
        // the guard avoids the formatting cost when INFO logging is disabled.
        if (log.isInfoEnabled()) {
            log.info("[轮询统计] fastMode={} | Python: 请求={}, 优秀={}, 优秀率={} | MTran: 请求={}, 优秀={}, 优秀率={} | Python 原始概率={}, 上限={}, 限制后={}",
                     fastMode,
                     pythonCount, pythonExcellent, String.format("%.2f%%", pythonExcellentRate * 100),
                     mTranCount, mTranExcellent, String.format("%.2f%%", mTranExcellentRate * 100),
                     String.format("%.2f%%", pythonProbability * 100),
                     String.format("%.0f%%", maxPythonProbability * 100),
                     String.format("%.2f%%", cappedProbability * 100));
        }

        return random < cappedProbability;
    }

    /**
     * Clears the per-engine counters once their window is older than
     * {@code STATS_RESET_INTERVAL_SECONDS}. Each engine's window is claimed with a
     * compare-and-set on its last-reset timestamp, so only the thread that wins the CAS
     * zeroes that engine's counters.
     */
    private void resetStatsIfNeeded() {
        final long currentMillis = System.currentTimeMillis();
        final long windowMillis = STATS_RESET_INTERVAL_SECONDS * 1000;

        // Python service window
        final long pythonWindowStart = pythonLastResetTime.get();
        final boolean pythonExpired = currentMillis - pythonWindowStart > windowMillis;
        if (pythonExpired && pythonLastResetTime.compareAndSet(pythonWindowStart, currentMillis)) {
            pythonRequestCount.set(0);
            pythonExcellentCount.set(0);
            log.info("Python 服务统计计数器已重置");
        }

        // MTran service window
        final long mTranWindowStart = mTranLastResetTime.get();
        final boolean mTranExpired = currentMillis - mTranWindowStart > windowMillis;
        if (mTranExpired && mTranLastResetTime.compareAndSet(mTranWindowStart, currentMillis)) {
            mTranRequestCount.set(0);
            mTranExcellentCount.set(0);
            log.info("MTran 服务统计计数器已重置");
        }
    }

    /**
     * Records the outcome of one engine call: always bumps that engine's request counter,
     * and bumps its excellent counter when the call succeeded within
     * {@code EXCELLENT_RESPONSE_TIME_MS}.
     *
     * @param isPython     {@code true} for the Python service, {@code false} for MTran
     * @param success      whether the call succeeded
     * @param responseTime elapsed time of the call in milliseconds
     */
    private void recordStats(boolean isPython, boolean success, long responseTime) {
        final AtomicInteger requestCounter = isPython ? pythonRequestCount : mTranRequestCount;
        final AtomicInteger excellentCounter = isPython ? pythonExcellentCount : mTranExcellentCount;
        final boolean excellent = success && responseTime <= EXCELLENT_RESPONSE_TIME_MS;

        requestCounter.incrementAndGet();
        if (excellent) {
            excellentCounter.incrementAndGet();
        }
    }

    /**
     * Builds the JSON body for the Python translation service, sends it, and records the
     * call in the Python statistics (success flag and elapsed time) whether or not it failed.
     *
     * <p>Glossary entries with a {@code null} source or target word are skipped, because
     * {@code Map.of} rejects nulls and a single incomplete entry would otherwise fail the
     * whole request.
     *
     * @param text          text to translate
     * @param targetLang    target language, sent as {@code target_lang}
     * @param engine        requested engine name
     * @param glossaryTerms glossary entries, may be {@code null} or empty
     * @return raw response body returned by the Python service
     * @throws Exception when the HTTP call fails, is interrupted, or returns a non-200 status
     */
    private String doTranslateRequest(String text, String targetLang, String engine, List<Glossary> glossaryTerms) throws Exception {
        long startTime = System.currentTimeMillis();
        boolean success = false;
        try {
            Map<String, Object> bodyMap = new LinkedHashMap<>();
            bodyMap.put("text", text);
            bodyMap.put("target_lang", targetLang);
            // NOTE(review): the engine argument is not used; the request always asks for
            // "openai" with fallback enabled. Confirm this is intentional.
            bodyMap.put("engine", "openai");
            bodyMap.put("fallback", true);

            // Attach the glossary, ignoring incomplete entries
            if (glossaryTerms != null && !glossaryTerms.isEmpty()) {
                List<Map<String, String>> terms = glossaryTerms.stream()
                        .filter(g -> g != null && g.getSourceWord() != null && g.getTargetWord() != null)
                        .map(g -> Map.of("source", g.getSourceWord(), "target", g.getTargetWord()))
                        .toList();
                if (!terms.isEmpty()) {
                    bodyMap.put("glossaryTerms", terms);
                }
            }

            String jsonBody = JSON.toJSONString(bodyMap);
            String result = doPythonServiceRequest(jsonBody, text);
            success = true;
            return result;
        } finally {
            long costTime = System.currentTimeMillis() - startTime;
            recordStats(true, success, costTime);
        }
    }

    /**
     * POSTs {@code jsonBody} to the configured Python translation endpoint over HTTP/1.1 and
     * returns the response body.
     *
     * <p>The {@code X-Service-Key} header is added only when an API key is configured.
     * When the blocking send is interrupted, the thread's interrupt flag is restored before
     * the {@link InterruptedException} is rethrown, so callers that catch a broad
     * {@code Exception} do not silently lose the cancellation.
     *
     * @param jsonBody serialized request body
     * @param text     original text (not used by this method)
     * @return response body of a 200 response
     * @throws IOException          when the request fails or the status code is not 200
     * @throws InterruptedException when the calling thread is interrupted while sending
     */
    private String doPythonServiceRequest(String jsonBody, String text) throws Exception {
        log.info("[Python请求] URL={}, body length={}", pythonTranslateUrl, jsonBody.length());
        var requestBuilder = HttpRequest.newBuilder()
                .uri(URI.create(pythonTranslateUrl))
                .version(HttpClient.Version.HTTP_1_1)
                .header("Content-Type", "application/json; charset=UTF-8")
                .header("Accept", "application/json")
                .timeout(Duration.ofMillis(READ_TIMEOUT_MS));

        // Service-to-service authentication key, when configured
        if (pythonServiceApiKey != null && !pythonServiceApiKey.isEmpty()) {
            requestBuilder.header("X-Service-Key", pythonServiceApiKey);
        }

        HttpRequest request = requestBuilder
                .POST(HttpRequest.BodyPublishers.ofString(jsonBody, StandardCharsets.UTF_8))
                .build();
        log.info("[Python请求] 发送请求到 {}", request.uri());

        HttpResponse<String> response;
        try {
            response = client.send(request, HttpResponse.BodyHandlers.ofString());
        } catch (InterruptedException e) {
            // Catching the exception cleared the flag; put it back for upstream code
            Thread.currentThread().interrupt();
            throw e;
        }

        log.info("[Python响应] status={}, body length={}", response.statusCode(), response.body().length());
        if (response.statusCode() != 200) {
            throw new IOException("HTTP 错误:" + response.statusCode());
        }
        return response.body();
    }

    /**
     * Calls the MTran translation service with HTML mode disabled; see the three-argument
     * overload for the response format.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @return standard-format JSON string produced by the three-argument overload
     */
    private String doExternalTranslationRequest(String text, String targetLang) {
        final boolean html = false;
        return doExternalTranslationRequest(text, targetLang, html);
    }

    /**
     * Calls the MTran translation service and re-wraps its answer in the standard format.
     *
     * <p>The {@code result} field of the MTran response is extracted, HTML entities in it are
     * decoded, and the text is returned as
     * {@code {"success": true, "engine": "mtran", "translatedContent": "..."}}.
     * A missing or blank {@code result} is treated as a failure. Every call, successful or
     * not, is recorded in the MTran statistics, and calls slower than 5 seconds are logged.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param html       whether to use HTML translation mode
     * @return standard-format JSON string
     * @throws RuntimeException when MTran returns no usable {@code result}; any exception
     *                          raised by the underlying service is logged and rethrown
     */
    private String doExternalTranslationRequest(String text, String targetLang, boolean html) {
        final long startedAt = System.currentTimeMillis();
        boolean succeeded = false;

        try {
            log.info("[MTran请求] targetLang={}, html={}, textLength={}", targetLang, html, text.length());
            final JSONObject rawResponse = externalTranslationService.translate("auto", targetLang, text, html);
            final long elapsedMs = System.currentTimeMillis() - startedAt;

            // Pull the translated text out of the MTran payload
            final String rawResult = rawResponse.getString("result");
            final boolean unusable = rawResult == null || rawResult.isBlank();
            if (unusable) {
                // Error-shaped response: fail so that callers can fall back to Python
                log.warn("[MTran异常响应] 缺少 result 字段,原始响应: {}", rawResponse);
                throw new RuntimeException("MTran 返回异常响应: " + rawResponse);
            }

            // Decode HTML entities (&nbsp; &amp; &lt; &gt; ...)
            final String decoded = decodeHtmlEntities(rawResult);

            log.info("[MTran响应] costMs={}, resultLength={}", elapsedMs, decoded.length());

            // Wrap in the standard response shape
            final JSONObject wrapped = new JSONObject();
            wrapped.put("success", true);
            wrapped.put("engine", "mtran");
            wrapped.put("translatedContent", decoded);

            succeeded = true;
            return wrapped.toJSONString();
        } catch (Exception failure) {
            final long elapsedMs = System.currentTimeMillis() - startedAt;
            log.error("[MTran失败] costMs={}, error={}", elapsedMs, failure.getMessage(), failure);
            throw failure;
        } finally {
            final long totalMs = System.currentTimeMillis() - startedAt;
            recordStats(false, succeeded, totalMs);

            // Report slow requests
            if (totalMs > 5000) {
                log.info("[慢请求] MTran 翻译耗时:{}ms, 文本长度:{}, html: {}", totalMs, text.length(), html);
            }
        }
    }

    /**
     * Decodes a fixed set of HTML character references into their literal characters.
     *
     * <p>{@code &amp;} is decoded last on purpose: decoding it earlier would turn an escaped
     * reference such as {@code &amp;lt;} into {@code &lt;} and then, one step later, into
     * {@code <}, i.e. decode the text twice.
     *
     * @param text text possibly containing HTML entities, may be {@code null}
     * @return the decoded text, or {@code null} when {@code text} is {@code null}
     */
    private String decodeHtmlEntities(String text) {
        if (text == null) {
            return null;
        }
        return text
                .replace("&nbsp;", " ")
                .replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&#39;", "'")
                .replace("&#x27;", "'")
                .replace("&apos;", "'")
                .replace("&mdash;", "—")
                .replace("&ndash;", "–")
                .replace("&hellip;", "…")
                .replace("&laquo;", "«")
                .replace("&raquo;", "»")
                .replace("&copy;", "©")
                .replace("&reg;", "®")
                .replace("&trade;", "™")
                // Must stay last so that escaped references are decoded only one level
                .replace("&amp;", "&");
    }


    /**
     * Returns a snapshot of the routing statistics for monitoring and debugging.
     *
     * <p>Each counter is read once so that the reported count and rate of an engine agree
     * with each other. {@code next_reset_seconds} is clamped at 0: counters are reset lazily
     * by the routing path, so after an idle period the raw difference would be negative.
     *
     * @return ordered map with request counts, excellent counts, formatted excellence rates
     *         ({@code "N/A"} when an engine has no requests) and seconds until the next reset
     */
    public Map<String, Object> getRoundRobinStats() {
        Map<String, Object> stats = new LinkedHashMap<>();

        int pythonCount = pythonRequestCount.get();
        int pythonExcellent = pythonExcellentCount.get();
        int mTranCount = mTranRequestCount.get();
        int mTranExcellent = mTranExcellentCount.get();

        stats.put("python_requests", pythonCount);
        stats.put("python_excellent", pythonExcellent);
        stats.put("python_excellent_rate", pythonCount > 0
            ? String.format("%.2f%%", (double) pythonExcellent / pythonCount * 100)
            : "N/A");

        stats.put("mtran_requests", mTranCount);
        stats.put("mtran_excellent", mTranExcellent);
        stats.put("mtran_excellent_rate", mTranCount > 0
            ? String.format("%.2f%%", (double) mTranExcellent / mTranCount * 100)
            : "N/A");

        long lastReset = Math.max(pythonLastResetTime.get(), mTranLastResetTime.get());
        long elapsedSeconds = (System.currentTimeMillis() - lastReset) / 1000;
        // An overdue reset happens on the next routed request, so report 0 rather than a negative value
        stats.put("next_reset_seconds", Math.max(0L, STATS_RESET_INTERVAL_SECONDS - elapsedSeconds));

        return stats;
    }

    /**
     * Lifecycle hook run before the bean is destroyed: closes the HTTP client, then empties
     * the per-user semaphore bookkeeping.
     */
    @PreDestroy
    public void shutdown() {
        // Close the client first, as before; the two map clears are independent of each other
        client.close();

        semaphoreLastAccessTime.clear();
        userSemaphores.clear();

        log.info("UserLevelThrottledTranslationClient 已关闭");
    }
}