TranslationPipeline.java

package com.yumu.noveltranslator.domain.service;

import com.yumu.noveltranslator.port.dto.translation.ConsistencyTranslationResult;
import com.yumu.noveltranslator.port.dto.translation.RagTranslationResponse;
import com.yumu.noveltranslator.domain.model.Glossary;
import com.yumu.noveltranslator.enums.TranslationMode;
import com.yumu.noveltranslator.domain.service.EntityConsistencyService;
import com.yumu.noveltranslator.application.service.RagTranslationApplicationService;
import com.yumu.noveltranslator.port.out.TranslationCachePort;
import com.yumu.noveltranslator.port.out.TranslationClientPort;
import com.yumu.noveltranslator.port.out.TeamTranslationPort;
import com.yumu.noveltranslator.util.CacheKeyUtil;
import com.yumu.noveltranslator.util.ExternalResponseUtil;
import lombok.extern.slf4j.Slf4j;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * 统一翻译管线组件
 *
 * 四级翻译管线架构:
 * L1: 三级缓存查询(Caffeine → Redis → 数据库)
 * L2: RAG 语义匹配(向量相似度查询)
 * L3: 实体一致性翻译(术语表 + 占位符保护)
 * L4: 直译(Python/MTranServer 轮询)
 *
 * 所有翻译路径统一使用此组件,消除 TranslationService、
 * MultiAgentTranslationService、TranslationTaskService 中的重复管线逻辑。
 */
@Slf4j
public class TranslationPipeline {

    // Keywords that mark a result as non-translation content (ads / system prompts);
    // isValidTranslation rejects any result containing one of them.
    private static final String[] AD_KEYWORDS = {
            "人工智能助手", "生成式人工智能", "体验生成式", "获取写作", "Gemini", "Google AI"
    };

    /** Texts longer than this many characters are translated segment by segment. */
    private static final int SEGMENT_TRANSLATION_THRESHOLD = 5000;
    /** Target size (in characters) of one segment when a text is split. */
    private static final int TRANSLATION_SEGMENT_SIZE = 3000;

    // L1: cache port, queried before any translation work and written after success.
    private final TranslationCachePort cacheService;
    // L2: RAG similarity lookup; also receives new translations as translation memory.
    private final RagTranslationApplicationService ragTranslationService;
    // L3: entity-consistency translation and placeholder protection.
    private final EntityConsistencyService entityConsistencyService;
    // L4: direct translation client.
    private final TranslationClientPort translationClient;
    // Post-processing applied to every fresh translation (fixUntranslatedChinese).
    private final TranslationPostProcessingService postProcessingService;
    // L4 for team mode; may be null, in which case executeTeam degrades to executeSegment.
    private final TeamTranslationPort teamTranslationService;
    // May be null; L3 is skipped when it is.
    private final Long userId;
    // Passed through to the translation client; null for the constructors that do not take it.
    private final String userLevel;
    // Passed to the entity-consistency service in executeSegment.
    private final String docId;
    // Never null: the canonical constructor replaces null with an empty list.
    private final List<Glossary> glossaryTerms;

    /**
     * Creates a pipeline for standard mode (L4 uses the direct translation client).
     *
     * <p>The resulting instance has no team translation service, no user level and an
     * empty glossary.
     *
     * @param cacheService             cache port (L1)
     * @param ragTranslationService    RAG lookup / translation memory (L2)
     * @param entityConsistencyService entity-consistency service (L3)
     * @param translationClient        direct translation client (L4)
     * @param postProcessingService    post-processing applied to fresh translations
     * @param userId                   current user id, may be null
     * @param docId                    document id forwarded to the consistency service
     */
    public TranslationPipeline(
            TranslationCachePort cacheService,
            RagTranslationApplicationService ragTranslationService,
            EntityConsistencyService entityConsistencyService,
            TranslationClientPort translationClient,
            TranslationPostProcessingService postProcessingService,
            Long userId,
            String docId) {
        this(
                cacheService,
                ragTranslationService,
                entityConsistencyService,
                translationClient,
                postProcessingService,
                null,       // teamTranslationService: not available in standard mode
                userId,
                null,       // userLevel: not supplied by this overload
                docId,
                List.of()); // no glossary
    }

    /**
     * Creates a pipeline that can run in team mode (L4 may go through
     * {@link TeamTranslationPort}).
     *
     * <p>The resulting instance has no user level and an empty glossary.
     *
     * @param cacheService             cache port (L1)
     * @param ragTranslationService    RAG lookup / translation memory (L2)
     * @param entityConsistencyService entity-consistency service (L3)
     * @param translationClient        direct translation client (L4)
     * @param postProcessingService    post-processing applied to fresh translations
     * @param teamTranslationService   team translation service; may be null, in which case
     *                                 {@code executeTeam} degrades to standard direct translation
     * @param userId                   current user id, may be null
     * @param docId                    document id forwarded to the consistency service
     */
    public TranslationPipeline(
            TranslationCachePort cacheService,
            RagTranslationApplicationService ragTranslationService,
            EntityConsistencyService entityConsistencyService,
            TranslationClientPort translationClient,
            TranslationPostProcessingService postProcessingService,
            TeamTranslationPort teamTranslationService,
            Long userId,
            String docId) {
        this(
                cacheService,
                ragTranslationService,
                entityConsistencyService,
                translationClient,
                postProcessingService,
                teamTranslationService,
                userId,
                null,       // userLevel: not supplied by this overload
                docId,
                List.of()); // no glossary
    }

    /**
     * Creates a pipeline with a team translation service and a glossary, but without a
     * user level.
     *
     * @param cacheService             cache port (L1)
     * @param ragTranslationService    RAG lookup / translation memory (L2)
     * @param entityConsistencyService entity-consistency service (L3)
     * @param translationClient        direct translation client (L4)
     * @param postProcessingService    post-processing applied to fresh translations
     * @param teamTranslationService   team translation service, may be null
     * @param userId                   current user id, may be null
     * @param docId                    document id forwarded to the consistency service
     * @param glossaryTerms            glossary injected into L4 calls; null is treated as empty
     */
    public TranslationPipeline(
            TranslationCachePort cacheService,
            RagTranslationApplicationService ragTranslationService,
            EntityConsistencyService entityConsistencyService,
            TranslationClientPort translationClient,
            TranslationPostProcessingService postProcessingService,
            TeamTranslationPort teamTranslationService,
            Long userId,
            String docId,
            List<Glossary> glossaryTerms) {
        this(
                cacheService,
                ragTranslationService,
                entityConsistencyService,
                translationClient,
                postProcessingService,
                teamTranslationService,
                userId,
                null, // userLevel: not supplied by this overload
                docId,
                glossaryTerms);
    }

    /**
     * Canonical constructor: every other constructor ends up here.
     *
     * @param cacheService             cache port (L1)
     * @param ragTranslationService    RAG lookup / translation memory (L2)
     * @param entityConsistencyService entity-consistency service (L3)
     * @param translationClient        direct translation client (L4)
     * @param postProcessingService    post-processing applied to fresh translations
     * @param teamTranslationService   team translation service, may be null
     * @param userId                   current user id, may be null
     * @param userLevel                user level forwarded to the translation client, may be null
     * @param docId                    document id forwarded to the consistency service
     * @param glossaryTerms            glossary injected into L4 calls; null is stored as an empty list
     */
    public TranslationPipeline(
            TranslationCachePort cacheService,
            RagTranslationApplicationService ragTranslationService,
            EntityConsistencyService entityConsistencyService,
            TranslationClientPort translationClient,
            TranslationPostProcessingService postProcessingService,
            TeamTranslationPort teamTranslationService,
            Long userId,
            String userLevel,
            String docId,
            List<Glossary> glossaryTerms) {
        // Collaborators
        this.cacheService = cacheService;
        this.ragTranslationService = ragTranslationService;
        this.entityConsistencyService = entityConsistencyService;
        this.translationClient = translationClient;
        this.postProcessingService = postProcessingService;
        this.teamTranslationService = teamTranslationService;

        // Request context
        this.userId = userId;
        this.userLevel = userLevel;
        this.docId = docId;

        // Normalise a missing glossary so the rest of the class never has to null-check it.
        if (glossaryTerms == null) {
            this.glossaryTerms = List.of();
        } else {
            this.glossaryTerms = glossaryTerms;
        }
    }

    /**
     * Runs the full four-level translation pipeline, splitting long input into segments.
     *
     * <p>Short text (a single segment) is translated in one pass and the result of that
     * pass is returned as is, which is {@code null} when the translation fails.
     *
     * <p>Long text is translated segment by segment. A segment whose translation is
     * {@code null} or blank is kept in its original form, so this path never returns
     * {@code null}. Every segment (translated or original) has its trailing whitespace
     * removed before the pieces are joined with a blank line, so a failed segment does not
     * produce a doubled paragraph separator.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param mode       translation quality tier
     * @return the translation; {@code null} only when a single-segment translation fails
     */
    public String execute(String text, String targetLang, TranslationMode mode) {
        List<String> segments = splitTextForTranslation(text);

        if (segments.size() == 1) {
            // Short text: original single-pass flow
            return executeSegment(text, targetLang, mode);
        }

        // Long text: translate each segment and merge
        log.info("分段翻译: 原文{}字, 分为{}段", text.length(), segments.size());
        StringBuilder result = new StringBuilder();

        for (int i = 0; i < segments.size(); i++) {
            String segment = segments.get(i);
            String translated = executeSegment(segment, targetLang, mode);

            // Fall back to the source segment when the translation failed.
            boolean usable = translated != null && !translated.isBlank();
            String piece = usable ? translated : segment;

            // The splitter keeps the trailing "\n\n" on each source segment, so the fallback
            // piece must be stripped as well; the separator is added explicitly below.
            result.append(piece.stripTrailing());

            // Paragraph separator between segments (but not after the last one)
            if (i < segments.size() - 1) {
                result.append("\n\n");
            }
        }

        return result.toString();
    }

    /**
     * Team-mode pipeline: the same four levels, with L4 handled by the team translation
     * service.
     *
     * <ul>
     *   <li>L1: cache lookup for the given mode</li>
     *   <li>L2: RAG similarity lookup; a direct hit is post-processed, cached (with source
     *       language {@code "auto"}) and returned</li>
     *   <li>L3: entity extraction and placeholder protection (only when {@code userId} is
     *       set and the consistency service asks for it); failures here degrade to
     *       translating the unprotected text</li>
     *   <li>L4: {@code teamTranslationService.translateChapter}</li>
     * </ul>
     *
     * <p>When no team translation service was supplied, the method degrades to the standard
     * single-segment pipeline right after L2. The L3 preparation is skipped in that case
     * because that path works on the raw text and uses the instance glossary, not the
     * {@code glossaryTerms} argument.
     *
     * <p>If placeholders cannot be restored after L4, the text is still returned
     * (best effort) but is neither cached nor stored as translation memory, so a result
     * that still contains placeholders is never reused.
     *
     * @param text           text to translate
     * @param sourceLang     source language
     * @param targetLang     target language
     * @param mode           translation quality tier (documented by the original author as
     *                       always being the team tier)
     * @param novelType      novel genre, forwarded to the team service
     * @param glossaryTerms  glossary forwarded to the team service (shadows the field)
     * @return the translation, or {@code null} on failure
     */
    public String executeTeam(
            String text,
            String sourceLang,
            String targetLang,
            TranslationMode mode,
            String novelType,
            List<Glossary> glossaryTerms) {

        String cacheKey = CacheKeyUtil.buildCacheKey(text, targetLang);

        // L1: per-mode cache lookup
        String cached = cacheService.getCacheByMode(cacheKey, mode.getName()).orElse(null);
        if (cached != null) {
            log.debug("Pipeline 团队模式缓存命中 [{}]", cacheKey.substring(0, Math.min(16, cacheKey.length())));
            return cached;
        }

        // L2: RAG similarity lookup, restricted to the modes this tier may reuse
        RagTranslationResponse ragResult = ragTranslationService.searchSimilarWithModes(userId, text, targetLang, mode.getAllowedModes());
        if (ragResult != null && ragResult.isDirectHit()) {
            log.info("Pipeline 团队模式 RAG 直接命中,相似度: {}", ragResult.getSimilarity());
            String result = postProcessingService.fixUntranslatedChinese(text, ragResult.getTranslation(), targetLang, mode.getName());
            cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), "team");
            return result;
        }

        // Degrade before L3: the fallback translates the raw text, so any placeholder
        // preparation done here would be thrown away.
        if (teamTranslationService == null) {
            log.warn("团队模式未初始化 TeamTranslationService,降级为标准直译");
            if (translationClient == null) {
                log.error("标准直译降级失败: translationClient 未初始化");
                return null;
            }
            return executeSegment(text, targetLang, mode);
        }

        // L3: entity consistency + placeholder protection
        String textForTranslation = text;
        EntityConsistencyService.EntityMappingContext mappingContext = null;

        if (userId != null && entityConsistencyService.shouldUseConsistency(text)) {
            log.info("Pipeline 团队模式启用实体一致性");
            try {
                List<String> extractedEntities = entityConsistencyService.extractEntitiesSegmented(text, targetLang);
                if (!extractedEntities.isEmpty()) {
                    Map<String, String> entityTranslations = entityConsistencyService.translateEntities(
                            extractedEntities, targetLang);
                    mappingContext = entityConsistencyService.buildMapping(entityTranslations);
                    textForTranslation = entityConsistencyService.replaceEntitiesWithPlaceholders(text, mappingContext);
                }
            } catch (Exception e) {
                log.warn("团队模式实体一致性失败,降级为无占位符翻译: {}", e.getMessage(), e);
                // Discard a half-built mapping so restoration is not attempted on text
                // that never received placeholders.
                mappingContext = null;
                textForTranslation = text;
            }
        }

        // L4: multi-agent team translation
        try {
            String translated = teamTranslationService.translateChapter(
                    textForTranslation, novelType, sourceLang, targetLang, glossaryTerms);

            if (translated == null || translated.trim().isEmpty()) {
                log.warn("Pipeline 团队模式 L4 翻译结果为空");
                return null;
            }

            // Restore placeholders
            boolean placeholdersRestored = true;
            if (mappingContext != null) {
                try {
                    translated = entityConsistencyService.restorePlaceholders(translated, mappingContext);
                } catch (Exception e) {
                    placeholdersRestored = false;
                    log.warn("团队模式占位符还原失败: {}", e.getMessage(), e);
                }
            }

            // Post-process, then cache only results that are complete and differ from the source
            translated = postProcessingService.fixUntranslatedChinese(text, translated, targetLang, mode.getName());
            if (placeholdersRestored && shouldCache(text, translated)) {
                cacheService.putCache(cacheKey, text, translated, sourceLang, targetLang, mode.getName(), "team");
                ragTranslationService.storeTranslationMemory(text, translated, targetLang, mode.getName(), userId, mode.getName());
            }

            return translated;
        } catch (Exception e) {
            log.warn("Pipeline 团队模式翻译失败: {}", e.getMessage(), e);
            return null;
        }
    }

    /**
     * Runs the four-level pipeline on a single piece of text.
     *
     * <ul>
     *   <li>L1: per-mode cache lookup</li>
     *   <li>L2: RAG similarity lookup; a direct hit is post-processed, cached and returned</li>
     *   <li>L3: entity-consistency translation, only when {@code userId} is set and the
     *       consistency service asks for it; used only when it reports that consistency
     *       was applied and produced text</li>
     *   <li>L4: direct translation with the instance glossary</li>
     * </ul>
     *
     * <p>Fresh results from L3 and L4 are written to the cache and to translation memory
     * only when {@code shouldCache} accepts them, so untranslated text is never stored.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param mode       translation quality tier
     * @return the translation, or {@code null} when L4 yields no data or an invalid result
     */
    private String executeSegment(String text, String targetLang, TranslationMode mode) {
        String cacheKey = CacheKeyUtil.buildCacheKey(text, targetLang);

        // L1: per-mode cache lookup
        String cached = cacheService.getCacheByMode(cacheKey, mode.getName()).orElse(null);
        if (cached != null) {
            log.debug("Pipeline 缓存命中 mode={}, key={}", mode.getName(), cacheKey);
            return cached;
        }

        // L2: RAG similarity lookup, restricted to the modes this tier may reuse
        RagTranslationResponse ragResult = ragTranslationService.searchSimilarWithModes(userId, text, targetLang, mode.getAllowedModes());
        if (ragResult != null && ragResult.isDirectHit()) {
            log.info("Pipeline RAG 直接命中,相似度: {}", ragResult.getSimilarity());
            String result = postProcessingService.fixUntranslatedChinese(text, ragResult.getTranslation(), targetLang, mode.getName());
            cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
            return result;
        }

        // L3: entity-consistency translation (requires a user and a qualifying text)
        if (userId != null && entityConsistencyService.shouldUseConsistency(text)) {
            log.info("Pipeline 启用实体一致性翻译");
            ConsistencyTranslationResult consistencyResult =
                    entityConsistencyService.translateWithConsistency(text, targetLang, mode.getName(), userId, docId);
            // A missing result is treated like "consistency not applied": fall through to L4.
            if (consistencyResult != null
                    && consistencyResult.isConsistencyApplied()
                    && consistencyResult.getTranslatedText() != null) {
                String result = postProcessingService.fixUntranslatedChinese(text, consistencyResult.getTranslatedText(), targetLang, mode.getName());
                // Same rule as L4: cache and translation memory are written together,
                // and only for results that differ from the source.
                if (shouldCache(text, result)) {
                    cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
                    ragTranslationService.storeTranslationMemory(text, result, targetLang, mode.getName(), userId, mode.getName());
                }
                return result;
            }
        }

        // L4: direct translation with the instance glossary injected
        String rawJson = translationClient.translate(text, targetLang, mode.getName(), false, glossaryTerms.isEmpty(), glossaryTerms, userId != null ? userId.toString() : null, userLevel);
        String result = ExternalResponseUtil.extractDataField(rawJson);

        if (result == null) {
            log.warn("Pipeline L4 翻译失败,原始响应: {}", rawJson);
            return null;
        }

        if (!isValidTranslation(text, result)) {
            log.warn("Pipeline L4 翻译结果无效(广告关键词或长度异常)");
            return null;
        }

        // Post-process, then cache
        result = postProcessingService.fixUntranslatedChinese(text, result, targetLang, mode.getName());
        if (shouldCache(text, result)) {
            cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
            ragTranslationService.storeTranslationMemory(text, result, targetLang, mode.getName(), userId, mode.getName());
        } else {
            log.debug("Pipeline 译文与原文一致,跳过缓存");
        }

        return result;
    }

    /**
     * Fast pipeline (cache + direct translation only) in plain-text mode.
     *
     * <p>Skips RAG and entity consistency; simply forwards to the four-argument overload
     * with HTML mode switched off.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param mode       translation quality tier
     * @return the translation, or the original text when translation fails
     */
    public String executeFast(String text, String targetLang, TranslationMode mode) {
        final boolean htmlMode = false;
        return executeFast(text, targetLang, mode, htmlMode);
    }

    /**
     * Fast pipeline: per-mode cache lookup followed directly by L4 translation.
     *
     * <p>RAG and entity consistency are skipped. The cache key carries an
     * {@code :html} / {@code :text} suffix so the two modes never share entries. Any
     * exception raised while translating is logged and the original text is returned.
     *
     * @param text       text to translate
     * @param targetLang target language
     * @param mode       translation quality tier
     * @param html       whether to request HTML translation mode from the client
     * @return the translation, or the original text when the result is missing, blank,
     *         invalid, or an exception occurred
     */
    public String executeFast(String text, String targetLang, TranslationMode mode, boolean html) {
        log.info("[PIPELINE-ENTRY] executeFast START: textLen={}, target={}, mode={}, html={}",
                text.length(), targetLang, mode.getName(), html);

        final String keySuffix = html ? ":html" : ":text";
        final String cacheKey = CacheKeyUtil.buildCacheKey(text, targetLang) + keySuffix;
        log.info("[PIPELINE-L1] Cache key: {}", cacheKey.substring(0, Math.min(32, cacheKey.length())));

        // L1: per-mode cache lookup
        final String hit = cacheService.getCacheByMode(cacheKey, mode.getName()).orElse(null);
        if (hit != null) {
            log.info("[PIPELINE-L1] 缓存命中 mode={}", mode.getName());
            return hit;
        }
        log.info("[PIPELINE-L1] 缓存未命中,继续 L4");

        // L4: direct translation. The fifth client argument is true only when there is no
        // glossary (per the original note, a glossary forces the Python service).
        try {
            final boolean glossaryPresent = !glossaryTerms.isEmpty();
            log.info("[PIPELINE-L4] 调用翻译客户端: hasGlossary={}, textLen={}", glossaryPresent, text.length());

            final String callerId = (userId == null) ? null : userId.toString();
            final String response = translationClient.translate(
                    text, targetLang, mode.getName(), html, !glossaryPresent, glossaryTerms, callerId, userLevel);
            final int responseLength = (response == null) ? 0 : response.length();
            log.info("[PIPELINE-L4] 翻译客户端返回: rawJsonLen={}", responseLength);

            String translated = ExternalResponseUtil.extractDataField(response);
            log.info("[PIPELINE-L4] 提取译文: result={}",
                    translated == null ? "null" : translated.substring(0, Math.min(50, translated.length())));

            final boolean hasContent = translated != null && !translated.isBlank();
            if (hasContent && !isValidTranslation(text, translated)) {
                log.warn("Pipeline 快速模式翻译结果无效,返回原文");
                return text;
            }
            if (hasContent) {
                translated = postProcessingService.fixUntranslatedChinese(text, translated, targetLang, mode.getName());
                if (shouldCache(text, translated)) {
                    cacheService.putCache(cacheKey, text, translated, "auto", targetLang, mode.getName(), mode.getName());
                }
                log.info("[PIPELINE-EXIT] 翻译成功: {}", translated.substring(0, Math.min(50, translated.length())));
                return translated;
            }
        } catch (Exception e) {
            log.warn("Pipeline 快速模式翻译失败: {}", e.getMessage(), e);
        }

        // Missing/blank result or an exception: hand the original text back
        log.warn("Pipeline 快速模式翻译结果为空,返回原文");
        return text;
    }

    /**
     * Splits long text into segments for segment-by-segment translation.
     *
     * <ul>
     *   <li>{@code null} yields a single empty segment; text of at most
     *       {@code SEGMENT_TRANSLATION_THRESHOLD} characters yields itself as the only segment.</li>
     *   <li>Longer text is cut after blank-line paragraph separators and the paragraphs are
     *       packed greedily into segments of roughly {@code TRANSLATION_SEGMENT_SIZE}
     *       characters.</li>
     *   <li>A packed segment longer than 1.5 times that size is cut further at sentence
     *       boundaries.</li>
     * </ul>
     *
     * <p>Separators stay attached to the preceding piece, so concatenating the segments
     * reproduces the input.
     */
    private static List<String> splitTextForTranslation(String text) {
        if (text == null) {
            return List.of("");
        }
        if (text.length() <= SEGMENT_TRANSLATION_THRESHOLD) {
            return List.of(text);
        }

        List<String> out = new ArrayList<>();
        StringBuilder buffer = new StringBuilder();

        // The lookbehind keeps each "\n\n" at the end of its paragraph.
        for (String paragraph : text.split("(?<=\n\n)")) {
            boolean wouldOverflow = buffer.length() > 0
                    && buffer.length() + paragraph.length() > TRANSLATION_SEGMENT_SIZE;
            if (wouldOverflow) {
                addSegmentSplittingIfOversized(out, buffer.toString());
                buffer.setLength(0);
            }
            buffer.append(paragraph);
        }

        if (buffer.length() > 0) {
            addSegmentSplittingIfOversized(out, buffer.toString());
        }

        return out.isEmpty() ? List.of(text) : out;
    }

    /**
     * Adds {@code chunk} to {@code out}, first cutting it at sentence boundaries when it is
     * longer than 1.5 times the target segment size.
     */
    private static void addSegmentSplittingIfOversized(List<String> out, String chunk) {
        if (chunk.length() > TRANSLATION_SEGMENT_SIZE * 1.5) {
            out.addAll(splitAtSentenceBoundaryForTranslation(chunk));
        } else {
            out.add(chunk);
        }
    }

    /**
     * Cuts an oversized segment at sentence boundaries.
     *
     * <p>The text is split after each sentence terminator listed in the pattern (or a line
     * break) and the sentences are packed greedily into parts of at most
     * {@code TRANSLATION_SEGMENT_SIZE} characters. A single sentence longer than that size
     * is kept whole. Terminators stay attached to their sentence.
     */
    private static List<String> splitAtSentenceBoundaryForTranslation(String text) {
        List<String> chunks = new ArrayList<>();
        StringBuilder buffer = new StringBuilder();

        for (String piece : text.split("(?<=[。!?\n])")) {
            boolean bufferHasContent = buffer.length() > 0;
            if (bufferHasContent && buffer.length() + piece.length() > TRANSLATION_SEGMENT_SIZE) {
                chunks.add(buffer.toString());
                buffer.setLength(0);
            }
            buffer.append(piece);
        }

        if (buffer.length() > 0) {
            chunks.add(buffer.toString());
        }

        if (chunks.isEmpty()) {
            return List.of(text);
        }
        return chunks;
    }

    /**
     * Decides whether a translation is worth caching.
     *
     * <p>A result is cacheable only when it is non-blank and, after removing surrounding
     * whitespace, differs from the original ignoring case. Whitespace is removed with
     * {@link String#strip()}, so Unicode whitespace such as the ideographic space used for
     * paragraph indentation in Chinese text is ignored as well.
     *
     * @param original   source text, may be null
     * @param translated translated text, may be null
     * @return {@code true} when the translation should be cached
     */
    public static boolean shouldCache(String original, String translated) {
        if (original == null || translated == null) {
            return false;
        }
        String cleanTranslated = translated.strip();
        // An empty translation carries no information; caching it would pin a failure.
        if (cleanTranslated.isEmpty()) {
            return false;
        }
        // equalsIgnoreCase also covers the exact-match case.
        return !original.strip().equalsIgnoreCase(cleanTranslated);
    }

    /**
     * Checks whether a translation result looks like a real translation.
     *
     * <p>A result is rejected when:
     * <ul>
     *   <li>either argument is {@code null};</li>
     *   <li>it contains one of the ad / system-prompt keywords and the source text does not
     *       (a keyword already present in the source, e.g. a proper noun, is expected to
     *       survive translation and is not treated as injected content);</li>
     *   <li>it is more than ten times as long as the source text.</li>
     * </ul>
     *
     * @param text   source text
     * @param result translation result to check
     * @return {@code true} when the result passes all checks
     */
    public static boolean isValidTranslation(String text, String result) {
        if (text == null || result == null) {
            return false;
        }
        // Ad / system-prompt keywords that were not part of the source
        for (String keyword : AD_KEYWORDS) {
            if (result.contains(keyword) && !text.contains(keyword)) {
                log.warn("翻译结果包含广告关键词:{}", keyword);
                return false;
            }
        }
        // Abnormal length: more than 10x the source (long arithmetic avoids int overflow)
        if (result.length() > (long) text.length() * 10) {
            log.warn("翻译结果长度异常:原文 {} 字符,译文 {} 字符", text.length(), result.length());
            return false;
        }
        return true;
    }
}