// TranslationPipeline.java
package com.yumu.noveltranslator.domain.service;
import com.yumu.noveltranslator.port.dto.translation.ConsistencyTranslationResult;
import com.yumu.noveltranslator.port.dto.translation.RagTranslationResponse;
import com.yumu.noveltranslator.domain.model.Glossary;
import com.yumu.noveltranslator.enums.TranslationMode;
import com.yumu.noveltranslator.domain.service.EntityConsistencyService;
import com.yumu.noveltranslator.application.service.RagTranslationApplicationService;
import com.yumu.noveltranslator.port.out.TranslationCachePort;
import com.yumu.noveltranslator.port.out.TranslationClientPort;
import com.yumu.noveltranslator.port.out.TeamTranslationPort;
import com.yumu.noveltranslator.util.CacheKeyUtil;
import com.yumu.noveltranslator.util.ExternalResponseUtil;
import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* 统一翻译管线组件
*
* 四级翻译管线架构:
* L1: 三级缓存查询(Caffeine → Redis → 数据库)
* L2: RAG 语义匹配(向量相似度查询)
* L3: 实体一致性翻译(术语表 + 占位符保护)
* L4: 直译(Python/MTranServer 轮询)
*
* 所有翻译路径统一使用此组件,消除 TranslationService、
* MultiAgentTranslationService、TranslationTaskService 中的重复管线逻辑。
*/
@Slf4j
public class TranslationPipeline {
// Substrings that mark a response as non-translation content (advertising or
// assistant boilerplate). isValidTranslation rejects any result containing one of them.
private static final String[] AD_KEYWORDS = {
"人工智能助手", "生成式人工智能", "体验生成式", "获取写作", "Gemini", "Google AI"
};
/** Texts longer than this many characters are translated segment by segment. */
private static final int SEGMENT_TRANSLATION_THRESHOLD = 5000;
/** Target size (in characters) of each segment when a text is split for translation. */
private static final int TRANSLATION_SEGMENT_SIZE = 3000;
// L1 collaborator: mode-scoped cache lookups (getCacheByMode) and writes (putCache).
private final TranslationCachePort cacheService;
// L2 collaborator: similarity search (searchSimilarWithModes) and translation-memory writes.
private final RagTranslationApplicationService ragTranslationService;
// L3 collaborator: entity extraction, placeholder substitution and consistency translation.
private final EntityConsistencyService entityConsistencyService;
// L4 collaborator for direct translation; its translate(...) response is parsed with ExternalResponseUtil.
private final TranslationClientPort translationClient;
// Post-processor applied to every fresh result via fixUntranslatedChinese.
private final TranslationPostProcessingService postProcessingService;
// L4 collaborator for team mode; may be null, in which case executeTeam falls back to executeSegment.
private final TeamTranslationPort teamTranslationService;
// May be null; when null the entity-consistency step (L3) is skipped.
private final Long userId;
// Forwarded unchanged to translationClient.translate; null when the shorter constructors are used.
private final String userLevel;
// Forwarded unchanged to entityConsistencyService.translateWithConsistency.
private final String docId;
// Glossary passed to translationClient.translate; never null (the constructor substitutes an empty list).
private final List<Glossary> glossaryTerms;
/**
 * Creates a pipeline for standard mode.
 *
 * <p>No team translation service, no user level and an empty glossary are configured,
 * so L4 always goes through {@code translationClient}.
 */
public TranslationPipeline(
        TranslationCachePort cacheService,
        RagTranslationApplicationService ragTranslationService,
        EntityConsistencyService entityConsistencyService,
        TranslationClientPort translationClient,
        TranslationPostProcessingService postProcessingService,
        Long userId,
        String docId) {
    // Delegate straight to the canonical constructor with the defaults spelled out.
    this(
            cacheService,
            ragTranslationService,
            entityConsistencyService,
            translationClient,
            postProcessingService,
            (TeamTranslationPort) null,
            userId,
            (String) null,
            docId,
            List.of());
}
/**
 * Creates a pipeline that can run in team mode.
 *
 * <p>No user level and an empty glossary are configured.
 *
 * @param teamTranslationService team translation service; may be {@code null}, in which
 *                               case {@code executeTeam} degrades to the standard direct translation
 */
public TranslationPipeline(
        TranslationCachePort cacheService,
        RagTranslationApplicationService ragTranslationService,
        EntityConsistencyService entityConsistencyService,
        TranslationClientPort translationClient,
        TranslationPostProcessingService postProcessingService,
        TeamTranslationPort teamTranslationService,
        Long userId,
        String docId) {
    // Delegate straight to the canonical constructor with the defaults spelled out.
    this(
            cacheService,
            ragTranslationService,
            entityConsistencyService,
            translationClient,
            postProcessingService,
            teamTranslationService,
            userId,
            (String) null,
            docId,
            List.of());
}
/**
 * Creates a pipeline with team support and a glossary, but without a user level.
 *
 * @param glossaryTerms glossary handed to the translation client; {@code null} is treated
 *                      as an empty list by the canonical constructor
 */
public TranslationPipeline(
        TranslationCachePort cacheService,
        RagTranslationApplicationService ragTranslationService,
        EntityConsistencyService entityConsistencyService,
        TranslationClientPort translationClient,
        TranslationPostProcessingService postProcessingService,
        TeamTranslationPort teamTranslationService,
        Long userId,
        String docId,
        List<Glossary> glossaryTerms) {
    this(
            cacheService,
            ragTranslationService,
            entityConsistencyService,
            translationClient,
            postProcessingService,
            teamTranslationService,
            userId,
            (String) null, // userLevel is not known on this path
            docId,
            glossaryTerms);
}
/**
 * Canonical constructor: every other constructor ends up here.
 *
 * @param teamTranslationService may be {@code null}; team mode then falls back to direct translation
 * @param userId                 may be {@code null}; the entity-consistency step is then skipped
 * @param userLevel              forwarded to the translation client; may be {@code null}
 * @param glossaryTerms          glossary for the translation client; {@code null} becomes an empty list
 */
public TranslationPipeline(
        TranslationCachePort cacheService,
        RagTranslationApplicationService ragTranslationService,
        EntityConsistencyService entityConsistencyService,
        TranslationClientPort translationClient,
        TranslationPostProcessingService postProcessingService,
        TeamTranslationPort teamTranslationService,
        Long userId,
        String userLevel,
        String docId,
        List<Glossary> glossaryTerms) {
    // Collaborators
    this.cacheService = cacheService;
    this.ragTranslationService = ragTranslationService;
    this.entityConsistencyService = entityConsistencyService;
    this.translationClient = translationClient;
    this.postProcessingService = postProcessingService;
    this.teamTranslationService = teamTranslationService;

    // Request context
    this.userId = userId;
    this.userLevel = userLevel;
    this.docId = docId;

    // The rest of the class relies on the glossary never being null.
    if (glossaryTerms == null) {
        this.glossaryTerms = List.of();
    } else {
        this.glossaryTerms = glossaryTerms;
    }
}
/**
 * Runs the full four-level translation pipeline.
 *
 * <p>Texts that {@code splitTextForTranslation} keeps as a single segment are translated
 * in one pass. Longer texts are translated segment by segment and the pieces are joined
 * with a blank line ({@code "\n\n"}). A segment whose translation fails (null or blank)
 * is kept in its original language so the output never loses content.
 *
 * @param text       text to translate; {@code null} yields {@code null}
 * @param targetLang target language
 * @param mode       translation quality tier
 * @return the translation; {@code null} when {@code text} is {@code null} or when the
 *         single-pass translation fails
 */
public String execute(String text, String targetLang, TranslationMode mode) {
    if (text == null) {
        // Nothing to translate; do not push null into the cache-key and client code.
        return null;
    }

    List<String> segments = splitTextForTranslation(text);
    if (segments.size() == 1) {
        // Short text: original single-pass flow.
        return executeSegment(text, targetLang, mode);
    }

    // Long text: translate each segment and merge.
    log.info("分段翻译: 原文{}字, 分为{}段", text.length(), segments.size());
    StringBuilder merged = new StringBuilder(text.length());
    int lastIndex = segments.size() - 1;
    for (int i = 0; i <= lastIndex; i++) {
        String segment = segments.get(i);

        // A whitespace-only segment has nothing to translate; skip the pipeline round trip.
        String translated = segment.isBlank() ? null : executeSegment(segment, targetLang, mode);

        // Fall back to the source segment when translation failed.
        String piece = (translated != null && !translated.isBlank()) ? translated : segment;

        // Segments produced by the splitter still end with their own "\n\n". Strip trailing
        // whitespace in BOTH cases so the explicit separator below is the only one emitted.
        merged.append(piece.stripTrailing());

        if (i < lastIndex) {
            merged.append("\n\n");
        }
    }
    return merged.toString();
}
/**
 * Team-mode translation pipeline (four levels, with L4 delegated to the team translation service).
 *
 * <ul>
 *   <li>L1: mode-scoped cache lookup</li>
 *   <li>L2: RAG similarity lookup; a direct hit is post-processed, cached and returned</li>
 *   <li>L3: entity consistency (extract entities, translate them, replace them with placeholders)</li>
 *   <li>L4: {@code teamTranslationService.translateChapter}, then placeholder restoration,
 *       post-processing and caching</li>
 * </ul>
 *
 * <p>When no team translation service was configured the method falls back to the standard
 * single-segment pipeline. That check is made before L3 so no entity extraction is performed
 * for a result that would be thrown away.
 *
 * @param text          text to translate
 * @param sourceLang    source language
 * @param targetLang    target language
 * @param mode          translation quality tier
 * @param novelType     novel type, forwarded to the team translation service
 * @param glossaryTerms glossary forwarded to the team translation service (shadows the field)
 * @return the translation, or {@code null} on failure
 */
public String executeTeam(
        String text,
        String sourceLang,
        String targetLang,
        TranslationMode mode,
        String novelType,
        List<Glossary> glossaryTerms) {
    String cacheKey = CacheKeyUtil.buildCacheKey(text, targetLang);

    // L1: mode-scoped cache lookup.
    String cached = cacheService.getCacheByMode(cacheKey, mode.getName()).orElse(null);
    if (cached != null) {
        log.debug("Pipeline 团队模式缓存命中 [{}]", cacheKey.substring(0, Math.min(16, cacheKey.length())));
        return cached;
    }

    // L2: RAG similarity lookup restricted to the modes this tier may reuse.
    RagTranslationResponse ragResult = ragTranslationService.searchSimilarWithModes(userId, text, targetLang, mode.getAllowedModes());
    if (ragResult != null && ragResult.isDirectHit()) {
        log.info("Pipeline 团队模式 RAG 直接命中,相似度: {}", ragResult.getSimilarity());
        String result = postProcessingService.fixUntranslatedChinese(text, ragResult.getTranslation(), targetLang, mode.getName());
        cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), "team");
        return result;
    }

    // Without a team service the L3 placeholder work below would be discarded, so decide first.
    if (teamTranslationService == null) {
        log.warn("团队模式未初始化 TeamTranslationService,降级为标准直译");
        if (translationClient == null) {
            log.error("标准直译降级失败: translationClient 未初始化");
            return null;
        }
        return executeSegment(text, targetLang, mode);
    }

    // L3: entity consistency with placeholder protection (best effort).
    String textForTranslation = text;
    EntityConsistencyService.EntityMappingContext mappingContext = null;
    if (userId != null && entityConsistencyService.shouldUseConsistency(text)) {
        log.info("Pipeline 团队模式启用实体一致性");
        try {
            List<String> extractedEntities = entityConsistencyService.extractEntitiesSegmented(text, targetLang);
            if (!extractedEntities.isEmpty()) {
                Map<String, String> entityTranslations = entityConsistencyService.translateEntities(
                        extractedEntities, targetLang);
                mappingContext = entityConsistencyService.buildMapping(entityTranslations);
                textForTranslation = entityConsistencyService.replaceEntitiesWithPlaceholders(text, mappingContext);
            }
        } catch (Exception e) {
            // Degrade to translating the untouched text. Drop any half-built mapping so that
            // no restoration is attempted on a text that never received placeholders.
            mappingContext = null;
            textForTranslation = text;
            log.warn("团队模式实体一致性失败,降级为无占位符翻译: {}", e.getMessage(), e);
        }
    }

    // L4: team translation.
    try {
        String translated = teamTranslationService.translateChapter(
                textForTranslation, novelType, sourceLang, targetLang, glossaryTerms);
        if (translated == null || translated.isBlank()) {
            log.warn("Pipeline 团队模式 L4 翻译结果为空");
            return null;
        }

        // Restore placeholders. A failure still returns the best-effort text to the caller,
        // but that text must not be persisted because it may contain raw placeholders.
        boolean placeholdersRestored = true;
        if (mappingContext != null) {
            try {
                translated = entityConsistencyService.restorePlaceholders(translated, mappingContext);
            } catch (Exception e) {
                placeholdersRestored = false;
                log.warn("团队模式占位符还原失败: {}", e.getMessage(), e);
            }
        }

        // Post-process, then persist only clean results.
        translated = postProcessingService.fixUntranslatedChinese(text, translated, targetLang, mode.getName());
        if (placeholdersRestored && shouldCache(text, translated)) {
            cacheService.putCache(cacheKey, text, translated, sourceLang, targetLang, mode.getName(), "team");
            ragTranslationService.storeTranslationMemory(text, translated, targetLang, mode.getName(), userId, mode.getName());
        }
        return translated;
    } catch (Exception e) {
        log.warn("Pipeline 团队模式翻译失败: {}", e.getMessage(), e);
        return null;
    }
}
/**
 * Translates one segment through the four pipeline levels.
 *
 * <ul>
 *   <li>L1: mode-scoped cache lookup</li>
 *   <li>L2: RAG similarity lookup; a direct hit is post-processed, cached and returned</li>
 *   <li>L3: entity-consistency translation, only when {@code userId} is set and the
 *       consistency service asks for it</li>
 *   <li>L4: direct translation through the translation client, with the glossary injected</li>
 * </ul>
 *
 * <p>Fresh results are written to the cache and to translation memory only when
 * {@code shouldCache} accepts them, so untranslated text is never persisted.
 *
 * @param text       segment to translate
 * @param targetLang target language
 * @param mode       translation quality tier
 * @return the translation, or {@code null} when L4 returns nothing usable
 */
private String executeSegment(String text, String targetLang, TranslationMode mode) {
    String cacheKey = CacheKeyUtil.buildCacheKey(text, targetLang);

    // L1: mode-scoped cache lookup.
    String cached = cacheService.getCacheByMode(cacheKey, mode.getName()).orElse(null);
    if (cached != null) {
        log.debug("Pipeline 缓存命中 mode={}, key={}", mode.getName(), cacheKey);
        return cached;
    }

    // L2: RAG similarity lookup restricted to the modes this tier may reuse.
    RagTranslationResponse ragResult = ragTranslationService.searchSimilarWithModes(userId, text, targetLang, mode.getAllowedModes());
    if (ragResult != null && ragResult.isDirectHit()) {
        log.info("Pipeline RAG 直接命中,相似度: {}", ragResult.getSimilarity());
        String result = postProcessingService.fixUntranslatedChinese(text, ragResult.getTranslation(), targetLang, mode.getName());
        cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
        return result;
    }

    // L3: entity-consistency translation.
    if (userId != null && entityConsistencyService.shouldUseConsistency(text)) {
        log.info("Pipeline 启用实体一致性翻译");
        ConsistencyTranslationResult consistencyResult =
                entityConsistencyService.translateWithConsistency(text, targetLang, mode.getName(), userId, docId);
        // A missing or unapplied result simply falls through to L4.
        if (consistencyResult != null
                && consistencyResult.isConsistencyApplied()
                && consistencyResult.getTranslatedText() != null) {
            String result = postProcessingService.fixUntranslatedChinese(text, consistencyResult.getTranslatedText(), targetLang, mode.getName());
            // Same persistence rule as L4: cache and translation memory go together.
            if (shouldCache(text, result)) {
                cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
                ragTranslationService.storeTranslationMemory(text, result, targetLang, mode.getName(), userId, mode.getName());
            }
            return result;
        }
    }

    // L4: direct translation with the glossary injected.
    String rawJson = translationClient.translate(text, targetLang, mode.getName(), false, glossaryTerms.isEmpty(), glossaryTerms, userId != null ? userId.toString() : null, userLevel);
    String result = ExternalResponseUtil.extractDataField(rawJson);
    // A blank payload is as useless as a missing one and must never reach the cache.
    if (result == null || result.isBlank()) {
        log.warn("Pipeline L4 翻译失败,原始响应: {}", rawJson);
        return null;
    }
    if (!isValidTranslation(text, result)) {
        log.warn("Pipeline L4 翻译结果无效(广告关键词或长度异常)");
        return null;
    }

    // Post-process, then persist.
    result = postProcessingService.fixUntranslatedChinese(text, result, targetLang, mode.getName());
    if (shouldCache(text, result)) {
        cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
        ragTranslationService.storeTranslationMemory(text, result, targetLang, mode.getName(), userId, mode.getName());
    } else {
        log.debug("Pipeline 译文与原文一致,跳过缓存");
    }
    return result;
}
/**
 * Fast pipeline (cache + direct translation only) for plain text.
 *
 * <p>Equivalent to calling the four-argument overload with HTML mode switched off.
 *
 * @param text       text to translate
 * @param targetLang target language
 * @param mode       translation quality tier
 * @return the translation, or the original text when translation fails
 */
public String executeFast(String text, String targetLang, TranslationMode mode) {
    final boolean htmlMode = false;
    return executeFast(text, targetLang, mode, htmlMode);
}
/**
 * Fast pipeline: cache lookup (L1) followed directly by translation (L4).
 *
 * <p>RAG lookup and entity consistency are skipped. The cache key carries an
 * {@code :html} / {@code :text} suffix so HTML-mode and text-mode results never mix.
 * When a glossary is configured, the flag passed to the client as {@code !hasGlossary}
 * is false. The original comment says this forces the Python service, because
 * MTranServer does not support glossaries.
 *
 * @param text       text to translate; {@code null} or blank input is returned unchanged
 * @param targetLang target language
 * @param mode       translation quality tier
 * @param html       whether to request HTML translation mode from the client
 * @return the translation, or the original text when translation fails
 */
public String executeFast(String text, String targetLang, TranslationMode mode, boolean html) {
    if (text == null || text.isBlank()) {
        // Nothing to translate; avoids an NPE in the logging below and a pointless remote call.
        return text;
    }

    log.info("[PIPELINE-ENTRY] executeFast START: textLen={}, target={}, mode={}, html={}",
            text.length(), targetLang, mode.getName(), html);
    String cacheKey = CacheKeyUtil.buildCacheKey(text, targetLang) + (html ? ":html" : ":text");
    log.info("[PIPELINE-L1] Cache key: {}", cacheKey.substring(0, Math.min(32, cacheKey.length())));

    // L1: mode-scoped cache lookup.
    String cached = cacheService.getCacheByMode(cacheKey, mode.getName()).orElse(null);
    if (cached != null) {
        log.info("[PIPELINE-L1] 缓存命中 mode={}", mode.getName());
        return cached;
    }
    log.info("[PIPELINE-L1] 缓存未命中,继续 L4");

    // L4: direct translation.
    try {
        boolean hasGlossary = !glossaryTerms.isEmpty();
        log.info("[PIPELINE-L4] 调用翻译客户端: hasGlossary={}, textLen={}", hasGlossary, text.length());
        String rawJson = translationClient.translate(text, targetLang, mode.getName(), html, !hasGlossary, glossaryTerms, userId != null ? userId.toString() : null, userLevel);
        log.info("[PIPELINE-L4] 翻译客户端返回: rawJsonLen={}", rawJson != null ? rawJson.length() : 0);
        String result = ExternalResponseUtil.extractDataField(rawJson);
        log.info("[PIPELINE-L4] 提取译文: result={}", result != null ? result.substring(0, Math.min(50, result.length())) : "null");
        if (result != null && !result.isBlank()) {
            if (!isValidTranslation(text, result)) {
                log.warn("Pipeline 快速模式翻译结果无效,返回原文");
                return text;
            }
            result = postProcessingService.fixUntranslatedChinese(text, result, targetLang, mode.getName());
            if (shouldCache(text, result)) {
                cacheService.putCache(cacheKey, text, result, "auto", targetLang, mode.getName(), mode.getName());
            }
            log.info("[PIPELINE-EXIT] 翻译成功: {}", result.substring(0, Math.min(50, result.length())));
            return result;
        }
        // Only reached for a missing or blank payload, so the message is accurate.
        log.warn("Pipeline 快速模式翻译结果为空,返回原文");
    } catch (Exception e) {
        log.warn("Pipeline 快速模式翻译失败: {}", e.getMessage(), e);
    }

    // Any failure: hand the original text back to the caller.
    return text;
}
/**
 * Splits a long text into segments for piecewise translation.
 *
 * <p>Rules:
 * <ul>
 *   <li>{@code null} becomes a single empty segment; a text of at most
 *       {@code SEGMENT_TRANSLATION_THRESHOLD} characters is returned as a single segment</li>
 *   <li>longer texts are cut after paragraph breaks ({@code \n\n}) and the paragraphs are
 *       packed into segments of roughly {@code TRANSLATION_SEGMENT_SIZE} characters</li>
 *   <li>a packed segment larger than 1.5 times that size is cut again at sentence boundaries</li>
 * </ul>
 * The segments concatenated in order reproduce the input exactly.
 */
private static List<String> splitTextForTranslation(String text) {
    if (text == null) {
        return List.of("");
    }
    if (text.length() <= SEGMENT_TRANSLATION_THRESHOLD) {
        return List.of(text);
    }

    List<String> collected = new ArrayList<>();
    StringBuilder buffer = new StringBuilder();
    // The look-behind keeps each paragraph break attached to the paragraph it terminates.
    for (String paragraph : text.split("(?<=\n\n)")) {
        boolean bufferHasContent = buffer.length() > 0;
        boolean wouldOverflow = buffer.length() + paragraph.length() > TRANSLATION_SEGMENT_SIZE;
        if (bufferHasContent && wouldOverflow) {
            flushTranslationBuffer(buffer, collected);
        }
        buffer.append(paragraph);
    }
    flushTranslationBuffer(buffer, collected);

    return collected.isEmpty() ? List.of(text) : collected;
}

/**
 * Moves the buffered text into {@code target} and clears the buffer. Oversized content
 * (more than 1.5 times the target segment size) is cut at sentence boundaries first.
 * Does nothing when the buffer is empty.
 */
private static void flushTranslationBuffer(StringBuilder buffer, List<String> target) {
    if (buffer.length() == 0) {
        return;
    }
    String chunk = buffer.toString();
    buffer.setLength(0);
    if (chunk.length() > TRANSLATION_SEGMENT_SIZE * 1.5) {
        target.addAll(splitAtSentenceBoundaryForTranslation(chunk));
    } else {
        target.add(chunk);
    }
}
/**
 * Cuts an oversized chunk at sentence boundaries.
 *
 * <p>The text is split after each sentence terminator or line break, and the sentences are
 * packed greedily into parts of at most {@code TRANSLATION_SEGMENT_SIZE} characters. A single
 * sentence longer than that stays in one piece. The parts concatenated in order reproduce
 * the input.
 */
private static List<String> splitAtSentenceBoundaryForTranslation(String text) {
    List<String> pieces = new ArrayList<>();
    StringBuilder pending = new StringBuilder();

    // The look-behind keeps every terminator attached to the sentence it ends.
    for (String unit : text.split("(?<=[。!?\n])")) {
        boolean pendingHasContent = pending.length() > 0;
        if (pendingHasContent && pending.length() + unit.length() > TRANSLATION_SEGMENT_SIZE) {
            pieces.add(pending.toString());
            pending.setLength(0);
        }
        pending.append(unit);
    }

    if (pending.length() > 0) {
        pieces.add(pending.toString());
    }

    if (pieces.isEmpty()) {
        return List.of(text);
    }
    return pieces;
}
/**
 * Decides whether a translation is worth caching.
 *
 * <p>A result is cacheable only when it is non-blank and differs from the source text after
 * both are stripped of surrounding whitespace, ignoring case. Stripping is Unicode-aware, so
 * a full-width (ideographic) space around otherwise identical text does not make it look
 * like a real translation.
 *
 * @param original   source text
 * @param translated candidate translation
 * @return {@code true} when the translation should be cached; {@code false} when either
 *         argument is {@code null}, the translation is blank, or it equals the source
 */
public static boolean shouldCache(String original, String translated) {
    if (original == null || translated == null) {
        return false;
    }
    String cleanTranslated = translated.strip();
    if (cleanTranslated.isEmpty()) {
        // An empty translation carries no information; caching it would mask the failure.
        return false;
    }
    // equalsIgnoreCase also covers the exact-match case.
    return !original.strip().equalsIgnoreCase(cleanTranslated);
}
/**
 * Checks whether a translation result looks like a genuine translation.
 *
 * <p>A result is rejected when either argument is {@code null}, when it contains one of the
 * {@code AD_KEYWORDS} markers (advertising or system-prompt text), or when it is more than
 * ten times as long as the source text.
 *
 * @param text   source text
 * @param result candidate translation
 * @return {@code true} when the result passes all checks
 */
public static boolean isValidTranslation(String text, String result) {
    if (text == null || result == null) {
        return false;
    }

    // Advertising / system-prompt markers.
    for (int i = 0; i < AD_KEYWORDS.length; i++) {
        String marker = AD_KEYWORDS[i];
        if (result.contains(marker)) {
            log.warn("翻译结果包含广告关键词:{}", marker);
            return false;
        }
    }

    // Length sanity check: at most ten times the source length.
    int sourceLength = text.length();
    int resultLength = result.length();
    boolean lengthPlausible = resultLength <= sourceLength * 10;
    if (!lengthPlausible) {
        log.warn("翻译结果长度异常:原文 {} 字符,译文 {} 字符", sourceLength, resultLength);
        return false;
    }
    return true;
}
}