让子弹飞
120.34M · 2026-03-13
langchain4j-production/
├── src/
│ ├── main/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── example/
│ │ │ └── ai/
│ │ │ ├── config/ # 配置层
│ │ │ │ ├── LangChainConfig.java
│ │ │ │ ├── VectorStoreConfig.java
│ │ │ │ ├── SecurityConfig.java
│ │ │ │ └── MonitoringConfig.java
│ │ │ ├── domain/ # 领域层
│ │ │ │ ├── model/
│ │ │ │ │ ├── ChatMessage.java
│ │ │ │ │ ├── Document.java
│ │ │ │ │ └── SearchResult.java
│ │ │ │ ├── service/
│ │ │ │ │ ├── ChatService.java
│ │ │ │ │ ├── EmbeddingService.java
│ │ │ │ │ └── DocumentService.java
│ │ │ │ └── repository/
│ │ │ │ ├── VectorStoreRepository.java
│ │ │ │ └── ConversationRepository.java
│ │ │ ├── infrastructure/ # 基础设施层
│ │ │ │ ├── ai/
│ │ │ │ │ ├── LLMClient.java
│ │ │ │ │ ├── EmbeddingClient.java
│ │ │ │ │ └── VectorStoreClient.java
│ │ │ │ ├── cache/
│ │ │ │ │ ├── CacheManager.java
│ │ │ │ │ └── EmbeddingCache.java
│ │ │ │ ├── security/
│ │ │ │ │ ├── InputValidator.java
│ │ │ │ │ ├── OutputSanitizer.java
│ │ │ │ │ └── RateLimiter.java
│ │ │ │ └── monitoring/
│ │ │ │ ├── MetricsCollector.java
│ │ │ │ └── HealthIndicator.java
│ │ │ ├── api/ # API层
│ │ │ │ ├── controller/
│ │ │ │ │ ├── ChatController.java
│ │ │ │ │ ├── DocumentController.java
│ │ │ │ │ └── HealthController.java
│ │ │ │ ├── dto/
│ │ │ │ │ ├── ChatRequest.java
│ │ │ │ │ ├── ChatResponse.java
│ │ │ │ │ └── ErrorResponse.java
│ │ │ │ └── exception/
│ │ │ │ ├── GlobalExceptionHandler.java
│ │ │ │ └── RateLimitException.java
│ │ │ └── Application.java
│ │ └── resources/
│ │ ├── application.yml
│ │ ├── application-dev.yml
│ │ ├── application-prod.yml
│ │ ├── logback-spring.xml
│ │ └── db/
│ │ └── migration/
│ │ └── V1__init_schema.sql
│ └── test/
│ ├── java/
│ │ └── com/
│ │ └── example/
│ │ └── ai/
│ │ ├── integration/ # 集成测试
│ │ │ ├── ChatServiceIntegrationTest.java
│ │ │ └── VectorStoreIntegrationTest.java
│ │ ├── e2e/ # E2E测试
│ │ │ └── ChatFlowE2ETest.java
│ │ └── unit/ # 单元测试
│ │ ├── ChatServiceTest.java
│ │ ├── RateLimiterTest.java
│ │ └── InputValidatorTest.java
│ └── resources/
│ ├── application-test.yml
│ └── test-data/
├── docker/
│ ├── Dockerfile
│ ├── docker-compose.yml
│ └── init-scripts/
├── k8s/
│ ├── deployment.yaml
│ ├── service.yaml
│ ├── configmap.yaml
│ ├── secret.yaml
│ └── ingress.yaml
├── scripts/
│ ├── deploy.sh
│ ├── rollback.sh
│ └── health-check.sh
├── docs/
│ ├── API.md
│ ├── DEPLOYMENT.md
│ └── TROUBLESHOOTING.md
├── build.gradle.kts
├── settings.gradle.kts
├── .env.example
├── .gitignore
└── README.md
// Gradle build script (Kotlin DSL) for the LangChain4J production sample:
// Spring Boot 3.2 on Java 21 with LangChain4J 0.36.2.
plugins {
    id("org.springframework.boot") version "3.2.0"
    id("io.spring.dependency-management") version "1.1.4"
    kotlin("jvm") version "1.9.20"
    kotlin("plugin.spring") version "1.9.20"
}

group = "com.example.ai"
version = "1.0.0"
java.sourceCompatibility = JavaVersion.VERSION_21

repositories {
    mavenCentral()
}

dependencies {
    // Spring Boot core
    implementation("org.springframework.boot:spring-boot-starter-web")
    implementation("org.springframework.boot:spring-boot-starter-actuator")
    implementation("org.springframework.boot:spring-boot-starter-validation")
    implementation("org.springframework.boot:spring-boot-starter-cache")
    implementation("org.springframework.boot:spring-boot-starter-data-jpa")
    // AOP support: the rate-limit interceptor relies on @Around advice
    implementation("org.springframework.boot:spring-boot-starter-aop")

    // LangChain4J core
    implementation("dev.langchain4j:langchain4j:0.36.2")
    implementation("dev.langchain4j:langchain4j-spring-boot-starter:0.36.2")
    implementation("dev.langchain4j:langchain4j-open-ai:0.36.2")
    implementation("dev.langchain4j:langchain4j-embeddings-all-minilm-l6-v2:0.36.2")

    // Vector stores
    implementation("dev.langchain4j:langchain4j-pgvector:0.36.2")
    implementation("dev.langchain4j:langchain4j-qdrant:0.36.2")

    // Caching
    implementation("com.github.ben-manes.caffeine:caffeine:3.1.8")

    // Monitoring and metrics
    implementation("io.micrometer:micrometer-registry-prometheus")
    implementation("io.micrometer:micrometer-tracing-bridge-brave")

    // Resilience: the chat service uses CircuitBreaker / CircuitBreakerRegistry
    implementation("io.github.resilience4j:resilience4j-spring-boot3:2.2.0")

    // Security
    implementation("org.springframework.boot:spring-boot-starter-security")
    implementation("io.jsonwebtoken:jjwt-api:0.12.3")
    runtimeOnly("io.jsonwebtoken:jjwt-impl:0.12.3")
    runtimeOnly("io.jsonwebtoken:jjwt-jackson:0.12.3")

    // Secret management: VaultConfig uses VaultTemplate
    implementation("org.springframework.vault:spring-vault-core:3.1.0")

    // Rate limiting
    implementation("com.bucket4j:bucket4j-core:8.7.0")
    implementation("com.bucket4j:bucket4j-redis:8.7.0")

    // Database
    implementation("org.postgresql:postgresql")
    implementation("org.flywaydb:flyway-core")

    // Tooling: Lombok is needed at compile time only, so keep it off the runtime classpath
    compileOnly("org.projectlombok:lombok")
    annotationProcessor("org.projectlombok:lombok")

    // Testing
    testImplementation("org.springframework.boot:spring-boot-starter-test")
    testImplementation("org.springframework.security:spring-security-test")
    testImplementation("org.testcontainers:testcontainers:1.19.3")
    testImplementation("org.testcontainers:postgresql:1.19.3")
    testImplementation("org.testcontainers:junit-jupiter:1.19.3")
    testImplementation("org.mockito:mockito-core")
    testImplementation("org.mockito:mockito-junit-jupiter")
    // Shaded (standalone) WireMock: the plain artifact's Jetty 9 dependencies are
    // overridden by Spring Boot's dependency management and fail at runtime.
    testImplementation("com.github.tomakehurst:wiremock-jre8-standalone:2.35.0")
}

tasks.withType<Test> {
    useJUnitPlatform()
}
@Configuration
@EnableConfigurationProperties(LangChainProperties.class)
public class LangChainConfig {
private final LangChainProperties properties;
public LangChainConfig(LangChainProperties properties) {
this.properties = properties;
}
@Bean
public ChatLanguageModel ch@tLanguageModel() {
return OpenAiChatModel.builder()
.apiKey(properties.getApiKey())
.modelName(properties.getModelName())
.temperature(properties.getTemperature())
.timeout(Duration.ofSeconds(properties.getTimeout()))
.maxRetries(properties.getMaxRetries())
.logRequests(properties.isLogRequests())
.logResponses(properties.isLogResponses())
.build();
}
@Bean
public EmbeddingModel embeddingModel() {
return OpenAiEmbeddingModel.builder()
.apiKey(properties.getApiKey())
.modelName(properties.getEmbeddingModelName())
.timeout(Duration.ofSeconds(properties.getTimeout()))
.build();
}
@Bean
public EmbeddingStore<TextSegment> embeddingStore(DataSource dataSource) {
return PgVectorEmbeddingStore.builder()
.dataSource(dataSource)
.table(properties.getVectorStore().getTable())
.dimension(properties.getVectorStore().getDimension())
.build();
}
@Bean
public ContentRetriever contentRetriever(
EmbeddingStore<TextSegment> embeddingStore,
EmbeddingModel embeddingModel) {
return EmbeddingStoreContentRetriever.builder()
.embeddingStore(embeddingStore)
.embeddingModel(embeddingModel)
.maxResults(properties.getRetrieval().getMaxResults())
.minScore(properties.getRetrieval().getMinScore())
.build();
}
}
# application.yml
# All secrets and environment-specific values are injected from environment variables.
langchain4j:
  api-key: ${OPENAI_API_KEY}
  # Falls back to gpt-4 when OPENAI_MODEL_NAME is not set
  model-name: ${OPENAI_MODEL_NAME:gpt-4}
spring:
  datasource:
    url: ${DATABASE_URL}
    username: ${DATABASE_USERNAME}
    password: ${DATABASE_PASSWORD}
# .env file (do NOT commit it to Git)
OPENAI_API_KEY=sk-proj-xxxxxxxxxxxxx
OPENAI_MODEL_NAME=gpt-4
DATABASE_URL=jdbc:postgresql://localhost:5432/langchain4j
DATABASE_USERNAME=postgres
DATABASE_PASSWORD=secret
/**
 * Configures access to HashiCorp Vault and exposes the OpenAI API key stored there.
 */
@Configuration
public class VaultConfig {

    /** KV version 2 API path of the OpenAI secret (note the {@code data/} segment). */
    private static final String OPENAI_SECRET_PATH = "secret/data/langchain4j/openai";

    /** Field inside the secret that holds the key. */
    private static final String API_KEY_FIELD = "api-key";

    /**
     * Builds a token-authenticated {@link VaultTemplate}.
     *
     * @throws IllegalStateException if the {@code VAULT_TOKEN} environment variable is missing or blank
     */
    @Bean
    public VaultTemplate vaultTemplate() {
        String rawToken = System.getenv("VAULT_TOKEN");
        if (rawToken == null || rawToken.isBlank()) {
            throw new IllegalStateException("VAULT_TOKEN environment variable is not set");
        }
        VaultEndpoint endpoint = VaultEndpoint.create("vault.example.com", 8200);
        return new VaultTemplate(endpoint, new TokenAuthentication(VaultToken.of(rawToken)));
    }

    /**
     * Reads the OpenAI API key from Vault.
     *
     * @return the non-blank API key
     * @throws IllegalStateException if the secret or its {@code api-key} field is missing
     */
    @Bean
    public String openAiApiKey(VaultTemplate vaultTemplate) {
        // read() returns null when nothing exists at the path.
        VaultResponse response = vaultTemplate.read(OPENAI_SECRET_PATH);
        if (response == null) {
            throw new IllegalStateException("Vault secret not found: " + OPENAI_SECRET_PATH);
        }

        java.util.Map<String, Object> payload = response.getRequiredData();
        // A raw read of a KV v2 path wraps the secret fields in a nested "data" map
        // (next to "metadata"); fall back to the top level for a flat payload.
        java.util.Map<?, ?> fields =
                payload.get("data") instanceof java.util.Map<?, ?> nested ? nested : payload;

        Object value = fields.get(API_KEY_FIELD);
        if (!(value instanceof String apiKey) || apiKey.isBlank()) {
            throw new IllegalStateException(
                    "Vault secret " + OPENAI_SECRET_PATH + " has no '" + API_KEY_FIELD + "' field");
        }
        return apiKey;
    }
}
/**
 * Validates user-supplied chat text and uploaded documents before they reach the model
 * or the document pipeline.
 */
@Component
public class InputValidator {

    private static final int MAX_INPUT_LENGTH = 4000;

    /** Upload size limit: 10 MB. */
    private static final long MAX_UPLOAD_BYTES = 10L * 1024 * 1024;

    /**
     * Blacklisted keywords, matched as whole words only so that ordinary text such as
     * "description" or "updated" is not rejected. Explicit look-arounds are used instead
     * of {@code \b} so the result does not depend on how the JDK treats non-ASCII letters.
     */
    private static final Pattern INJECTION_PATTERN =
            Pattern.compile(
                    "(?<![A-Za-z0-9_])(DROP|DELETE|UPDATE|INSERT|EXEC|SCRIPT)(?![A-Za-z0-9_])",
                    Pattern.CASE_INSENSITIVE);

    // Arrays.asList (unlike List.of) tolerates contains(null), which matters when the
    // client sends no Content-Type.
    private static final List<String> ALLOWED_CONTENT_TYPES = Arrays.asList(
            "application/pdf",
            "text/plain",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    );

    /**
     * Validates a chat message.
     *
     * @param input the raw user message
     * @throws ValidationException if the message is null, blank or longer than 4000 characters
     * @throws SecurityException   if it contains a blacklisted keyword or a disallowed control character
     */
    public void validateChatInput(String input) {
        // Reject null or blank input
        if (input == null || input.trim().isEmpty()) {
            throw new ValidationException("输入不能为空");
        }
        // Enforce the length limit
        if (input.length() > MAX_INPUT_LENGTH) {
            throw new ValidationException(
                    "输入超过最大长度限制: " + MAX_INPUT_LENGTH
            );
        }
        // Keyword-based injection heuristic
        if (INJECTION_PATTERN.matcher(input).find()) {
            throw new SecurityException("检测到潜在的注入攻击");
        }
        // Disallowed control characters
        if (containsMaliciousCharacters(input)) {
            throw new SecurityException("输入包含不允许的特殊字符");
        }
    }

    /**
     * Returns true if the input contains an ISO control character other than
     * line feed, carriage return or tab.
     */
    private boolean containsMaliciousCharacters(String input) {
        return input.codePoints().anyMatch(cp ->
                Character.isISOControl(cp) && cp != '\n' && cp != '\r' && cp != '\t'
        );
    }

    /**
     * Validates an uploaded document: presence, size, declared content type and file name.
     *
     * @param file the uploaded file
     * @throws ValidationException if the file is missing/empty, larger than 10 MB or of an unsupported type
     * @throws SecurityException   if the original file name is missing or contains {@code ".."}
     */
    public void validateDocumentUpload(MultipartFile file) {
        if (file == null || file.isEmpty()) {
            throw new ValidationException("上传文件不能为空");
        }
        // Size limit
        if (file.getSize() > MAX_UPLOAD_BYTES) {
            throw new ValidationException("文件大小超过10MB限制");
        }
        // Declared content type must be on the allow-list
        String contentType = file.getContentType();
        if (!ALLOWED_CONTENT_TYPES.contains(contentType)) {
            throw new ValidationException("不支持的文件类型: " + contentType);
        }
        // Guard against path traversal through the client-supplied name
        String filename = file.getOriginalFilename();
        if (filename == null || filename.contains("..")) {
            throw new SecurityException("非法的文件名");
        }
    }
}
@Component
public class OutputSanitizer {
private static final Pattern PII_PATTERN = Pattern.compile(
// 匹配邮箱
"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b|" +
// 匹配手机号(中国)
"\b1[3-9]\d{9}\b|" +
// 匹配身分证号
"\b\d{17}[\dXx]\b"
);
private static final Pattern API_KEY_PATTERN = Pattern.compile(
"(?i)(api[_-]?key|secret|token|password)\s*[:=]\s*['"]?([\w\-]+)['"]?"
);
public String sanitizeOutput(String output) {
if (output == null) {
return null;
}
String sanitized = output;
// 移除PII信息
sanitized = removePII(sanitized);
// 移除API密钥
sanitized = removeApiKeys(sanitized);
// HTML转义(防止XSS)
sanitized = HtmlUtils.htmlEscape(sanitized);
return sanitized;
}
private String removePII(String text) {
Matcher matcher = PII_PATTERN.matcher(text);
StringBuffer result = new StringBuffer();
while (matcher.find()) {
matcher.appendReplacement(result, "[已隐藏]");
}
matcher.appendTail(result);
return result.toString();
}
private String removeApiKeys(String text) {
Matcher matcher = API_KEY_PATTERN.matcher(text);
StringBuffer result = new StringBuffer();
while (matcher.find()) {
String key = matcher.group(1);
matcher.appendReplacement(result, key + ": [REDACTED]");
}
matcher.appendTail(result);
return result.toString();
}
}
/**
 * Per-user token-bucket rate limiter. Each user gets a bucket sized by their
 * {@code UserTier}; buckets are created lazily and idle ones are evicted periodically.
 */
@Component
public class UserRateLimiter {

    /** A bucket together with its capacity, so that cleanup can recognise a completely refilled bucket. */
    private record TrackedBucket(Bucket bucket, long capacity) {
    }

    private final Map<String, TrackedBucket> buckets = new ConcurrentHashMap<>();
    private final RateLimitConfig config;

    public UserRateLimiter(RateLimitConfig config) {
        this.config = config;
    }

    /**
     * Tries to consume one token from the user's bucket.
     *
     * @param userId identifier of the calling user
     * @return true if the request is within the user's limit, false if it must be rejected
     */
    public boolean allowRequest(String userId) {
        return buckets.computeIfAbsent(userId, this::createBucket)
                .bucket()
                .tryConsume(1);
    }

    /** Builds a bucket whose capacity and per-minute greedy refill depend on the user's tier. */
    private TrackedBucket createBucket(String userId) {
        // Requests per minute for each tier
        long capacity = switch (getUserTier(userId)) {
            case FREE -> 10L;
            case PRO -> 100L;
            case ENTERPRISE -> 1000L;
        };
        Bandwidth limit = Bandwidth.builder()
                .capacity(capacity)
                .refillGreedy(capacity, Duration.ofMinutes(1))
                .build();
        Bucket bucket = Bucket.builder()
                .addLimit(limit)
                .build();
        return new TrackedBucket(bucket, capacity);
    }

    private UserTier getUserTier(String userId) {
        // Delegates the tier lookup to the configuration object
        return config.getUserTier(userId);
    }

    /**
     * Hourly eviction of idle buckets. A bucket that has refilled to full capacity is
     * indistinguishable from a freshly created one, so dropping it does not reset
     * anyone's limit. The previous condition compared a value with itself and therefore
     * removed every bucket, including those of users currently being throttled.
     */
    @Scheduled(fixedRate = 3600000) // every hour
    public void cleanup() {
        buckets.entrySet().removeIf(entry ->
                entry.getValue().bucket().getAvailableTokens() >= entry.getValue().capacity()
        );
    }
}
@RestControllerAdvice
public class RateLimitInterceptor {
private final UserRateLimiter rateLimiter;
@Around("@annotation(RateLimited)")
public Object checkRateLimit(ProceedingJoinPoint joinPoint) throws Throwable {
String userId = SecurityContextHolder.getContext()
.getAuthentication()
.getName();
if (!rateLimiter.allowRequest(userId)) {
throw new RateLimitExceededException(
"请求频率超限,请稍后再试"
);
}
return joinPoint.proceed();
}
}
/**
 * Marker annotation for methods whose invocations are subject to rate limiting.
 * Applicable to methods only and retained at runtime so it can be discovered reflectively.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface RateLimited {}
@ExtendWith(MockitoExtension.class)
class ChatServiceTest {
@Mock
private ChatLanguageModel ch@tModel;
@Mock
private InputValidator inputValidator;
@Mock
private OutputSanitizer outputSanitizer;
@InjectMocks
private ChatService ch@tService;
@Test
@DisplayName("应该成功处理有效的聊天请求")
void shouldProcessValidChatRequest() {
// Given
String userMessage = "什么是LangChain4J?";
String expectedResponse = "LangChain4J是一个Java的AI框架...";
when(ch@tModel.generate(userMessage))
.thenReturn(Response.from(expectedResponse));
when(outputSanitizer.sanitizeOutput(expectedResponse))
.thenReturn(expectedResponse);
// When
String result = ch@tService.ch@t(userMessage);
// Then
assertEquals(expectedResponse, result);
verify(inputValidator).validateChatInput(userMessage);
verify(ch@tModel).generate(userMessage);
verify(outputSanitizer).sanitizeOutput(expectedResponse);
}
@Test
@DisplayName("应该拒绝过长的输入")
void shouldRejectTooLongInput() {
// Given
String longMessage = "x".repeat(5000);
doThrow(new ValidationException("输入过长"))
.when(inputValidator).validateChatInput(longMessage);
// When & Then
assertThrows(ValidationException.class, () -> {
ch@tService.ch@t(longMessage);
});
verify(ch@tModel, never()).generate(any());
}
@Test
@DisplayName("应该处理LLM超时")
void shouldHandleLLMTimeout() {
// Given
String message = "测试消息";
when(ch@tModel.generate(message))
.thenThrow(new RuntimeException("Timeout"));
// When & Then
assertThrows(ChatServiceException.class, () -> {
ch@tService.ch@t(message);
});
}
@Test
@DisplayName("应该过滤输出中的敏感信息")
void shouldFilterSensitiveInfoInOutput() {
// Given
String message = "我的联系方式";
String rawResponse = "您的邮箱是test@example.com,手机号是13812345678";
String sanitizedResponse = "您的邮箱是[已隐藏],手机号是[已隐藏]";
when(ch@tModel.generate(message))
.thenReturn(Response.from(rawResponse));
when(outputSanitizer.sanitizeOutput(rawResponse))
.thenReturn(sanitizedResponse);
// When
String result = ch@tService.ch@t(message);
// Then
assertEquals(sanitizedResponse, result);
assertFalse(result.contains("test@example.com"));
assertFalse(result.contains("13812345678"));
}
}
@SpringBootTest
@Testcontainers
@ActiveProfiles("test")
class ChatServiceIntegrationTest {
@Container
static PostgreSQLContainer<?> postgres = new PostgreSQLContainer<>("postgres:16")
.withDatabaseName("testdb")
.withUsername("test")
.withPassword("test");
@DynamicPropertySource
static void configureProperties(DynamicPropertyRegistry registry) {
registry.add("spring.datasource.url", postgres::getJdbcUrl);
registry.add("spring.datasource.username", postgres::getUsername);
registry.add("spring.datasource.password", postgres::getPassword);
}
@Autowired
private ChatService ch@tService;
@Autowired
private ConversationRepository conversationRepository;
@Test
@DisplayName("端到端聊天流程应该正常工作")
void endToEndChatFlowShouldWork() {
// Given
String conversationId = UUID.randomUUID().toString();
String message = "什么是向量数据库?";
// When
ChatResponse response = ch@tService.ch@t(conversationId, message);
// Then
assertNotNull(response);
assertNotNull(response.getMessage());
assertTrue(response.getMessage().length() > 0);
// 验证会话被保存
Optional<Conversation> saved = conversationRepository.findById(conversationId);
assertTrue(saved.isPresent());
assertEquals(2, saved.get().getMessages().size()); // 用户消息 + AI回复
}
@Test
@DisplayName("应该维护对话上下文")
void shouldMaintainConversationContext() {
// Given
String conversationId = UUID.randomUUID().toString();
// When - 第一轮对话
ch@tService.ch@t(conversationId, "我的名字是张三");
// When - 第二轮对话
ChatResponse response = ch@tService.ch@t(conversationId, "我叫什么名字?");
// Then
assertTrue(
response.getMessage().contains("张三"),
"AI应该记住之前对话中的名字"
);
}
@Test
@DisplayName("RAG流程应该检索相关文档")
void ragFlowShouldRetrieveRelevantDocuments() {
// Given - 先添加一些文档
documentService.addDocument(
"LangChain4J是一个用于构建AI应用的Java框架"
);
documentService.addDocument(
"向量数据库用于存储和检索嵌入向量"
);
// When
ChatResponse response = ch@tService.ch@tWithRAG("什么是LangChain4J?");
// Then
assertNotNull(response);
assertNotNull(response.getSources());
assertFalse(response.getSources().isEmpty());
assertTrue(
response.getMessage().contains("Java框架"),
"回答应该基于检索到的文档"
);
}
}
@SpringBootTest(webEnvironment = WebEnvironment.RANDOM_PORT)
@Testcontainers
@AutoConfigureMockMvc
class ChatFlowE2ETest {
@Container
static PostgreSQLContainer<?> postgres = new PostgreSQLContainer<>("postgres:16")
.withDatabaseName("testdb");
@Container
static GenericContainer<?> qdrant = new GenericContainer<>("qdrant/qdrant:latest")
.withExposedPorts(6333)
.waitingFor(Wait.forHttp("/health").forStatusCode(200));
@Autowired
private MockMvc mockMvc;
@Autowired
private ObjectMapper objectMapper;
@DynamicPropertySource
static void configureProperties(DynamicPropertyRegistry registry) {
registry.add("spring.datasource.url", postgres::getJdbcUrl);
registry.add("spring.datasource.username", postgres::getUsername);
registry.add("spring.datasource.password", postgres::getPassword);
registry.add("langchain4j.qdrant.host", qdrant::getHost);
registry.add("langchain4j.qdrant.port", qdrant::getFirstMappedPort);
}
@Test
@DisplayName("完整的RAG流程E2E测试")
void completeRAGFlowE2E() throws Exception {
// Step 1: 上传文档
MockMultipartFile file = new MockMultipartFile(
"file",
"test.txt",
"text/plain",
"LangChain4J是一个Java AI框架".getBytes()
);
mockMvc.perform(multipart("/api/documents")
.file(file))
.andExpect(status().isOk())
.andExpect(jsonPath("$.documentId").exists());
// Step 2: 等待文档被索引
Thread.sleep(2000);
// Step 3: 发送查询
ChatRequest request = new ChatRequest();
request.setMessage("什么是LangChain4J?");
request.setUseRAG(true);
MvcResult result = mockMvc.perform(post("/api/ch@t")
.contentType(MediaType.APPLICATION_JSON)
.content(objectMapper.writeValueAsString(request)))
.andExpect(status().isOk())
.andExpect(jsonPath("$.message").exists())
.andExpect(jsonPath("$.sources").isArray())
.andReturn();
// Step 4: 验证响应
ChatResponse response = objectMapper.readValue(
result.getResponse().getContentAsString(),
ChatResponse.class
);
assertTrue(response.getSources().size() > 0);
assertTrue(response.getMessage().contains("Java"));
}
@Test
@DisplayName("限流机制E2E测试")
void rateLimitingE2E() throws Exception {
ChatRequest request = new ChatRequest();
request.setMessage("测试消息");
// 发送多个请求直到触发限流
for (int i = 0; i < 15; i++) {
ResultActions result = mockMvc.perform(post("/api/ch@t")
.contentType(MediaType.APPLICATION_JSON)
.content(objectMapper.writeValueAsString(request)));
if (i < 10) {
result.andExpect(status().isOk());
} else {
result.andExpect(status().isTooManyRequests());
}
}
}
@Test
@DisplayName("健康检查E2E测试")
void healthCheckE2E() throws Exception {
mockMvc.perform(get("/actuator/health"))
.andExpect(status().isOk())
.andExpect(jsonPath("$.status").value("UP"))
.andExpect(jsonPath("$.components.db.status").value("UP"))
.andExpect(jsonPath("$.components.vectorStore.status").value("UP"));
}
}
# Multi-stage build
FROM gradle:8.5-jdk21 AS builder
WORKDIR /app
# Copy the build files and sources
COPY build.gradle.kts settings.gradle.kts ./
COPY src ./src
# Build only the executable Spring Boot jar. "build" would also produce the *-plain.jar,
# and the wildcard COPY below fails when it matches more than one file.
RUN gradle clean bootJar --no-daemon

# Runtime image
FROM eclipse-temurin:21-jre-alpine
# Add a non-root user
RUN addgroup -S spring && adduser -S spring -G spring
WORKDIR /app
# Copy the build artifact
COPY --from=builder /app/build/libs/*.jar app.jar
# Hand ownership to the non-root user
RUN chown -R spring:spring /app
# Switch to the non-root user
USER spring:spring
# Health check against the actuator endpoint (BusyBox wget)
HEALTHCHECK --interval=30s --timeout=3s --start-period=60s --retries=3 \
    CMD wget -q --spider http://localhost:8080/actuator/health || exit 1
# Expose the application port
EXPOSE 8080
# JVM tuning
ENV JAVA_OPTS="-XX:+UseContainerSupport \
    -XX:MaxRAMPercentage=75.0 \
    -XX:+UseG1GC \
    -XX:+ExitOnOutOfMemoryError \
    -Djava.security.egd=file:/dev/./urandom"
# Start the application. "exec" replaces the shell so the JVM runs as PID 1 and
# receives SIGTERM, which graceful shutdown depends on.
ENTRYPOINT ["sh", "-c", "exec java $JAVA_OPTS -jar app.jar"]
# Local/production-like stack: application, PostgreSQL (pgvector), Qdrant, Prometheus, Grafana.
version: '3.8'

services:
  app:
    # This file lives in docker/, but the Dockerfile copies src/ and the Gradle build
    # files, so the build context must be the project root.
    build:
      context: ..
      dockerfile: docker/Dockerfile
    ports:
      - "8080:8080"
    environment:
      - SPRING_PROFILES_ACTIVE=prod
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - DATABASE_URL=jdbc:postgresql://postgres:5432/langchain4j
      - DATABASE_USERNAME=langchain4j
      - DATABASE_PASSWORD=${DB_PASSWORD}
      - QDRANT_HOST=qdrant
      - QDRANT_PORT=6333
    depends_on:
      postgres:
        condition: service_healthy
      qdrant:
        condition: service_started
    restart: unless-stopped
    networks:
      - langchain4j-network
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G

  postgres:
    image: pgvector/pgvector:pg16
    environment:
      - POSTGRES_DB=langchain4j
      - POSTGRES_USER=langchain4j
      - POSTGRES_PASSWORD=${DB_PASSWORD}
    ports:
      - "5432:5432"
    volumes:
      - postgres-data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U langchain4j"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - langchain4j-network

  qdrant:
    image: qdrant/qdrant:v1.7.4
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      - qdrant-data:/qdrant/storage
    networks:
      - langchain4j-network

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    networks:
      - langchain4j-network

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
    volumes:
      - grafana-data:/var/lib/grafana
    networks:
      - langchain4j-network

volumes:
  postgres-data:
  qdrant-data:
  prometheus-data:
  grafana-data:

networks:
  langchain4j-network:
    driver: bridge
apiVersion: apps/v1
kind: Deployment
metadata:
  name: langchain4j-app
  namespace: production
  labels:
    app: langchain4j
    version: v1.0.0
spec:
  replicas: 3
  # Zero-downtime rollout: add one new pod at a time, never remove a ready one first
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: langchain4j
  template:
    metadata:
      labels:
        app: langchain4j
        version: v1.0.0
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/actuator/prometheus"
    spec:
      serviceAccountName: langchain4j-sa
      # Init container: wait until PostgreSQL accepts TCP connections
      initContainers:
        - name: wait-for-postgres
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              until nc -z postgres-service 5432; do
                echo "Waiting for PostgreSQL..."
                sleep 2
              done
      containers:
        - name: langchain4j
          image: your-registry.com/langchain4j:v1.0.0
          imagePullPolicy: Always
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          env:
            - name: SPRING_PROFILES_ACTIVE
              value: "prod"
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: langchain4j-secrets
                  key: openai-api-key
            - name: DATABASE_URL
              valueFrom:
                configMapKeyRef:
                  name: langchain4j-config
                  key: database-url
            - name: DATABASE_USERNAME
              valueFrom:
                secretKeyRef:
                  name: langchain4j-secrets
                  key: db-username
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: langchain4j-secrets
                  key: db-password
          # Resource requests and limits
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "2000m"
              memory: "2Gi"
          # Liveness probe
          livenessProbe:
            httpGet:
              path: /actuator/health/liveness
              port: 8080
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3
          # Readiness probe
          readinessProbe:
            httpGet:
              path: /actuator/health/readiness
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # Startup probe: allows up to 30 x 10s for the application to come up
          startupProbe:
            httpGet:
              path: /actuator/health
              port: 8080
            initialDelaySeconds: 0
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 30
          # Graceful shutdown: keep serving while the endpoint is removed from load balancers
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh", "-c", "sleep 15"]
      # Must cover the preStop sleep (15s) plus the application's graceful-shutdown
      # timeout (20s per shutdown phase); 30s would let the kubelet SIGKILL the pod mid-drain.
      terminationGracePeriodSeconds: 45
      # Pod anti-affinity: prefer spreading replicas across nodes
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - langchain4j
                topologyKey: kubernetes.io/hostname
# ClusterIP service in front of the application pods (port 80 -> container port 8080).
apiVersion: v1
kind: Service
metadata:
  name: langchain4j-service
  namespace: production
  labels:
    app: langchain4j
spec:
  type: ClusterIP
  selector:
    app: langchain4j
  ports:
    - name: http
      port: 80
      targetPort: 8080
      protocol: TCP
  # Route a given client IP to the same pod for up to 3 hours (10800s)
  sessionAffinity: ClientIP
  sessionAffinityConfig:
    clientIP:
      timeoutSeconds: 10800
# Non-secret configuration: connection endpoints plus an embedded Spring application.yml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: langchain4j-config
  namespace: production
data:
  database-url: "jdbc:postgresql://postgres-service:5432/langchain4j"
  qdrant-host: "qdrant-service"
  qdrant-port: "6333"
  # Spring Boot 3 moved the Prometheus export switch to
  # management.prometheus.metrics.export.enabled (was management.metrics.export.prometheus.enabled).
  application.yml: |
    server:
      port: 8080
      shutdown: graceful
    spring:
      application:
        name: langchain4j-app
      lifecycle:
        timeout-per-shutdown-phase: 20s
    management:
      endpoints:
        web:
          exposure:
            include: health,prometheus,info,metrics
      health:
        livenessState:
          enabled: true
        readinessState:
          enabled: true
      prometheus:
        metrics:
          export:
            enabled: true
    langchain4j:
      model-name: gpt-4
      temperature: 0.7
      max-tokens: 2000
      timeout: 60
      max-retries: 3
# Secret consumed by the Deployment through secretKeyRef.
# The values below are placeholders: never commit real credentials to version control.
apiVersion: v1
kind: Secret
metadata:
  name: langchain4j-secrets
  namespace: production
type: Opaque
stringData:
  openai-api-key: "sk-proj-xxxxxxxxxxxxx"
  db-username: "langchain4j"
  db-password: "your-secure-password"
# Public HTTPS entry point: TLS via cert-manager, routed to langchain4j-service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: langchain4j-ingress
  namespace: production
  annotations:
    # NOTE(review): this annotation is deprecated in favour of spec.ingressClassName;
    # kept here so behaviour does not depend on an IngressClass resource existing.
    kubernetes.io/ingress.class: nginx
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # ingress-nginx has no "rate-limit" annotation; per-client-IP limits are set with
    # limit-rps / limit-rpm / limit-connections.
    # NOTE(review): requests per second is assumed here - use limit-rpm if 100 was meant per minute.
    nginx.ingress.kubernetes.io/limit-rps: "100"
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
  tls:
    - hosts:
        - api.example.com
      secretName: langchain4j-tls
  rules:
    - host: api.example.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: langchain4j-service
                port:
                  number: 80
.builder()模式,提高可读性
// Demo代码示例
/**
 * Demo-quality example that deliberately shows what NOT to do in production:
 * hard-coded secret, no error handling, no input validation, unfiltered output.
 */
public class DemoChat {

    public static void main(String[] args) {
        // Anti-pattern: API key hard-coded in source
        String apiKey = "sk-proj-xxxxx";

        // Anti-pattern: model built and used directly, without any error handling
        ChatLanguageModel llm = OpenAiChatModel.builder()
                .apiKey(apiKey)
                .modelName("gpt-4")
                .build();

        // Anti-pattern: input is not validated and the answer is printed unfiltered
        System.out.println(llm.generate("用户输入"));
    }
}
// 生产级代码
@Service
@Slf4j
public class ProductionChatService {
private final ChatLanguageModel model;
private final InputValidator validator;
private final OutputSanitizer sanitizer;
private final MetricsCollector metrics;
private final CircuitBreaker circuitBreaker;
// 依赖注入,配置外部化
public ProductionChatService(
ChatLanguageModel model,
InputValidator validator,
OutputSanitizer sanitizer,
MetricsCollector metrics,
CircuitBreakerRegistry circuitBreakerRegistry) {
this.model = model;
this.validator = validator;
this.sanitizer = sanitizer;
this.metrics = metrics;
this.circuitBreaker = circuitBreakerRegistry.circuitBreaker("ch@t-service");
}
@Transactional
@RateLimited
public ChatResponse ch@t(ChatRequest request) {
// 参数验证
validator.validateChatInput(request.getMessage());
// 记录指标
Timer.Sample sample = Timer.start(metrics.getRegistry());
try {
// 使用熔断器保护
String response = circuitBreaker.executeSupplier(() -> {
try {
return model.generate(request.getMessage());
} catch (Exception e) {
log.error("LLM调用失败", e);
throw new ChatServiceException("AI服务暂时不可用", e);
}
});
// 输出过滤
String sanitized = sanitizer.sanitizeOutput(response);
// 记录成功
sample.stop(metrics.timer("ch@t.success"));
return ChatResponse.builder()
.message(sanitized)
.timestamp(Instant.now())
.build();
} catch (CallNotPermittedException e) {
// 熔断降级
log.warn("熔断器打开,返回降级响应");
sample.stop(metrics.timer("ch@t.circuit_open"));
return getFallbackResponse();
} catch (Exception e) {
// 错误处理
log.error("聊天服务异常", e);
sample.stop(metrics.timer("ch@t.error"));
throw new ChatServiceException("处理请求时发生错误", e);
}
}
private ChatResponse getFallbackResponse() {
return ChatResponse.builder()
.message("抱歉,AI服务当前繁忙,请稍后重试")
.isFallback(true)
.timestamp(Instant.now())
.build();
}
}
| Demo | Production |
|---|---|
| 硬编码配置 | 环境变量/配置文件 |
| 单一配置 | 多环境配置(dev/test/prod) |
| 明文密钥 | Vault/加密 |
| Demo | Production |
|---|---|
| 没有异常处理 | 完整的try-catch |
| 打印堆栈 | 结构化日志 |
| 应用崩溃 | 优雅降级 |
| Demo | Production |
|---|---|
| 单次调用 | 重试+熔断 |
| 同步阻塞 | 异步+超时 |
| 无监控 | 完整监控告警 |
| Demo | Production |
|---|---|
| 无缓存 | 多层缓存 |
| 串行处理 | 批量+异步 |
| 无限流 | 限流+降级 |
| Demo | Production |
|---|---|
| 无验证 | 输入验证+输出过滤 |
| 无认证 | JWT/OAuth2 |
| HTTP | HTTPS + 证书 |
| Demo | Production |
|---|---|
| 手工测试 | 自动化测试套件 |
| 无测试 | 80%+覆盖率 |
| 本地验证 | CI/CD流水线 |
需求:
提示:
需求:
提示:
需求:
提示:
最后更新:2026-03-09 字数统计:5,500 字 预计阅读时间:45 分钟