Please Note: There are over 100 more detailed code examples in Java, JavaScript and the REST API below.
new MockServerClient("localhost", 1080)
.when(
request()
.withMethod("POST")
.withPath("/login")
.withBody("{username: 'foo', password: 'bar'}")
)
.respond(
response()
.withStatusCode(302)
.withCookie(
"sessionId", "2By8LOhBmaW5nZXJwcmludCIlMDAzMW"
)
.withHeader(
"Location", "https://www.mock-server.com"
)
);
Please Note: There are over 100 more detailed code examples in Java, JavaScript and the REST API below.
var mockServerClient = require('mockserver-client').mockServerClient;
mockServerClient("localhost", 1080).mockAnyResponse({
"httpRequest": {
"method": "POST",
"path": "/login",
"body": {
"username": "foo",
"password": "bar"
}
},
"httpResponse": {
"statusCode": 302,
"headers": {
"Location": [
"https://www.mock-server.com"
]
},
"cookies": {
"sessionId": "2By8LOhBmaW5nZXJwcmludCIlMDAzMW"
}
}
}).then(
function () {
console.log("expectation created");
},
function (error) {
console.log(error);
}
);
To use the Java client add the org.mock-server:mockserver-client-java-no-dependencies:{{ site.mockserver_version }} dependency. The -no-dependencies artifact bundles all dependencies with relocated packages, so it declares zero transitive dependencies — this avoids classpath conflicts and CVE scanning noise from unused transitive dependencies.
For more details about the different dependency versions, see the page on Maven Central.
For example, in Maven:
<dependency>
<groupId>org.mock-server</groupId>
<artifactId>mockserver-client-java-no-dependencies</artifactId>
<version>{{ site.mockserver_version }}</version>
</dependency>
A request matcher expectation may contain:
Open API expectations are also supported, using an OpenAPI v3 specification to generate request matcher expectations for each operation; see the section on open api expectations for details.
MockServer will match (or play) active expectations in the exact order they are added (if their priority is identical). For example, if an expectation A is added with Times.exactly(3) then expectation B is added with Times.exactly(2) with the same request matcher they will be applied in the following order A, A, A, B, B. Priority can be used to alter the order that expectations are matched; matching is ordered by priority (highest first) then creation (earliest first).
Priority can be used to configure a default expectation or response by specifying a negative value for priority and a very lax request matcher; the lax request matcher ensures the default expectation is always matched, but the low priority ensures it is matched last, after all other expectations.
An expectation can be configured with a percentage (0-100) to enable probabilistic matching. When set, the expectation will only match the specified percentage of requests that structurally match the request matcher. This is useful for simulating intermittent failures, flaky services, or A/B testing scenarios.
For example, setting percentage to 50 means approximately half of matching requests will be handled by this expectation, while the other half will fall through to the next matching expectation or the default behavior.
If percentage is not set or set to 100, the expectation matches all structurally matching requests (the default behavior). A value of 0 means the expectation never matches.
Stateful scenarios let you model multi-step API conversations where MockServer's response changes based on prior interactions. Each scenario is an independent named state machine that starts in the "Started" state.
An expectation can participate in a scenario by specifying:
If scenarioName is not set, the expectation behaves normally without any scenario filtering. The default initial state for all scenarios is "Started".
Example - Login Flow: First call returns a token, subsequent calls using that token get a different response.
// Step 1: Login returns a token (scenario starts in "Started" state)
{
"httpRequest": {
"method": "POST",
"path": "/login"
},
"httpResponse": {
"statusCode": 200,
"body": "{\"token\": \"abc123\"}"
},
"scenarioName": "LoginFlow",
"scenarioState": "Started",
"newScenarioState": "LoggedIn",
"times": { "remainingTimes": 1 }
}
// Step 2: After login, GET /profile returns user data
{
"httpRequest": {
"method": "GET",
"path": "/profile"
},
"httpResponse": {
"statusCode": 200,
"body": "{\"name\": \"Alice\", \"email\": \"alice@example.com\"}"
},
"scenarioName": "LoginFlow",
"scenarioState": "LoggedIn"
}
// Step 3: Before login, GET /profile returns 401
{
"httpRequest": {
"method": "GET",
"path": "/profile"
},
"httpResponse": {
"statusCode": 401,
"body": "{\"error\": \"Not authenticated\"}"
},
"scenarioName": "LoginFlow",
"scenarioState": "Started"
}
Example - Pagination: Successive calls to the same endpoint return different pages of data.
// Page 1 (initial state)
{
"httpRequest": {
"method": "GET",
"path": "/items"
},
"httpResponse": {
"statusCode": 200,
"body": "{\"items\": [\"a\", \"b\"], \"page\": 1, \"hasMore\": true}"
},
"scenarioName": "Pagination",
"scenarioState": "Started",
"newScenarioState": "Page2",
"times": { "remainingTimes": 1 }
}
// Page 2
{
"httpRequest": {
"method": "GET",
"path": "/items"
},
"httpResponse": {
"statusCode": 200,
"body": "{\"items\": [\"c\", \"d\"], \"page\": 2, \"hasMore\": true}"
},
"scenarioName": "Pagination",
"scenarioState": "Page2",
"newScenarioState": "Page3",
"times": { "remainingTimes": 1 }
}
// Page 3 (last page)
{
"httpRequest": {
"method": "GET",
"path": "/items"
},
"httpResponse": {
"statusCode": 200,
"body": "{\"items\": [\"e\"], \"page\": 3, \"hasMore\": false}"
},
"scenarioName": "Pagination",
"scenarioState": "Page3"
}
Example - Retry Testing: First two calls return 503, third call succeeds.
// First call: 503
{
"httpRequest": { "path": "/api/data" },
"httpResponse": { "statusCode": 503, "body": "{\"error\": \"Service Unavailable\"}" },
"scenarioName": "RetryTest",
"scenarioState": "Started",
"newScenarioState": "Attempt2",
"times": { "remainingTimes": 1 }
}
// Second call: 503
{
"httpRequest": { "path": "/api/data" },
"httpResponse": { "statusCode": 503, "body": "{\"error\": \"Service Unavailable\"}" },
"scenarioName": "RetryTest",
"scenarioState": "Attempt2",
"newScenarioState": "Ready",
"times": { "remainingTimes": 1 }
}
// Third call onwards: 200
{
"httpRequest": { "path": "/api/data" },
"httpResponse": { "statusCode": 200, "body": "{\"data\": \"success\"}" },
"scenarioName": "RetryTest",
"scenarioState": "Ready"
}
Example - Order State Machine: Model an order lifecycle with state transitions.
// Create order
{
"httpRequest": { "method": "POST", "path": "/orders" },
"httpResponse": { "statusCode": 201, "body": "{\"id\": 1, \"status\": \"CREATED\"}" },
"scenarioName": "OrderLifecycle",
"scenarioState": "Started",
"newScenarioState": "OrderCreated",
"times": { "remainingTimes": 1 }
}
// Pay for order (only available after creation)
{
"httpRequest": { "method": "PUT", "path": "/orders/1/pay" },
"httpResponse": { "statusCode": 200, "body": "{\"id\": 1, \"status\": \"PAID\"}" },
"scenarioName": "OrderLifecycle",
"scenarioState": "OrderCreated",
"newScenarioState": "OrderPaid",
"times": { "remainingTimes": 1 }
}
// Get order - returns CREATED status
{
"httpRequest": { "method": "GET", "path": "/orders/1" },
"httpResponse": { "statusCode": 200, "body": "{\"id\": 1, \"status\": \"CREATED\"}" },
"scenarioName": "OrderLifecycle",
"scenarioState": "OrderCreated"
}
// Get order - returns PAID status
{
"httpRequest": { "method": "GET", "path": "/orders/1" },
"httpResponse": { "statusCode": 200, "body": "{\"id\": 1, \"status\": \"PAID\"}" },
"scenarioName": "OrderLifecycle",
"scenarioState": "OrderPaid"
}
Example - Rate Limiting: Allow a fixed number of calls, then return 429.
// First 3 calls succeed
{
"httpRequest": { "path": "/api/resource" },
"httpResponse": { "statusCode": 200, "body": "{\"data\": \"ok\"}" },
"scenarioName": "RateLimit",
"scenarioState": "Started",
"newScenarioState": "Call2",
"times": { "remainingTimes": 1 }
}
{
"httpRequest": { "path": "/api/resource" },
"httpResponse": { "statusCode": 200, "body": "{\"data\": \"ok\"}" },
"scenarioName": "RateLimit",
"scenarioState": "Call2",
"newScenarioState": "Call3",
"times": { "remainingTimes": 1 }
}
{
"httpRequest": { "path": "/api/resource" },
"httpResponse": { "statusCode": 200, "body": "{\"data\": \"ok\"}" },
"scenarioName": "RateLimit",
"scenarioState": "Call3",
"newScenarioState": "RateLimited",
"times": { "remainingTimes": 1 }
}
// Subsequent calls: 429 Too Many Requests
{
"httpRequest": { "path": "/api/resource" },
"httpResponse": { "statusCode": 429, "body": "{\"error\": \"Rate limit exceeded\"}" },
"scenarioName": "RateLimit",
"scenarioState": "RateLimited"
}
Example - Multi-Turn LLM Conversation: Script a two-turn agent loop where the model first calls a tool, then answers after receiving the tool result. Each turn advances the scenario state automatically. Use isolateBy so concurrent sessions (identified by a header, query parameter, or cookie) maintain independent state.
import static org.mockserver.client.LlmConversationBuilder.conversation;
import static org.mockserver.model.Completion.completion;
import static org.mockserver.model.ToolUse.toolUse;
import static org.mockserver.llm.IsolationSource.header;
conversation()
.withPath("/v1/messages")
.withProvider(Provider.ANTHROPIC)
.withModel("claude-sonnet-4")
.isolateBy(header("x-session-id"))
.turn()
.whenTurnIndex(0)
.respondingWith(
completion()
.withToolCall(toolUse("search").withArguments("{\"query\":\"weather\"}"))
.withStopReason("tool_use"))
.andThen()
.turn()
.whenContainsToolResultFor("search")
.respondingWith(
completion()
.withText("Based on the search results, it will be sunny.")
.withStopReason("end_turn"))
.applyTo(mockServerClient);
JSON equivalent (two expectations sharing a scenario):
// Turn 1: tool call
{
"httpRequest": { "method": "POST", "path": "/v1/messages" },
"httpLlmResponse": {
"provider": "ANTHROPIC",
"model": "claude-sonnet-4",
"completion": {
"toolCalls": [{ "name": "search", "arguments": "{\"query\":\"weather\"}" }],
"stopReason": "tool_use"
},
"conversationPredicates": { "turnIndex": 0 }
},
"scenarioName": "__llm_conv_example",
"scenarioState": "Started",
"newScenarioState": "turn_1"
}
// Turn 2: final answer after tool result
{
"httpRequest": { "method": "POST", "path": "/v1/messages" },
"httpLlmResponse": {
"provider": "ANTHROPIC",
"model": "claude-sonnet-4",
"completion": {
"text": "Based on the search results, it will be sunny.",
"stopReason": "end_turn"
},
"conversationPredicates": { "containsToolResultFor": "search" }
},
"scenarioName": "__llm_conv_example",
"scenarioState": "turn_1",
"newScenarioState": "__done"
}
Scenarios are independent of each other — different scenario names maintain separate state. All scenario states are reset when MockServer is reset.
Beyond expectation-driven state transitions, MockServer supports timed auto-transitions and external triggers via REST endpoints. These let you model scenarios where state changes happen after a time delay (e.g. a background process completing) or are driven externally by a test harness.
Get current state:
# Returns {"scenarioName": "DeployFlow", "currentState": "Started"}
curl -X GET http://localhost:1080/mockserver/scenario/DeployFlow
Set state immediately:
# Set the scenario to "Deploying" state
curl -X PUT http://localhost:1080/mockserver/scenario/DeployFlow \
-H "Content-Type: application/json" \
-d '{"state": "Deploying"}'
Set state with timed auto-transition: The scenario enters "Deploying" immediately, then automatically transitions to "Deployed" after 5 seconds (if still in "Deploying").
curl -X PUT http://localhost:1080/mockserver/scenario/DeployFlow \
-H "Content-Type: application/json" \
-d '{"state": "Deploying", "transitionAfterMs": 5000, "nextState": "Deployed"}'
External trigger: Advance the scenario to a specific state from your test harness or CI pipeline.
# Jump directly to "Failed" state
curl -X PUT http://localhost:1080/mockserver/scenario/DeployFlow/trigger \
-H "Content-Type: application/json" \
-d '{"newState": "Failed"}'
Example - Deployment Pipeline Flow: Combine expectation-based matching with timed transitions to model a deployment that takes time to complete.
// Step 1: Register expectations for each state
// When Deploying, GET /status returns "in progress"
{
"httpRequest": { "method": "GET", "path": "/status" },
"httpResponse": { "statusCode": 200, "body": "{\"status\": \"deploying\"}" },
"scenarioName": "DeployFlow",
"scenarioState": "Deploying"
}
// When Deployed, GET /status returns "complete"
{
"httpRequest": { "method": "GET", "path": "/status" },
"httpResponse": { "statusCode": 200, "body": "{\"status\": \"complete\"}" },
"scenarioName": "DeployFlow",
"scenarioState": "Deployed"
}
// Step 2: Start the flow with a timed transition
// PUT /mockserver/scenario/DeployFlow
// Body: {"state": "Deploying", "transitionAfterMs": 3000, "nextState": "Deployed"}
//
// Now: GET /status returns "deploying"
// After 3 seconds: GET /status returns "complete"
Timed transitions are guarded: the transition only fires if the scenario is still in the expected currentState when the timer expires. If the scenario has already moved to a different state (e.g. via an external trigger or expectation match), the timed transition becomes a no-op. Scheduling a new transition for the same scenario cancels any pending one.
Scenario state transitions are not limited to HTTP request matches. A DNS query, WebSocket connection, gRPC request, or HTTP request can drive a scenario forward via a cross-protocol trigger. This lets you model multi-protocol test flows — for example: "when a WebSocket connects, advance scenario X so the next HTTP expectation for that state becomes active."
Add a crossProtocolScenarios array to an expectation. Each entry specifies:
Example — WebSocket connect advances an HTTP scenario: When any WebSocket connection is established, the "ConnectionFlow" scenario transitions to "WsConnected", activating the HTTP expectation that requires that state.
// Expectation 1: dummy WebSocket endpoint that fires the cross-protocol transition
{
"httpRequest": {
"method": "GET",
"path": "/ws/events"
},
"httpResponse": {
"statusCode": 101
},
"crossProtocolScenarios": [
{
"trigger": "WEBSOCKET_CONNECT",
"scenarioName": "ConnectionFlow",
"targetState": "WsConnected"
}
]
}
// Expectation 2: only active after the WebSocket has connected
{
"httpRequest": {
"method": "GET",
"path": "/api/status"
},
"httpResponse": {
"statusCode": 200,
"body": "{\"status\": \"connected\"}"
},
"scenarioName": "ConnectionFlow",
"scenarioState": "WsConnected"
}
Example — DNS query activates a downstream-failure scenario: When a DNS query for payments.svc is observed, advance the scenario to "DnsObserved" so a chaos-injecting HTTP expectation becomes active.
{
"httpRequest": { "method": "GET", "path": "/_internal/dns-trigger" },
"httpResponse": { "statusCode": 200 },
"crossProtocolScenarios": [
{
"trigger": "DNS_QUERY",
"matchPattern": "payments.svc",
"scenarioName": "PaymentsFlow",
"targetState": "DnsObserved"
}
]
}
The matchPattern is a substring match against the event identifier. For DNS_QUERY it is the query name; for GRPC_REQUEST it is the gRPC service name; for HTTP_REQUEST it is the request path; for WEBSOCKET_CONNECT it is the WebSocket URL. When matchPattern is omitted, all events of that trigger type match. Multiple entries in crossProtocolScenarios are evaluated independently — a single event can advance several scenarios in one fire.
A single expectation can return multiple different responses by providing an array of responses in the httpResponses field. Each time the expectation matches, the next response in the list is returned. After the last response, it cycles back to the first.
The responseMode field controls how responses are selected:
Note: Use httpResponses instead of httpResponse (singular) when you want multiple responses. If both are set, httpResponses takes priority.
Example - Sequential Responses: Cycle through different status codes.
{
"httpRequest": {
"path": "/api/status"
},
"httpResponses": [
{ "statusCode": 200, "body": "{\"status\": \"ok\"}" },
{ "statusCode": 503, "body": "{\"status\": \"degraded\"}" },
{ "statusCode": 200, "body": "{\"status\": \"ok\"}" }
]
}
The first request returns 200, the second returns 503, the third returns 200, then it cycles back to the first response.
Example - Random Responses: Simulate unpredictable service behavior.
{
"httpRequest": {
"path": "/api/unstable"
},
"httpResponses": [
{ "statusCode": 200, "body": "{\"result\": \"success\"}" },
{ "statusCode": 500, "body": "{\"error\": \"Internal Server Error\"}" },
{ "statusCode": 429, "body": "{\"error\": \"Too Many Requests\"}" }
],
"responseMode": "RANDOM"
}
Example - Java Client:
import static org.mockserver.model.HttpRequest.request;
import static org.mockserver.model.HttpResponse.response;
new MockServerClient("localhost", 1080)
.when(request().withPath("/api/status"))
.respond(
java.util.Arrays.asList(
response().withStatusCode(200).withBody("{\"status\": \"ok\"}"),
response().withStatusCode(503).withBody("{\"status\": \"degraded\"}")
)
);
If an expectation is added and the id field matches an existing expectation, the existing expectation will be updated (i.e. replaced). A UUID will be assigned to each expectation if no value for id is specified.
There are two types of request matcher:
A request properties matcher matches requests using one or more of the following properties:
Matching for properties can be done using:
Note: path values containing { or } characters (such as /api/{id}) are interpreted as regex patterns, not literal strings. This is a common source of unexpected matching behaviour when path templates are used. To match literal curly braces in a path, use one of the following approaches:
Matching for key to multiple values supports multiple values for each key for headers, query parameters and path parameters
Note: for query parameters, the default sub set matching mode means that extra query parameters not specified in the matcher are allowed and do not cause a match failure. To enforce strict matching where only the specified query parameters are allowed (and any additional parameters cause a mismatch), use KeyMatchStyle.MATCHING_KEY on the request matcher. For example:
request()
.withPath("/some/path")
.withQueryStringParameters(
new Parameters(
param("key", "value")
).withKeyMatchStyle(KeyMatchStyle.MATCHING_KEY)
)
Matching for key to single value supports a single value for each key for cookies
Important: when matching JSON bodies, there is a significant difference between plain string matching and semantic JSON matching:
If your expectations are not matching JSON requests as expected, ensure you are using json() rather than passing the JSON string directly.
Matching for bodies can be done using:
MockServer supports whitespace-insensitive matching of GraphQL over HTTP JSON request bodies. This allows expectations to match GraphQL queries, mutations, and subscriptions regardless of whitespace, formatting, or comment differences. The request body must be a JSON object with a query field (the standard GraphQL over HTTP format).
A GraphQL body matcher can specify:
Example - match a GraphQL query:
{
"httpRequest": {
"method": "POST",
"path": "/graphql",
"body": {
"type": "GRAPHQL",
"query": "{ user(id: 1) { name email } }"
}
},
"httpResponse": {
"statusCode": 200,
"body": "{\"data\": {\"user\": {\"name\": \"Alice\", \"email\": \"alice@example.com\"}}}"
}
}
Example - match with operation name and variables schema:
{
"httpRequest": {
"method": "POST",
"path": "/graphql",
"body": {
"type": "GRAPHQL",
"query": "query GetUser($id: ID!) { user(id: $id) { name email } }",
"operationName": "GetUser",
"variablesSchema": "{\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}}, \"required\": [\"id\"]}"
}
},
"httpResponse": {
"statusCode": 200,
"body": "{\"data\": {\"user\": {\"name\": \"Alice\", \"email\": \"alice@example.com\"}}}"
}
}
Example - match a mutation:
{
"httpRequest": {
"method": "POST",
"path": "/graphql",
"body": {
"type": "GRAPHQL",
"query": "mutation CreateUser($input: CreateUserInput!) { createUser(input: $input) { id name } }",
"operationName": "CreateUser"
}
},
"httpResponse": {
"statusCode": 200,
"body": "{\"data\": {\"createUser\": {\"id\": \"123\", \"name\": \"Alice\"}}}"
}
}
Example - AST subset matching (match any query containing a "users" field):
{
"httpRequest": {
"method": "POST",
"path": "/graphql",
"body": {
"type": "GRAPHQL",
"query": "query { users { id } }",
"selectionSetMatchType": "AST_SUBSET",
"fields": ["users"]
}
},
"httpResponse": {
"statusCode": 200,
"body": "{\"data\": {\"users\": [{\"id\": \"1\", \"name\": \"Alice\"}]}}"
}
}
Example - AST exact matching (match a query with exactly these top-level fields):
{
"httpRequest": {
"method": "POST",
"path": "/graphql",
"body": {
"type": "GRAPHQL",
"query": "query GetDashboard { user profile settings }",
"selectionSetMatchType": "AST_EXACT"
}
},
"httpResponse": {
"statusCode": 200,
"body": "{\"data\": {\"user\": {}, \"profile\": {}, \"settings\": {}}}"
}
}
MockServer supports mocking GraphQL subscriptions over WebSocket using the graphql-transport-ws protocol (also accepts the legacy graphql-ws subprotocol). This allows testing GraphQL subscription clients without a real GraphQL server.
The protocol flow is:
To configure a GraphQL subscription mock, use an httpWebSocketResponse with:
Example - mock a GraphQL subscription that pushes two events:
{
"httpRequest": {
"method": "GET",
"path": "/graphql"
},
"httpWebSocketResponse": {
"subprotocol": "graphql-transport-ws",
"graphqlSubscriptionFilter": {
"query": "subscription { userUpdated { id name } }",
"selectionSetMatchType": "AST_SUBSET"
},
"messages": [
{"text": "{\"id\": \"1\", \"name\": \"Alice\"}"},
{"text": "{\"id\": \"2\", \"name\": \"Bob\"}", "delay": {"timeUnit": "MILLISECONDS", "value": 500}}
]
}
}
Example - Java API:
mockServerClient.when(
request()
.withMethod("GET")
.withPath("/graphql")
).respondWithWebSocket(
webSocketResponse()
.withSubprotocol("graphql-transport-ws")
.withGraphqlSubscriptionFilter(
GraphQLBody.graphQL("subscription { userUpdated { id name } }")
.withSelectionSetMatchType(SelectionSetMatchType.AST_SUBSET)
)
.withMessage(webSocketMessage("{\"id\": \"1\", \"name\": \"Alice\"}"))
.withMessage(webSocketMessage("{\"id\": \"2\", \"name\": \"Bob\"}"))
);
When a client connects and sends:
{"type": "connection_init"}
// MockServer replies: {"type": "connection_ack"}
{"id": "1", "type": "subscribe", "payload": {"query": "subscription { userUpdated { id name } }"}}
// MockServer replies:
// {"id": "1", "type": "next", "payload": {"data": {"id": "1", "name": "Alice"}}}
// {"id": "1", "type": "next", "payload": {"data": {"id": "2", "name": "Bob"}}}
// {"id": "1", "type": "complete"}
Note: The legacy graphql-ws subprotocol (used by the older subscriptions-transport-ws library) is also accepted. Both use the same message format in MockServer's implementation.
An open api request matcher can contain any of the following fields:
MockServer creates a set of request properties matchers for each open api request matcher to ensure that control-plane logic, such as clearing expectations or retrieving expectations, works consistently between the two types of request matchers; this can be viewed in the MockServer UI active expectations section.
Actions can be one of the following types:
If no action is present for a request because no request matcher was matched then:
A response action can be:
either a response literal containing any of the following:
or a templated response using javascript or velocity with a delay
or a callback used to dynamically generate a response based on the request:
as a server side callback implemented as a java class that has a default constructor, implements org.mockserver.mock.action.ExpectationResponseCallback and is available on the classpath
as a client side callback implemented as a closure using the java or javascript clients
A forward action can be:
either an exact forwarder, that forwards requests exactly as it receives them, containing the following:
or an overridden request (or overridden response), with a delay (including distribution-based delays), that allows any part of a forwarded request or response to be replaced or certain fields (path, headers, cookies or query parameters) to be modified
or a templated forwarder using javascript or velocity, with a delay, that allows requests to be modified or completely re-written before they are forwarded
or a callback used to dynamically generate the request to forward based on the request received by MockServer:
as a server side callback implemented as a java class that has a default constructor, implements org.mockserver.mock.action.ExpectationForwardCallback or org.mockserver.mock.action.ExpectationForwardAndResponseCallback and is available on the classpath
as a client side callback implemented as a closure using the java or javascript clients
A forward with fallback action (httpForwardWithFallback) forwards the request to an upstream service, but returns a pre-configured fallback response when the upstream returns an error status code or the connection fails. This is unique to MockServer's hybrid mock+proxy architecture.
Use cases:
new MockServerClient("localhost", 1080)
.when(
request()
.withPath("/api/downstream")
)
.forwardWithFallback(
forwardWithFallback()
.withForward(
forward()
.withHost("downstream-service.example.com")
.withPort(443)
.withScheme(HttpForward.Scheme.HTTPS)
)
.withFallback(
response()
.withStatusCode(200)
.withBody("{\"status\": \"cached\", \"data\": []}")
)
.withFallbackOnStatusCodes(500, 502, 503, 504)
.withFallbackOnTimeout(true)
);
{
"httpRequest": {
"path": "/api/downstream"
},
"httpForwardWithFallback": {
"httpForward": {
"host": "downstream-service.example.com",
"port": 443,
"scheme": "HTTPS"
},
"fallbackResponse": {
"statusCode": 200,
"body": "{\"status\": \"cached\", \"data\": []}"
},
"fallbackOnStatusCodes": [500, 502, 503, 504],
"fallbackOnTimeout": true
}
}
An error action can return an invalid response as a sequence of bytes or drop the connection (with an optional delay)
An LLM response action (httpLlmResponse) returns a provider-correct response from a high-level description of what the model should say. Instead of hand-assembling Anthropic or OpenAI response JSON, you describe the intent (text, tool calls, usage, stop reason) and MockServer produces the byte-correct wire format.
Seven providers have full codec support: ANTHROPIC, OPENAI (Chat Completions), OPENAI_RESPONSES, GEMINI, BEDROCK, AZURE_OPENAI, and OLLAMA — each produces the byte-correct wire format for both single completions and streaming. A request naming an unrecognised provider returns a structured 400 response listing the supported providers.
import static org.mockserver.client.LlmMockBuilder.llmMock;
import static org.mockserver.model.Completion.completion;
import static org.mockserver.model.Provider.ANTHROPIC;
import static org.mockserver.model.ToolUse.toolUse;
import static org.mockserver.model.Usage.usage;
// Simple text completion
llmMock("/v1/messages")
.withProvider(ANTHROPIC)
.withModel("claude-sonnet-4")
.respondingWith(
completion()
.withText("The capital of France is Paris.")
.withStopReason("end_turn")
.withUsage(usage().withInputTokens(42).withOutputTokens(8))
)
.applyTo(mockServerClient);
// Tool / function call
llmMock("/v1/messages")
.withProvider(ANTHROPIC)
.respondingWith(
completion()
.withText("Let me check the weather.")
.withToolCall(toolUse("get_weather").withArguments("{\"city\":\"Paris\"}"))
.withStopReason("tool_use")
)
.applyTo(mockServerClient);
{
"httpRequest": {
"method": "POST",
"path": "/v1/messages"
},
"httpLlmResponse": {
"provider": "ANTHROPIC",
"model": "claude-sonnet-4",
"completion": {
"text": "The capital of France is Paris.",
"stopReason": "end_turn",
"usage": { "inputTokens": 42, "outputTokens": 8 }
}
}
}
When streaming is enabled, MockServer expands the completion into provider-correct streaming events with configurable timing physics. Most providers use SSE (Server-Sent Events with text/event-stream) — for example, message_start through message_stop for Anthropic, and chat.completion.chunk deltas for OpenAI. Two providers use alternative wire formats: Ollama uses native NDJSON (newline-delimited JSON with application/x-ndjson), and Bedrock uses the AWS event-stream binary framing (application/vnd.amazon.eventstream) where each streaming chunk is a binary message containing a base64-wrapped JSON payload — matching the InvokeModelWithResponseStream wire format. MockServer emits the correct wire format automatically based on the provider.
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.mockserver.client.Llm.jitter;
import static org.mockserver.client.Llm.timeToFirstToken;
import static org.mockserver.client.Llm.tokensPerSecond;
import static org.mockserver.client.LlmMockBuilder.llmMock;
import static org.mockserver.model.Completion.completion;
import static org.mockserver.model.Provider.OPENAI;
llmMock("/v1/chat/completions")
.withProvider(OPENAI)
.withModel("gpt-4o")
.respondingWith(
completion()
.withText("Streaming token by token...")
.streaming()
.withStreamingPhysics(
timeToFirstToken(300, MILLISECONDS),
tokensPerSecond(50),
jitter(0.2))
)
.applyTo(mockServerClient);
For OpenAI embeddings, deterministicFromInput() generates reproducible vectors seeded from the input text. Same input + same dimensions + same seed produces an identical L2-normalised vector across JVMs.
import static org.mockserver.client.Llm.embedding;
import static org.mockserver.client.LlmMockBuilder.llmMock;
import static org.mockserver.model.Provider.OPENAI;
llmMock("/v1/embeddings")
.withProvider(OPENAI)
.respondingWith(
embedding()
.withDimensions(1536)
.deterministicFromInput()
)
.applyTo(mockServerClient);
Attach an outputSchema (a JSON Schema) to a completion and MockServer validates the response text against it as the response is encoded. Validation is fail-soft: a mismatch never changes the response body — it adds an x-mockserver-structured-output-invalid response header and logs a warning, so a malformed structured-output fixture is surfaced without breaking the test (a blank or invalid schema, or a missing text, is a no-op). To assert schema conformance over already-recorded traffic instead, use the verify_structured_output tool.
import static org.mockserver.client.LlmMockBuilder.llmMock;
import static org.mockserver.model.Completion.completion;
import static org.mockserver.model.Provider.ANTHROPIC;
llmMock("/v1/messages")
.withProvider(ANTHROPIC)
.respondingWith(
completion()
.withText("{\"city\":\"Paris\",\"country\":\"France\"}")
.withOutputSchema("{\"type\":\"object\",\"required\":[\"city\",\"country\"]}")
)
.applyTo(mockServerClient);
A chaos block on the LLM response injects faults for resilience testing: probabilistic provider errors (e.g. 429/529 with a Retry-After header), mid-stream truncation, malformed SSE, and a stateful request quota. The quota is a deterministic fixed-window rate limit — expectations sharing a quotaName share one counter, so requests past quotaLimit within quotaWindowMillis are rejected with quotaErrorStatus (default 429) and a Retry-After header carrying the retryAfter value (the count resets when the window elapses and on server reset). The chaos block is set via the JSON definition (below) or the mock_llm_completion MCP tool, where the full field reference lives.
{
"httpRequest": {
"method": "POST",
"path": "/v1/messages"
},
"httpLlmResponse": {
"provider": "ANTHROPIC",
"completion": {
"text": "The capital of France is Paris."
},
"chaos": {
"quotaName": "anthropic-account",
"quotaLimit": 100,
"quotaWindowMillis": 60000,
"quotaErrorStatus": 429,
"retryAfter": "30"
}
}
}
For multi-turn LLM conversations with stateful scenario progression, see the LLM Conversation example in the Stateful Scenarios section.
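A WebSocket response action (httpWebSocketResponse) accepts an HTTP Upgrade request and turns the connection into a WebSocket session. Two modes are available: server push (messages), where MockServer sends the configured messages immediately after the upgrade, and bidirectional frame matching (matchers), where MockServer replies to incoming client frames that match an entry.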
Both modes can be combined in a single expectation: MockServer sends any configured messages immediately after the upgrade, then waits for incoming frames and applies the matchers to each one received.
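Each entry in the matchers array contains, as shown in the example below, a frameType (e.g. TEXT), a matcher for the frame content (textMatcher), and the responses to send when an incoming frame matches: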
{
"httpRequest": {
"method": "GET",
"path": "/ws/chat"
},
"httpWebSocketResponse": {
"messages": [
{"text": "{\"type\": \"connected\"}"}
],
"matchers": [
{
"frameType": "TEXT",
"textMatcher": "ping",
"responses": [
{"text": "{\"type\": \"pong\"}"}
]
},
{
"frameType": "TEXT",
"textMatcher": "subscribe",
"responses": [
{"text": "{\"type\": \"ack\"}"},
{"text": "{\"type\": \"data\", \"value\": 42}"}
]
}
],
"closeConnection": false
}
}
In this example, MockServer sends {"type": "connected"} immediately after the WebSocket upgrade. If the client then sends a frame containing ping, MockServer replies with {"type": "pong"}. A frame containing subscribe triggers a two-message reply sequence. Frames that match no entry are silently ignored.
Bidirectional WebSocket expectations are also authorable in the dashboard Composer — select WebSocket response as the action type and use the Bidirectional frame matchers panel to add entries.
CRUD Simulation lets you quickly stand up a fully functional RESTful resource without writing individual expectations for each HTTP method. You provide a base path and optional configuration, and MockServer automatically generates five endpoints that manage an in-memory collection of JSON objects.
This is useful when your tests need a realistic data store (e.g. users, products, orders) that supports create, read, update, and delete operations without manually defining expectations for each verb.
Given a basePath of /api/users, MockServer generates:
| Method | Path | Description | Success Status |
|---|---|---|---|
| GET | /api/users | List all items | 200 |
| POST | /api/users | Create a new item | 201 |
| GET | /api/users/{id} | Get item by ID | 200 |
| PUT | /api/users/{id} | Update item by ID | 200 |
| DELETE | /api/users/{id} | Delete item by ID | 204 |
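A CRUD simulation is defined by a JSON object with the following fields: basePath (the base path of the resource, e.g. /api/users), and optionally idField (the name of the ID property, e.g. id), idStrategy (AUTO_INCREMENT or UUID, as used in the examples below) and initialData (an array of JSON objects used to seed the collection).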
Register a CRUD simulation using PUT /mockserver/crud:
curl -X PUT "http://localhost:1080/mockserver/crud" \
-H "Content-Type: application/json" \
-d '{
"basePath": "/api/users",
"idField": "id",
"idStrategy": "AUTO_INCREMENT",
"initialData": [
{"name": "Alice", "email": "alice@example.com"},
{"name": "Bob", "email": "bob@example.com"}
]
}'
After registration, the endpoints are immediately available:
# List all users
curl http://localhost:1080/api/users
# Create a new user
curl -X POST http://localhost:1080/api/users \
-H "Content-Type: application/json" \
-d '{"name": "Charlie", "email": "charlie@example.com"}'
# Get user by ID
curl http://localhost:1080/api/users/1
# Update user
curl -X PUT http://localhost:1080/api/users/1 \
-H "Content-Type: application/json" \
-d '{"name": "Alice Updated", "email": "alice@example.com"}'
# Delete user
curl -X DELETE http://localhost:1080/api/users/3
new MockServerClient("localhost", 1080)
.crud(
new CrudExpectationsDefinition()
.withBasePath("/api/users")
.withIdField("id")
.withIdStrategy(CrudExpectationsDefinition.IdStrategy.AUTO_INCREMENT)
);
curl -X PUT "http://localhost:1080/mockserver/crud" \
-H "Content-Type: application/json" \
-d '{
"basePath": "/api/products",
"idStrategy": "UUID"
}'
# Create a product - ID will be a UUID like "550e8400-e29b-41d4-a716-446655440000"
curl -X POST http://localhost:1080/api/products \
-H "Content-Type: application/json" \
-d '{"name": "Widget", "price": 9.99}'
CRUD simulations are independent of regular expectations and are cleared when MockServer is reset. You can register multiple CRUD simulations with different base paths simultaneously.