
@benallard
Created March 22, 2025 11:09

Streaming JSON Parsing with WebFlux & Jackson

Introduction

This project demonstrates how to efficiently parse and stream large JSON payloads using Spring WebFlux and Jackson's non-blocking JSON parser.

Why does this matter?

Traditional JSON parsing waits for the entire payload to arrive before any processing starts, which causes memory problems for large data streams. WebFlux provides reactive streaming of the request body, but Jackson does not natively emit partial string tokens, a limitation this implementation works around. The result is a low memory footprint, early data processing, and non-blocking I/O for high-performance JSON handling.
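
To make the mechanism concrete before the full controller below, here is a minimal standalone sketch (no WebFlux involved), assuming a Jackson version recent enough to provide createNonBlockingByteBufferParser (the same method the controller uses): JSON bytes are fed to the non-blocking parser in two chunks, and the parser reports JsonToken.NOT_AVAILABLE whenever it has consumed all input without completing the next token. The class name and payload are illustrative only.

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.async.ByteBufferFeeder;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class NonBlockingParseSketch {
    public static void main(String[] args) throws IOException {
        byte[] json = "{\"name\":\"John Doe\",\"age\":30}".getBytes(StandardCharsets.UTF_8);

        JsonParser parser = new JsonFactory().createNonBlockingByteBufferParser();
        ByteBufferFeeder feeder = (ByteBufferFeeder) parser.getNonBlockingInputFeeder();

        int half = json.length / 2;
        ByteBuffer[] chunks = {
                ByteBuffer.wrap(json, 0, half),
                ByteBuffer.wrap(json, half, json.length - half)
        };

        for (ByteBuffer chunk : chunks) {
            // Hand the next chunk to the parser, then drain every token that is already complete.
            feeder.feedInput(chunk);
            JsonToken token;
            while ((token = parser.nextToken()) != null && token != JsonToken.NOT_AVAILABLE) {
                System.out.println("Token: " + token);
            }
            // NOT_AVAILABLE means "waiting for more bytes", not a parse failure.
        }

        feeder.endOfInput();   // no further chunks will arrive
        JsonToken token;
        while ((token = parser.nextToken()) != null && token != JsonToken.NOT_AVAILABLE) {
            System.out.println("Token: " + token);   // any tokens that only complete at end of input
        }
        parser.close();
    }
}

The same feed-then-drain loop is what the WebFlux controller applies to each incoming DataBuffer.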

Core Features

  • True Reactive JSON Parsing – Processes data as it arrives, no waiting for the full payload.
  • Jackson's Non-Blocking JSON Parser – Uses JsonFactory.createNonBlockingByteBufferParser() to incrementally parse JSON from a stream.
  • WebFlux Integration – Fully non-blocking with Flux to handle high-throughput JSON streams.
  • Handles Large JSON Objects Efficiently – Supports string values up to Jackson's default limit of 20,000,000 characters, which is configurable (see the sketch after this list).
  • Memory-Efficient – Avoids loading entire JSON documents into memory.
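
The 20,000,000-character figure above is Jackson's default StreamReadConstraints string limit. A minimal sketch of raising it, assuming Jackson 2.15 or newer (where StreamReadConstraints was introduced); the 40,000,000 value and class name are illustrative only:

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.StreamReadConstraints;

public class StreamLimitConfig {
    public static void main(String[] args) {
        // Raise the default 20,000,000-character string limit; 40,000,000 is purely illustrative.
        StreamReadConstraints constraints = StreamReadConstraints.builder()
                .maxStringLength(40_000_000)
                .build();

        JsonFactory jsonFactory = JsonFactory.builder()
                .streamReadConstraints(constraints)
                .build();

        System.out.println("maxStringLength = " + constraints.getMaxStringLength());
    }
}

A factory built this way can be dropped into the controller below in place of the plain JsonFactory.builder().build().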

Streaming Controller (Java)

package ben.test;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.async.ByteBufferFeeder;
import org.springframework.core.io.buffer.DataBuffer;
import org.springframework.core.io.buffer.DataBufferUtils;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import reactor.core.publisher.Flux;
import reactor.core.publisher.FluxSink;

import java.io.IOException;
import java.nio.ByteBuffer;

@RestController
public class StreamingController {

    private final JsonFactory jsonFactory = JsonFactory.builder().build();

    @PostMapping(value = "/stream-json", consumes = MediaType.APPLICATION_JSON_VALUE)
    public Flux<String> streamJson(@RequestBody Flux<DataBuffer> body) {
        return Flux.create(sink -> {
            JsonParser parser;
            ByteBufferFeeder feeder;
            try {
                // The parser must stay open until the request body completes, so it is
                // closed in completeProcessing() rather than in a try-with-resources block.
                parser = jsonFactory.createNonBlockingByteBufferParser();
                feeder = (ByteBufferFeeder) parser.getNonBlockingInputFeeder();
            } catch (IOException e) {
                sink.error(e);
                return;
            }
            body.subscribe(
                    buf -> processBuffer(buf, feeder, parser, sink),  // process each buffer chunk
                    sink::error,                                      // propagate upstream errors
                    () -> completeProcessing(feeder, parser, sink)    // complete the stream
            );
        });
    }

    private void processBuffer(DataBuffer buf, ByteBufferFeeder feeder, JsonParser parser, FluxSink<String> sink) {
        try (DataBuffer.ByteBufferIterator it = buf.readableByteBuffers()) {
            it.forEachRemaining(bbuf -> parseJsonChunk(bbuf, feeder, parser, sink));
        } catch (Exception e) {
            sink.error(e);
        } finally {
            DataBufferUtils.release(buf);  // release the buffer once its bytes have been fed to the parser
        }
    }

    private void parseJsonChunk(ByteBuffer bbuf, ByteBufferFeeder feeder, JsonParser parser, FluxSink<String> sink) {
        try {
            feeder.feedInput(bbuf);  // feed the chunk to the non-blocking parser
            JsonToken token;
            // Drain every token that is already complete; NOT_AVAILABLE means the parser
            // needs more input, so we stop and wait for the next chunk.
            while ((token = parser.nextToken()) != null && token != JsonToken.NOT_AVAILABLE) {
                if (token == JsonToken.FIELD_NAME) {
                    sink.next("Field: " + parser.currentName());
                }
            }
        } catch (IOException e) {
            sink.error(e);
        }
    }

    private void completeProcessing(ByteBufferFeeder feeder, JsonParser parser, FluxSink<String> sink) {
        try {
            feeder.endOfInput();  // signal that no more data will arrive
            parser.close();
        } catch (IOException e) {
            sink.error(e);
            return;
        }
        sink.complete();
    }
}

Example Client (Python)

import io
import json

import requests

# Define the server endpoint
url = "http://localhost:8080/stream-json"

# Generate a large JSON object
large_data = {
    "name": "John Doe",
    "age": 30,
    "city": "New York",
    "bio": "A" * 15 * 1_024 * 1_024,  # ~15 MB string; 20_000_000 characters seems to be the limit
    "data": {f"key_{i}": f"value_{i}" for i in range(10_000)},  # 10,000 key-value pairs
}

# Serialize the JSON object and wrap it in a file-like object so requests can stream it
json_stream = io.BytesIO(json.dumps(large_data).encode("utf-8"))

# Send the JSON data as a stream
response = requests.post(
    url,
    data=json_stream,
    headers={"Content-Type": "application/json"},
)

# Print response
print("Response Status:", response.status_code)
print("Response Body:", response.text)