Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/api/Elastic.Documentation.Api/ServicesExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,18 @@ private static void AddOtlpProxyService(IServiceCollection services, AppEnv appE
// 1s timeout: the collector is a localhost sidecar and should answer in single-digit ms.
// RemoveAllResilienceHandlers opts this client out of the global standard resilience handler
// (retries + 10s/30s timeouts) so a dead collector fails fast instead of blocking ~9s.
// PooledConnectionLifetime=30s proactively recycles connections before the sidecar closes them,
// keeping the stale-connection drop rate negligible.
#pragma warning disable EXTEXP0001 // RemoveAllResilienceHandlers is experimental
_ = services.AddHttpClient(AdotOtlpService.HttpClientName)
.ConfigureHttpClient(client =>
{
client.Timeout = TimeSpan.FromSeconds(1);
})
.ConfigurePrimaryHttpMessageHandler(() => new SocketsHttpHandler
{
PooledConnectionLifetime = TimeSpan.FromSeconds(30),
})
.RemoveAllResilienceHandlers();
#pragma warning restore EXTEXP0001

Expand Down
23 changes: 18 additions & 5 deletions src/api/Elastic.Documentation.Api/Telemetry/AdotOtlpService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information

using System.Diagnostics;
using System.Diagnostics.Metrics;
using System.Net.Sockets;
using Microsoft.Extensions.Logging;

Expand All @@ -18,6 +19,10 @@ public class AdotOtlpService(
{
public const string HttpClientName = "OtlpProxy";
private static readonly ActivitySource ActivitySource = new(TelemetryConstants.OtlpProxySourceName);
private static readonly Meter Meter = new(TelemetryConstants.OtlpProxySourceName);
internal static readonly Counter<int> StaleConnectionDrops =
Meter.CreateCounter<int>("otlp.proxy.stale_connection.dropped",
description: "OTLP batches silently dropped due to a stale pooled connection to the ADOT collector");
private readonly HttpClient _httpClient = httpClientFactory.CreateClient(HttpClientName);

/// <inheritdoc />
Expand Down Expand Up @@ -62,12 +67,14 @@ public async Task<OtlpForwardResult> ForwardOtlp(
catch (Exception ex)
{
var (statusCode, message) = MapExceptionToStatusCode(ex);
logger.LogError(ex, "Error forwarding OTLP {SignalType}: {ErrorMessage}", signalType, message);
return new OtlpForwardResult
if (statusCode == 204)
{
StatusCode = statusCode,
Content = message
};
StaleConnectionDrops.Add(1);
logger.LogDebug("Dropped OTLP {SignalType} batch on stale connection; collector will reconnect", signalType);
}
else
logger.LogError(ex, "Error forwarding OTLP {SignalType}: {ErrorMessage}", signalType, message);
return new OtlpForwardResult { StatusCode = statusCode, Content = message };
}
}

Expand All @@ -85,6 +92,12 @@ private static (int StatusCode, string Message) MapExceptionToStatusCode(Excepti
TaskCanceledException or OperationCanceledException
=> (504, "Request to telemetry collector timed out"),

// Stale pooled connection — SocketsHttpHandler sets AllowRetry=false for non-seekable
// StreamContent, so it throws rather than retrying. OTLP is best-effort; return 204
// so the browser exporter doesn't treat this as a retryable 502.
HttpRequestException { InnerException: IOException }
=> (204, string.Empty),

// Other HTTP/network errors - bad gateway
HttpRequestException
=> (502, "Failed to communicate with telemetry collector"),
Expand Down
96 changes: 96 additions & 0 deletions tests/Elastic.Documentation.Api.Tests/OtlpProxyTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,102 @@ public async Task OtlpProxy_CollectorUnavailable_FailsFastWithoutRetries()
response.StatusCode.Should().Be(HttpStatusCode.ServiceUnavailable);
}

[Fact]
public async Task OtlpProxy_ForwardedBodyMatchesInput()
{
// Arrange
var mockHandler = A.Fake<HttpMessageHandler>();
var capturedBody = (byte[]?)null;

var mockResponse = new HttpResponseMessage(HttpStatusCode.OK)
{
Content = new StringContent("{}")
};

A.CallTo(mockHandler)
.Where(call => call.Method.Name == "SendAsync")
.WithReturnType<Task<HttpResponseMessage>>()
.Invokes(async (HttpRequestMessage req, CancellationToken ct) =>
capturedBody = await req.Content!.ReadAsByteArrayAsync(ct))
.Returns(Task.FromResult(mockResponse));

using var factory = ApiWebApplicationFactory.WithMockedServices(services =>
{
_ = services.AddHttpClient(AdotOtlpService.HttpClientName)
.ConfigurePrimaryHttpMessageHandler(() => mockHandler);
}, otlpEndpoint: OtlpEndpoint);

var client = factory.CreateClient();
var originalPayload = Encoding.UTF8.GetBytes(/*lang=json,strict*/ """{"resourceSpans":[]}""");
using var content = new ByteArrayContent(originalPayload);
content.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/json");

// Act
using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken);

// Assert — bytes arriving at the collector must exactly match the original payload
response.StatusCode.Should().Be(HttpStatusCode.NoContent);
capturedBody.Should().NotBeNull();
capturedBody.Should().BeEquivalentTo(originalPayload);

mockResponse.Dispose();
}

[Fact]
public async Task OtlpProxy_SendAsyncThrowsNonIo_MapsToGatewayError()
{
var callCount = 0;
var mockHandler = A.Fake<HttpMessageHandler>();

A.CallTo(mockHandler)
.Where(call => call.Method.Name == "SendAsync")
.WithReturnType<Task<HttpResponseMessage>>()
.Invokes((HttpRequestMessage _, CancellationToken _) => callCount++)
.Throws(new HttpRequestException("Some non-IO network error"));

using var factory = ApiWebApplicationFactory.WithMockedServices(services =>
{
_ = services.AddHttpClient(AdotOtlpService.HttpClientName)
.ConfigurePrimaryHttpMessageHandler(() => mockHandler);
}, otlpEndpoint: OtlpEndpoint);

var client = factory.CreateClient();
using var content = new StringContent(/*lang=json,strict*/ """{"resourceSpans":[]}""", Encoding.UTF8, "application/json");

using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken);

callCount.Should().Be(1);
response.StatusCode.Should().Be(HttpStatusCode.BadGateway);
}

[Fact]
public async Task OtlpProxy_StaleConnection_DropsWithNoContent()
{
// SocketsHttpHandler detects a stale pooled connection and throws
// HttpRequestException { InnerException: IOException } (AllowRetry=false on non-seekable
// StreamContent). The proxy maps this to 204 so the browser OTLP exporter doesn't
// interpret it as a retryable 502.
var mockHandler = A.Fake<HttpMessageHandler>();

A.CallTo(mockHandler)
.Where(call => call.Method.Name == "SendAsync")
.WithReturnType<Task<HttpResponseMessage>>()
.Throws(new HttpRequestException("Stale connection", new IOException("Connection reset by peer")));

using var factory = ApiWebApplicationFactory.WithMockedServices(services =>
{
_ = services.AddHttpClient(AdotOtlpService.HttpClientName)
.ConfigurePrimaryHttpMessageHandler(() => mockHandler);
}, otlpEndpoint: OtlpEndpoint);

var client = factory.CreateClient();
using var content = new StringContent(/*lang=json,strict*/ """{"resourceSpans":[]}""", Encoding.UTF8, "application/json");

using var response = await client.PostAsync("/docs/_api/v1/o/t", content, TestContext.Current.CancellationToken);

response.StatusCode.Should().Be(HttpStatusCode.NoContent);
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

[Fact]
public async Task OtlpProxyInvalidSignalTypeReturns404()
{
Expand Down
Loading