Skip to content

Commit 032bbb9

Browse files
authored
http.sys accept loop - mitigate against break due to possible conflicting IO callbacks (#54368)
* investigate #54251 (more details will be in PR) 1. handle MRTVS cascading fault breaking the accept loop 2. log any expectation failures * make log text consistent * upgrade log to "critical" on the actual state error * - capture whether it is the managed vs unmanaged code-path that is glitching - lock new critical log messages behind app-context switch
1 parent b3b8dff commit 032bbb9

File tree

5 files changed

+142
-40
lines changed

5 files changed

+142
-40
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using Microsoft.Extensions.Logging;
5+
6+
namespace Microsoft.AspNetCore.Server.HttpSys;
7+
8+
internal partial class AsyncAcceptContext
9+
{
10+
private static partial class Log
11+
{
12+
[LoggerMessage(LoggerEventIds.AcceptSetResultFailed, LogLevel.Error, "Error attempting to set 'accept' outcome", EventName = "AcceptSetResultFailed")]
13+
public static partial void AcceptSetResultFailed(ILogger logger, Exception exception);
14+
15+
// note on "critical": these represent an unexpected IO callback state that needs investigation; see https://github.com/dotnet/aspnetcore/pull/54368/
16+
17+
[LoggerMessage(LoggerEventIds.AcceptSetExpectationMismatch, LogLevel.Critical, "Mismatch setting callback expectation - {Value}", EventName = "AcceptSetExpectationMismatch")]
18+
public static partial void AcceptSetExpectationMismatch(ILogger logger, int value);
19+
20+
[LoggerMessage(LoggerEventIds.AcceptCancelExpectationMismatch, LogLevel.Critical, "Mismatch canceling accept state - {Value}", EventName = "AcceptCancelExpectationMismatch")]
21+
public static partial void AcceptCancelExpectationMismatch(ILogger logger, int value);
22+
23+
[LoggerMessage(LoggerEventIds.AcceptObserveExpectationMismatch, LogLevel.Critical, "Mismatch observing {Kind} accept callback - {Value}", EventName = "AcceptObserveExpectationMismatch")]
24+
public static partial void AcceptObserveExpectationMismatch(ILogger logger, string kind, int value);
25+
}
26+
}

src/Servers/HttpSys/src/AsyncAcceptContext.cs

Lines changed: 110 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,23 @@
33

44
using System.Diagnostics;
55
using System.Threading.Tasks.Sources;
6+
using Microsoft.Extensions.Logging;
67

78
namespace Microsoft.AspNetCore.Server.HttpSys;
89

9-
internal sealed unsafe class AsyncAcceptContext : IValueTaskSource<RequestContext>, IDisposable
10+
internal sealed unsafe partial class AsyncAcceptContext : IValueTaskSource<RequestContext>, IDisposable
1011
{
1112
private static readonly IOCompletionCallback IOCallback = IOWaitCallback;
1213
private readonly PreAllocatedOverlapped _preallocatedOverlapped;
1314
private readonly IRequestContextFactory _requestContextFactory;
15+
private readonly ILogger _logger;
16+
private int _expectedCompletionCount;
1417

1518
private NativeOverlapped* _overlapped;
1619

20+
private readonly bool _logExpectationFailures = AppContext.TryGetSwitch(
21+
"Microsoft.AspNetCore.Server.HttpSys.LogAcceptExpectationFailure", out var enabled) && enabled;
22+
1723
// mutable struct; do not make this readonly
1824
private ManualResetValueTaskSourceCore<RequestContext> _mrvts = new()
1925
{
@@ -23,11 +29,12 @@ internal sealed unsafe class AsyncAcceptContext : IValueTaskSource<RequestContex
2329

2430
private RequestContext? _requestContext;
2531

26-
internal AsyncAcceptContext(HttpSysListener server, IRequestContextFactory requestContextFactory)
32+
internal AsyncAcceptContext(HttpSysListener server, IRequestContextFactory requestContextFactory, ILogger logger)
2733
{
2834
Server = server;
2935
_requestContextFactory = requestContextFactory;
3036
_preallocatedOverlapped = new(IOCallback, state: this, pinData: null);
37+
_logger = logger;
3138
}
3239

3340
internal HttpSysListener Server { get; }
@@ -50,15 +57,16 @@ internal ValueTask<RequestContext> AcceptAsync()
5057
return new ValueTask<RequestContext>(this, _mrvts.Version);
5158
}
5259

53-
private void IOCompleted(uint errorCode, uint numBytes)
60+
private void IOCompleted(uint errorCode, uint numBytes, bool managed)
5461
{
5562
try
5663
{
64+
ObserveCompletion(managed); // expectation tracking
5765
if (errorCode != ErrorCodes.ERROR_SUCCESS &&
5866
errorCode != ErrorCodes.ERROR_MORE_DATA)
5967
{
60-
_mrvts.SetException(new HttpSysException((int)errorCode));
61-
return;
68+
// (keep all the error handling in one place)
69+
throw new HttpSysException((int)errorCode);
6270
}
6371

6472
Debug.Assert(_requestContext != null);
@@ -70,7 +78,14 @@ private void IOCompleted(uint errorCode, uint numBytes)
7078
// we want to reuse the acceptContext object for future accepts.
7179
_requestContext = null;
7280

73-
_mrvts.SetResult(requestContext);
81+
try
82+
{
83+
_mrvts.SetResult(requestContext);
84+
}
85+
catch (Exception ex)
86+
{
87+
Log.AcceptSetResultFailed(_logger, ex);
88+
}
7489
}
7590
else
7691
{
@@ -83,22 +98,69 @@ private void IOCompleted(uint errorCode, uint numBytes)
8398
if (statusCode != ErrorCodes.ERROR_SUCCESS &&
8499
statusCode != ErrorCodes.ERROR_IO_PENDING)
85100
{
86-
// someother bad error, possible(?) return values are:
101+
// some other bad error, possible(?) return values are:
87102
// ERROR_INVALID_HANDLE, ERROR_INSUFFICIENT_BUFFER, ERROR_OPERATION_ABORTED
88-
_mrvts.SetException(new HttpSysException((int)statusCode));
103+
// (keep all the error handling in one place)
104+
throw new HttpSysException((int)statusCode);
89105
}
90106
}
91107
}
92108
catch (Exception exception)
93109
{
94-
_mrvts.SetException(exception);
110+
try
111+
{
112+
_mrvts.SetException(exception);
113+
}
114+
catch (Exception ex)
115+
{
116+
Log.AcceptSetResultFailed(_logger, ex);
117+
}
95118
}
96119
}
97120

98121
private static unsafe void IOWaitCallback(uint errorCode, uint numBytes, NativeOverlapped* nativeOverlapped)
99122
{
100123
var acceptContext = (AsyncAcceptContext)ThreadPoolBoundHandle.GetNativeOverlappedState(nativeOverlapped)!;
101-
acceptContext.IOCompleted(errorCode, numBytes);
124+
acceptContext.IOCompleted(errorCode, numBytes, false);
125+
}
126+
127+
private void SetExpectCompletion() // we anticipate a completion *might* occur
128+
{
129+
// note this is intentionally a "reset and check" rather than Increment, so that we don't spam
130+
// the logs forever if a glitch occurs
131+
var value = Interlocked.Exchange(ref _expectedCompletionCount, 1); // should have been 0
132+
if (value != 0)
133+
{
134+
if (_logExpectationFailures)
135+
{
136+
Log.AcceptSetExpectationMismatch(_logger, value);
137+
}
138+
Debug.Assert(false, nameof(SetExpectCompletion)); // fail hard in debug
139+
}
140+
}
141+
private void CancelExpectCompletion() // due to error-code etc, we no longer anticipate a completion
142+
{
143+
var value = Interlocked.Decrement(ref _expectedCompletionCount); // should have been 1, so now 0
144+
if (value != 0)
145+
{
146+
if (_logExpectationFailures)
147+
{
148+
Log.AcceptCancelExpectationMismatch(_logger, value);
149+
}
150+
Debug.Assert(false, nameof(CancelExpectCompletion)); // fail hard in debug
151+
}
152+
}
153+
private void ObserveCompletion(bool managed) // a completion was invoked
154+
{
155+
var value = Interlocked.Decrement(ref _expectedCompletionCount); // should have been 1, so now 0
156+
if (value != 0)
157+
{
158+
if (_logExpectationFailures)
159+
{
160+
Log.AcceptObserveExpectationMismatch(_logger, managed ? "managed" : "unmanaged", value);
161+
}
162+
Debug.Assert(false, nameof(ObserveCompletion)); // fail hard in debug
163+
}
102164
}
103165

104166
private uint QueueBeginGetContext()
@@ -111,6 +173,7 @@ private uint QueueBeginGetContext()
111173

112174
retry = false;
113175
uint bytesTransferred = 0;
176+
SetExpectCompletion(); // track this *before*, because of timing vs IOCP (could even be effectively synchronous)
114177
statusCode = HttpApi.HttpReceiveHttpRequest(
115178
Server.RequestQueue.Handle,
116179
_requestContext.RequestId,
@@ -122,35 +185,44 @@ private uint QueueBeginGetContext()
122185
&bytesTransferred,
123186
_overlapped);
124187

125-
if ((statusCode == ErrorCodes.ERROR_CONNECTION_INVALID
126-
|| statusCode == ErrorCodes.ERROR_INVALID_PARAMETER)
127-
&& _requestContext.RequestId != 0)
128-
{
129-
// ERROR_CONNECTION_INVALID:
130-
// The client reset the connection between the time we got the MORE_DATA error and when we called HttpReceiveHttpRequest
131-
// with the new buffer. We can clear the request id and move on to the next request.
132-
//
133-
// ERROR_INVALID_PARAMETER: Historical check from HttpListener.
134-
// https://referencesource.microsoft.com/#System/net/System/Net/_ListenerAsyncResult.cs,137
135-
// we might get this if somebody stole our RequestId,
136-
// set RequestId to 0 and start all over again with the buffer we just allocated
137-
// BUGBUG: how can someone steal our request ID? seems really bad and in need of fix.
138-
_requestContext.RequestId = 0;
139-
retry = true;
140-
}
141-
else if (statusCode == ErrorCodes.ERROR_MORE_DATA)
142-
{
143-
// the buffer was not big enough to fit the headers, we need
144-
// to read the RequestId returned, allocate a new buffer of the required size
145-
// (uint)backingBuffer.Length - AlignmentPadding
146-
AllocateNativeRequest(bytesTransferred);
147-
retry = true;
148-
}
149-
else if (statusCode == ErrorCodes.ERROR_SUCCESS
150-
&& HttpSysListener.SkipIOCPCallbackOnSuccess)
188+
switch (statusCode)
151189
{
152-
// IO operation completed synchronously - callback won't be called to signal completion.
153-
IOCompleted(statusCode, bytesTransferred);
190+
case (ErrorCodes.ERROR_CONNECTION_INVALID or ErrorCodes.ERROR_INVALID_PARAMETER) when _requestContext.RequestId != 0:
191+
// ERROR_CONNECTION_INVALID:
192+
// The client reset the connection between the time we got the MORE_DATA error and when we called HttpReceiveHttpRequest
193+
// with the new buffer. We can clear the request id and move on to the next request.
194+
//
195+
// ERROR_INVALID_PARAMETER: Historical check from HttpListener.
196+
// https://referencesource.microsoft.com/#System/net/System/Net/_ListenerAsyncResult.cs,137
197+
// we might get this if somebody stole our RequestId,
198+
// set RequestId to 0 and start all over again with the buffer we just allocated
199+
// BUGBUG: how can someone steal our request ID? seems really bad and in need of fix.
200+
CancelExpectCompletion();
201+
_requestContext.RequestId = 0;
202+
retry = true;
203+
break;
204+
case ErrorCodes.ERROR_MORE_DATA:
205+
// the buffer was not big enough to fit the headers, we need
206+
// to read the RequestId returned, allocate a new buffer of the required size
207+
// (uint)backingBuffer.Length - AlignmentPadding
208+
CancelExpectCompletion(); // we'll "expect" again when we retry
209+
AllocateNativeRequest(bytesTransferred);
210+
retry = true;
211+
break;
212+
case ErrorCodes.ERROR_SUCCESS:
213+
if (HttpSysListener.SkipIOCPCallbackOnSuccess)
214+
{
215+
// IO operation completed synchronously - callback won't be called to signal completion.
216+
IOCompleted(statusCode, bytesTransferred, true); // marks completion
217+
}
218+
// else: callback fired by IOCP (at some point), which marks completion
219+
break;
220+
case ErrorCodes.ERROR_IO_PENDING:
221+
break; // no change to state - callback will occur at some point
222+
default:
223+
// fault code, not expecting an IOCP callback
224+
CancelExpectCompletion();
225+
break;
154226
}
155227
}
156228
while (retry);

src/Servers/HttpSys/src/LoggerEventIds.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,8 @@ internal static class LoggerEventIds
5454
public const int RequestValidationFailed = 47;
5555
public const int CreateDisconnectTokenError = 48;
5656
public const int RequestAborted = 49;
57+
public const int AcceptSetResultFailed = 50;
58+
public const int AcceptSetExpectationMismatch = 51;
59+
public const int AcceptCancelExpectationMismatch = 52;
60+
public const int AcceptObserveExpectationMismatch = 53;
5761
}

src/Servers/HttpSys/src/MessagePump.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ private void ProcessRequestsWorker()
162162
Debug.Assert(RequestContextFactory != null);
163163

164164
// Allocate and accept context per loop and reuse it for all accepts
165-
var acceptContext = new AsyncAcceptContext(Listener, RequestContextFactory);
165+
var acceptContext = new AsyncAcceptContext(Listener, RequestContextFactory, _logger);
166166

167167
var loop = new AcceptLoop(acceptContext, this);
168168

src/Servers/HttpSys/test/FunctionalTests/Listener/Utilities.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ internal static HttpSysListener CreateServerOnExistingQueue(string requestQueueN
107107
internal static async Task<RequestContext> AcceptAsync(this HttpSysListener server, TimeSpan timeout)
108108
{
109109
var factory = new TestRequestContextFactory(server);
110-
using var acceptContext = new AsyncAcceptContext(server, factory);
110+
using var acceptContext = new AsyncAcceptContext(server, factory, server.Logger);
111111

112112
async Task<RequestContext> AcceptAsync()
113113
{

0 commit comments

Comments
 (0)