-
Notifications
You must be signed in to change notification settings - Fork 5k
[Windows] Illustration of fix in FileSystemWatcher to prevent SOH fragmentation from memory leak of pinned 8K byte[] buffers #116613
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
subbucse
wants to merge
2
commits into
dotnet:main
from
subbucse:suvasu/dev/illustrateMemLeakFileSystemWatcher
Closed
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
...leSystem.Watcher/tests/Utility/FileSystemWatcherHeapLeak/FileSystemWatcherHeapLeak.csproj
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
<ServerGarbageCollection>true</ServerGarbageCollection> | ||
</PropertyGroup> | ||
|
||
</Project> |
214 changes: 214 additions & 0 deletions
214
...libraries/System.IO.FileSystem.Watcher/tests/Utility/FileSystemWatcherHeapLeak/Program.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
using System; | ||
using System.Runtime; | ||
|
||
namespace FileSystemWatcherHeapLeak | ||
{ | ||
internal class Program | ||
{ | ||
static void Main(string[] args) | ||
{ | ||
/* | ||
* Set both min and max IOCP threads to 1 to serialize directory change callbacks | ||
* and give GC a higher chance of winning the race after disposing the FileSystemWatcher. | ||
*/ | ||
ThreadPool.GetMinThreads(out int minWorker, out _); | ||
ThreadPool.GetMaxThreads(out int maxWorker, out _); | ||
ThreadPool.SetMinThreads(minWorker, 1); | ||
ThreadPool.SetMaxThreads(maxWorker, 1); | ||
|
||
Console.WriteLine($"Starting {nameof(FileSystemWatcher)} heap leak test..."); | ||
Console.WriteLine("Press Ctrl+C to stop the test."); | ||
|
||
int disposeProbability = 100; | ||
while (true) | ||
{ | ||
Console.Write("Enter probability (0-100) to dispose FileSystemWatcher after each run: "); | ||
var input = Console.ReadLine(); | ||
if (int.TryParse(input, out disposeProbability) && | ||
disposeProbability >= 0 && disposeProbability <= 100) | ||
{ | ||
break; | ||
} | ||
else | ||
{ | ||
Console.WriteLine("Please enter a valid integer between 0 and 100."); | ||
} | ||
} | ||
|
||
int maxGeneration = 2; | ||
while (true) | ||
{ | ||
Console.Write("Enter max GC generation to collect (0, 1, or 2): "); | ||
var input = Console.ReadLine(); | ||
if (int.TryParse(input, out maxGeneration) && maxGeneration >= 0 && maxGeneration <= GC.MaxGeneration) | ||
{ | ||
break; | ||
} | ||
else | ||
{ | ||
Console.WriteLine($"Please enter a valid integer between 0 and {GC.MaxGeneration}."); | ||
} | ||
} | ||
|
||
bool compacting = false; | ||
while (true) | ||
{ | ||
Console.Write("Enable compacting GC? (y/n): "); | ||
var input = Console.ReadLine(); | ||
if (string.Equals(input, "y", StringComparison.OrdinalIgnoreCase)) | ||
{ | ||
compacting = true; | ||
break; | ||
} | ||
else if (string.Equals(input, "n", StringComparison.OrdinalIgnoreCase)) | ||
{ | ||
compacting = false; | ||
break; | ||
} | ||
else | ||
{ | ||
Console.WriteLine("Please enter 'y' or 'n'."); | ||
} | ||
} | ||
|
||
Console.WriteLine($"Spawning {minWorker} parallel test loops (one per minWorkerThread)."); | ||
|
||
var tasks = new Task[minWorker]; | ||
|
||
int i = 0; | ||
for (i = 0; i < minWorker - 1; i++) | ||
{ | ||
int taskId = i; // for closure capture | ||
|
||
tasks[i] = Task.Run(() => | ||
{ | ||
var rng = new Random(Environment.TickCount ^ taskId); | ||
|
||
while (true) | ||
{ | ||
try | ||
{ | ||
bool wasDisposed = false; | ||
FileSystemWatcher watcher = null; | ||
|
||
try | ||
{ | ||
watcher = new FileSystemWatcher(@"C:\Temp"); | ||
watcher.EnableRaisingEvents = true; | ||
|
||
Thread.Sleep(rng.Next(5, 10)); // Keep the watcher alive for a short while | ||
} | ||
finally | ||
{ | ||
wasDisposed = rng.Next(0, 100) < disposeProbability; | ||
|
||
if (wasDisposed) | ||
{ | ||
watcher?.Dispose(); | ||
} | ||
|
||
watcher = null; | ||
} | ||
|
||
/* | ||
* When we reach here, there are 2 cases: | ||
* 1) When we have disposed the FileSystemWatcher | ||
* Disposal of FileSystemWatcher will dispose the underlying directory SafeFileHandle and kick off a race between: | ||
* a) The IOCP thread executing the FileSystemWatcher directory changes callback, which will come in with | ||
* errorCode = ERROR_OPERATION_ABORTED, and simultaneously, | ||
* b) The dedicated Server GC threads trying to collect the heap triggered below. | ||
* If a) wins the race, everything is fine :) | ||
* WHY? The AsycState's WeakRef<FileSystemwatcher> in the callback will be intact, so watcher.ReadDirectoryChangesCallback | ||
* will be called, it will find out that the directory handle is invalid now, and it will kick off the next Monitor() call, | ||
* which will in turn find out that the directory handle is invalid now and proceed to properly call PreAllocatedOverlapped.Dispose() | ||
* to unpin and free up the pinned 8K byte[] buffer. | ||
* However, if b) wins the race, !! WE HAVE A HEAP LEAK !! | ||
* WHY? The AsycState's WeakRef<FileSystemwatcher> in the callback will be nulled out as soon as watcher is queued for finalization, | ||
* especially since its a *SHORT* WeakRef, and the callback will not be able to find the watcher anymore. | ||
* so watcher.ReadDirectoryChangesCallback is not at all called, so there is no chance of disposing the PreAllocatedOverlapped | ||
* and freeing up the pinned 8K byte[] buffer and we have leaked it for eternity. | ||
* 2) When we have not disposed the FileSystemWatcher | ||
* This case makes the heap leak fully deterministic, and not probabilistic, because the watcher is disposed from Finalizer thread, | ||
* which will dispose the underlying directory SafeFileHandle and schedule the directory changes callback on IOCP thread, but by that time, | ||
* the AsycState's WeakRef<FileSystemwatcher> in the callback will *ALWAYS* be nulled out, since its a *SHORT* WeakRef. | ||
* so watcher.ReadDirectoryChangesCallback is not at all called, so there is no chance of disposing the PreAllocatedOverlapped | ||
* and freeing up the pinned 8K byte[] buffer and we have leaked it for eternity. | ||
* | ||
* Note: We have already enabled Server GC mode in the project file, so as to pause all threads | ||
* including the IOCP threads executing the FileSystemWatcher directory changes callbacks. | ||
* This will enhance the ability of GC to win the above race after disposing the watcher. | ||
*/ | ||
|
||
if (compacting) | ||
{ | ||
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; | ||
} | ||
else | ||
{ | ||
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.Default; | ||
} | ||
|
||
GC.Collect( | ||
maxGeneration, | ||
GCCollectionMode.Forced | GCCollectionMode.Aggressive, | ||
blocking: true, | ||
compacting: compacting); | ||
|
||
var gcInfo = GC.GetGCMemoryInfo(); | ||
|
||
Console.WriteLine( | ||
$"[Thread {taskId}] Watcher was {(wasDisposed ? string.Empty : "not ")}disposed, " + | ||
$"GC Gen: {maxGeneration}, Compacting: {compacting}, " + | ||
$"Commited Bytes = {gcInfo.TotalCommittedBytes}, " + | ||
$"Heap Size Bytes = {gcInfo.HeapSizeBytes}, " + | ||
$"Fragmented Bytes = {gcInfo.FragmentedBytes}, " + | ||
$"Fragmentation % = {100.0 * (double)gcInfo.FragmentedBytes / (double)gcInfo.TotalCommittedBytes}, " + | ||
$"Pinned Object Count = {gcInfo.PinnedObjectsCount}."); | ||
} | ||
catch (Exception ex) | ||
{ | ||
Console.WriteLine($"[Thread {taskId}] Exception encountered but continuing to next iteration. Exception: {ex.Message}"); | ||
} | ||
} | ||
}); | ||
} | ||
|
||
/* | ||
* The last task is reserved for a thread that will allocate large amount of memory | ||
* on SOH in parallel with other threads, and give a random chance for those allocs | ||
* to survive GC generations, thus creating pockets of free memory between the byte[] | ||
* instances that get leaked by the activity of other threads, inducing fragmentation. | ||
* This thread is designed to resemble the SOH allocations that are made in our server app | ||
* to conduct its business, all of which are mostly ephemeral and short-lived. | ||
*/ | ||
tasks[i] = Task.Run(() => | ||
{ | ||
var rng = new Random(Environment.TickCount ^ i); | ||
|
||
const int chunkSize = 50 * 1024; // 50 KB (making sure we allocate on SOH to that it gets fragmented) | ||
const int minTotal = 500 * 1024; // 500 KB | ||
const int maxTotal = 1024 * 1024;// 1 MB | ||
|
||
while (true) | ||
{ | ||
int totalToAllocate = rng.Next(minTotal, maxTotal + 1); | ||
int allocated = 0; | ||
var allocations = new List<string>(); | ||
|
||
while (allocated < totalToAllocate) | ||
{ | ||
int charCount = chunkSize / 2; | ||
string s = new string('A', charCount); | ||
allocations.Add(s); | ||
allocated += chunkSize; | ||
} | ||
|
||
Thread.Sleep(rng.Next(5, 50)); // Hold on to allocs for a while, giving a random chance for it to survive GC generations | ||
allocations.Clear(); // Then release them and other threads will force GC to collect them. | ||
} | ||
}); | ||
|
||
Task.WaitAll(tasks); // This will block forever unless the process is killed | ||
} | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@stephentoub You mentioned in our teams chat that there was explicit intent to avoid pinning on POH because the pinning is supposed to be temporary. Did you mean that you expect the pinned buffer to be unpinned and given up for GC after the IO callback comes in from every async overlapped call to ReadDirectoryChangesW ?
If so, I understand not using POH, because the pinning is highly transient, but that's not what's happening now. As you see below, we only call state.ThreadPoolBinding.FreeNativeOverlapped(overlappedPointer) which will internally call AsyncState.PreAllocatedOverlapped.Release(), and NOT AsyncState.PreAllocatedOverlapped.Dispose() which is what ultimately unpins and frees up the pinned 8K byte[] buffer.
So if the intent is to keep the buffer pinned until end of lifetime of FSW, then shouldn't we consider using POH since that timeframe could be long?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not necessarily. My statement was about why we avoid the Pinned Object Heap, because things on that heap are never moved and thus creating allocations there that won't effectively be there for the remainder of the process can lead to significant fragmentation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There can be a significant difference between the lifetime of an FSW instance and the lifetime of the process. It's totally feasible to create transient FSWs.