Skip to content

Commit b47e41a

Browse files
authored
Add --delete-threshold to deploy plan with some sane defaults. (#1763)
* Add delete threshold validation to DeployCommands.Plan method
  - Introduced `deleteThreshold` parameter with a default value of 0.2
  - Added plan validation for the deletion ratio before writing the plan. We will now fail and exit with a non-zero exit code when the plan includes more deletions than the allowed ratio (0.2) or when the plan had no source files to begin with.
* Adjust delete ratio logic for low sync request thresholds
  - Update delete ratio calculation to use `TotalSyncRequests` instead of `TotalFilesToSync`.
  - Enforce laxer delete thresholds for low `TotalSyncRequests` (<=100: 0.8, <=1000: 0.5) to accommodate higher flux in new documentation.
* Remove unnecessary pragma
1 parent cdf1788 commit b47e41a

File tree

4 files changed

+275
-70
lines changed

4 files changed

+275
-70
lines changed

src/tooling/docs-assembler/Cli/DeployCommands.cs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,15 @@ private void AssignOutputLogger()
3737
/// <param name="environment"> The environment to build</param>
3838
/// <param name="s3BucketName">The S3 bucket name to deploy to</param>
3939
/// <param name="out"> The file to write the plan to</param>
40+
/// <param name="deleteThreshold"> The percentage of deletions allowed in the plan as percentage of total files to sync</param>
4041
/// <param name="ctx"></param>
4142
public async Task<int> Plan(
42-
string environment, string s3BucketName, string @out = "", Cancel ctx = default)
43+
string environment,
44+
string s3BucketName,
45+
string @out = "",
46+
float deleteThreshold = 0.2f,
47+
Cancel ctx = default
48+
)
4349
{
4450
AssignOutputLogger();
4551
await using var collector = new ConsoleDiagnosticsCollector(logFactory, githubActionsService)
@@ -51,11 +57,25 @@ public async Task<int> Plan(
5157
var s3Client = new AmazonS3Client();
5258
IDocsSyncPlanStrategy planner = new AwsS3SyncPlanStrategy(logFactory, s3Client, s3BucketName, assembleContext);
5359
var plan = await planner.Plan(ctx);
54-
ConsoleApp.Log("Total files to sync: " + plan.Count);
60+
ConsoleApp.Log("Total files to sync: " + plan.TotalSyncRequests);
5561
ConsoleApp.Log("Total files to delete: " + plan.DeleteRequests.Count);
5662
ConsoleApp.Log("Total files to add: " + plan.AddRequests.Count);
5763
ConsoleApp.Log("Total files to update: " + plan.UpdateRequests.Count);
5864
ConsoleApp.Log("Total files to skip: " + plan.SkipRequests.Count);
65+
if (plan.TotalSyncRequests == 0)
66+
{
67+
collector.EmitError(@out, $"Plan has no files to sync so no plan will be written.");
68+
await collector.StopAsync(ctx);
69+
return collector.Errors;
70+
}
71+
var validationResult = planner.Validate(plan, deleteThreshold);
72+
if (!validationResult.Valid)
73+
{
74+
collector.EmitError(@out, $"Plan is invalid, delete ratio: {validationResult.DeleteRatio}, threshold: {validationResult.DeleteThreshold} over {plan.TotalSyncRequests:N0} files while plan has {plan.DeleteRequests:N0} deletions");
75+
await collector.StopAsync(ctx);
76+
return collector.Errors;
77+
}
78+
5979
if (!string.IsNullOrEmpty(@out))
6080
{
6181
var output = SyncPlan.Serialize(plan);
@@ -90,7 +110,7 @@ public async Task<int> Apply(
90110
var transferUtility = new TransferUtility(s3Client, new TransferUtilityConfig
91111
{
92112
ConcurrentServiceRequests = Environment.ProcessorCount * 2,
93-
MinSizeBeforePartUpload = AwsS3SyncPlanStrategy.PartSize
113+
MinSizeBeforePartUpload = S3EtagCalculator.PartSize
94114
});
95115
IDocsSyncApplyStrategy applier = new AwsS3SyncApplyStrategy(logFactory, s3Client, transferUtility, s3BucketName, assembleContext, collector);
96116
if (!File.Exists(planFile))

src/tooling/docs-assembler/Deploying/AwsS3SyncPlanStrategy.cs

Lines changed: 103 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,88 @@
44

55
using System.Collections.Concurrent;
66
using System.Diagnostics.CodeAnalysis;
7+
using System.IO.Abstractions;
78
using System.Security.Cryptography;
89
using Amazon.S3;
910
using Amazon.S3.Model;
1011
using Microsoft.Extensions.Logging;
1112

1213
namespace Documentation.Assembler.Deploying;
1314

14-
public class AwsS3SyncPlanStrategy(ILoggerFactory logFactory, IAmazonS3 s3Client, string bucketName, AssembleContext context) : IDocsSyncPlanStrategy
15+
public interface IS3EtagCalculator
16+
{
17+
Task<string> CalculateS3ETag(string filePath, Cancel ctx = default);
18+
}
19+
20+
public class S3EtagCalculator(ILoggerFactory logFactory, IFileSystem readFileSystem) : IS3EtagCalculator
1521
{
16-
internal const long PartSize = 5 * 1024 * 1024; // 5MB
1722
private readonly ILogger<AwsS3SyncPlanStrategy> _logger = logFactory.CreateLogger<AwsS3SyncPlanStrategy>();
23+
1824
private static readonly ConcurrentDictionary<string, string> EtagCache = new();
1925

26+
internal const long PartSize = 5 * 1024 * 1024; // 5MB
27+
28+
[SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
29+
public async Task<string> CalculateS3ETag(string filePath, Cancel ctx = default)
30+
{
31+
if (EtagCache.TryGetValue(filePath, out var cachedEtag))
32+
{
33+
_logger.LogDebug("Using cached ETag for {Path}", filePath);
34+
return cachedEtag;
35+
}
36+
37+
var fileInfo = readFileSystem.FileInfo.New(filePath);
38+
var fileSize = fileInfo.Length;
39+
40+
// For files under 5MB, use simple MD5 (matching TransferUtility behavior)
41+
if (fileSize <= PartSize)
42+
{
43+
await using var stream = readFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
44+
var smallBuffer = new byte[fileSize];
45+
var bytesRead = await stream.ReadAsync(smallBuffer.AsMemory(0, (int)fileSize), ctx);
46+
var hash = MD5.HashData(smallBuffer.AsSpan(0, bytesRead));
47+
var etag = Convert.ToHexStringLower(hash);
48+
EtagCache[filePath] = etag;
49+
return etag;
50+
}
51+
52+
// For files over 5MB, use multipart format with 5MB parts (matching TransferUtility)
53+
var parts = (int)Math.Ceiling((double)fileSize / PartSize);
54+
55+
await using var fileStream = readFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
56+
var partBuffer = new byte[PartSize];
57+
var partHashes = new List<byte[]>();
58+
59+
for (var i = 0; i < parts; i++)
60+
{
61+
var bytesRead = await fileStream.ReadAsync(partBuffer.AsMemory(0, partBuffer.Length), ctx);
62+
var partHash = MD5.HashData(partBuffer.AsSpan(0, bytesRead));
63+
partHashes.Add(partHash);
64+
}
65+
66+
// Concatenate all part hashes
67+
var concatenatedHashes = partHashes.SelectMany(h => h).ToArray();
68+
var finalHash = MD5.HashData(concatenatedHashes);
69+
70+
var multipartEtag = $"{Convert.ToHexStringLower(finalHash)}-{parts}";
71+
EtagCache[filePath] = multipartEtag;
72+
return multipartEtag;
73+
}
74+
}
75+
76+
public class AwsS3SyncPlanStrategy(
77+
ILoggerFactory logFactory,
78+
IAmazonS3 s3Client,
79+
string bucketName,
80+
AssembleContext context,
81+
IS3EtagCalculator? calculator = null
82+
)
83+
: IDocsSyncPlanStrategy
84+
{
85+
private readonly ILogger<AwsS3SyncPlanStrategy> _logger = logFactory.CreateLogger<AwsS3SyncPlanStrategy>();
86+
87+
private readonly IS3EtagCalculator _s3EtagCalculator = calculator ?? new S3EtagCalculator(logFactory, context.ReadFileSystem);
88+
2089
private bool IsSymlink(string path)
2190
{
2291
var fileInfo = context.ReadFileSystem.FileInfo.New(path);
@@ -42,7 +111,7 @@ await Parallel.ForEachAsync(localObjects, ctx, async (localFile, token) =>
42111
if (remoteObjects.TryGetValue(destinationPath, out var remoteObject))
43112
{
44113
// Check if the ETag differs for updates
45-
var localETag = await CalculateS3ETag(localFile.FullName, token);
114+
var localETag = await _s3EtagCalculator.CalculateS3ETag(localFile.FullName, token);
46115
var remoteETag = remoteObject.ETag.Trim('"'); // Remove quotes from remote ETag
47116
if (localETag == remoteETag)
48117
{
@@ -89,14 +158,44 @@ await Parallel.ForEachAsync(localObjects, ctx, async (localFile, token) =>
89158

90159
return new SyncPlan
91160
{
161+
TotalSourceFiles = localObjects.Length,
92162
DeleteRequests = deleteRequests.ToList(),
93163
AddRequests = addRequests.ToList(),
94164
UpdateRequests = updateRequests.ToList(),
95165
SkipRequests = skipRequests.ToList(),
96-
Count = deleteRequests.Count + addRequests.Count + updateRequests.Count + skipRequests.Count
166+
TotalSyncRequests = deleteRequests.Count + addRequests.Count + updateRequests.Count + skipRequests.Count
97167
};
98168
}
99169

170+
/// <inheritdoc />
171+
public PlanValidationResult Validate(SyncPlan plan, float deleteThreshold)
172+
{
173+
if (plan.TotalSourceFiles == 0)
174+
{
175+
_logger.LogError("No files to sync");
176+
return new(false, 1.0f, deleteThreshold);
177+
}
178+
179+
var deleteRatio = (float)plan.DeleteRequests.Count / plan.TotalSyncRequests;
180+
// if the total sync requests are less than 100, we enforce a higher ratio of 0.8
181+
// this allows newer assembled documentation to be in a higher state of flux
182+
if (plan.TotalSyncRequests <= 100)
183+
deleteThreshold = Math.Max(deleteThreshold, 0.8f);
184+
185+
// if the total sync requests are less than 1000, we enforce a higher ratio of 0.5
186+
// this allows newer assembled documentation to be in a higher state of flux
187+
else if (plan.TotalSyncRequests <= 1000)
188+
deleteThreshold = Math.Max(deleteThreshold, 0.5f);
189+
190+
if (deleteRatio > deleteThreshold)
191+
{
192+
_logger.LogError("Delete ratio is {Ratio} which is greater than the threshold of {Threshold}", deleteRatio, deleteThreshold);
193+
return new(false, deleteRatio, deleteThreshold);
194+
}
195+
196+
return new(true, deleteRatio, deleteThreshold);
197+
}
198+
100199
private async Task<Dictionary<string, S3Object>> ListObjects(Cancel ctx = default)
101200
{
102201
var listBucketRequest = new ListObjectsV2Request
@@ -117,51 +216,4 @@ private async Task<Dictionary<string, S3Object>> ListObjects(Cancel ctx = defaul
117216

118217
return objects.ToDictionary(o => o.Key);
119218
}
120-
121-
[SuppressMessage("Security", "CA5351:Do Not Use Broken Cryptographic Algorithms")]
122-
private async Task<string> CalculateS3ETag(string filePath, Cancel ctx = default)
123-
{
124-
if (EtagCache.TryGetValue(filePath, out var cachedEtag))
125-
{
126-
_logger.LogDebug("Using cached ETag for {Path}", filePath);
127-
return cachedEtag;
128-
}
129-
130-
var fileInfo = context.ReadFileSystem.FileInfo.New(filePath);
131-
var fileSize = fileInfo.Length;
132-
133-
// For files under 5MB, use simple MD5 (matching TransferUtility behavior)
134-
if (fileSize <= PartSize)
135-
{
136-
await using var stream = context.ReadFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
137-
var smallBuffer = new byte[fileSize];
138-
var bytesRead = await stream.ReadAsync(smallBuffer.AsMemory(0, (int)fileSize), ctx);
139-
var hash = MD5.HashData(smallBuffer.AsSpan(0, bytesRead));
140-
var etag = Convert.ToHexStringLower(hash);
141-
EtagCache[filePath] = etag;
142-
return etag;
143-
}
144-
145-
// For files over 5MB, use multipart format with 5MB parts (matching TransferUtility)
146-
var parts = (int)Math.Ceiling((double)fileSize / PartSize);
147-
148-
await using var fileStream = context.ReadFileSystem.FileStream.New(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
149-
var partBuffer = new byte[PartSize];
150-
var partHashes = new List<byte[]>();
151-
152-
for (var i = 0; i < parts; i++)
153-
{
154-
var bytesRead = await fileStream.ReadAsync(partBuffer.AsMemory(0, partBuffer.Length), ctx);
155-
var partHash = MD5.HashData(partBuffer.AsSpan(0, bytesRead));
156-
partHashes.Add(partHash);
157-
}
158-
159-
// Concatenate all part hashes
160-
var concatenatedHashes = partHashes.SelectMany(h => h).ToArray();
161-
var finalHash = MD5.HashData(concatenatedHashes);
162-
163-
var multipartEtag = $"{Convert.ToHexStringLower(finalHash)}-{parts}";
164-
EtagCache[filePath] = multipartEtag;
165-
return multipartEtag;
166-
}
167219
}

src/tooling/docs-assembler/Deploying/DocsSync.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ namespace Documentation.Assembler.Deploying;
1010
public interface IDocsSyncPlanStrategy
1111
{
1212
Task<SyncPlan> Plan(Cancel ctx = default);
13+
14+
PlanValidationResult Validate(SyncPlan plan, float deleteThreshold);
1315
}
16+
public record PlanValidationResult(bool Valid, float DeleteRatio, float DeleteThreshold);
1417

1518
public interface IDocsSyncApplyStrategy
1619
{
@@ -49,8 +52,11 @@ public record SkipRequest : SyncRequest
4952

5053
public record SyncPlan
5154
{
52-
[JsonPropertyName("count")]
53-
public required int Count { get; init; }
55+
[JsonPropertyName("total_source_files")]
56+
public required int TotalSourceFiles { get; init; }
57+
58+
[JsonPropertyName("total_sync_requests")]
59+
public required int TotalSyncRequests { get; init; }
5460

5561
[JsonPropertyName("delete")]
5662
public required IReadOnlyList<DeleteRequest> DeleteRequests { get; init; }

0 commit comments

Comments (0)