Skip to content

Commit d106919

Browse files
YarinOmesifgarciacorona
authored andcommitted
Improve performace of repeated packed fixedSize fields (protocolbuffers#19667)
## Changes Improve performance of packed RepeatedField that has fixed size, by coping `LEN` bytes from serialzied buffer to RepeatedField array (like c memcpy). ## Relevant Information As the docs [here](https://protobuf.dev/programming-guides/encoding/#cheat-sheet) notes, fixed size value is `memcpy of the equivalent C types (u?int64_t, double)` You can see benchmark code and results [here](https://github.com/YarinOmesi/protobuf/blob/feature-with-benchmark/csharp/src/Benchmark) ## Summarized result: | Method | Job | Size | Mean | Ratio | Allocated | |----------------------|------------------------------|--------|-------------:|---------:|----------:| | Parse_RepeatedFloats | 3.29.0 | 100000 | 416,582.7 ns | baseline | 400297 B | | Parse_RepeatedFloats | improved-fixed-size-repeated | 100000 | 134,671.7 ns | -68% | 400776 B | | Write_RepeatedFloats | 3.29.0 | 100000 | 340.549 us | baseline | 391.57 KB | | Write_RepeatedFloats | improved-fixed-size-repeated | 100000 | 106.577 us | -69% | 391.5 KB | --- I am pretty new to this repo to tell me what to you think about this. Closes protocolbuffers#19667 COPYBARA_INTEGRATE_REVIEW=protocolbuffers#19667 from YarinOmesi:improve-performance-repeated-packed-fixed-field de11524 PiperOrigin-RevId: 729380114
1 parent ec59935 commit d106919

File tree

2 files changed

+76
-11
lines changed

2 files changed

+76
-11
lines changed

csharp/src/Google.Protobuf/Collections/RepeatedField.cs

+51-7
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
#endregion
99

1010
using System;
11+
using System.Buffers;
1112
using System.Collections;
1213
using System.Collections.Generic;
1314
using System.Diagnostics;
1415
using System.IO;
1516
using System.Linq;
17+
using System.Runtime.InteropServices;
1618
using System.Security;
1719
#if NET5_0_OR_GREATER
1820
using System.Runtime.CompilerServices;
@@ -116,12 +118,24 @@ public void AddEntriesFrom(ref ParseContext ctx, FieldCodec<T> codec)
116118
{
117119
EnsureSize(count + (length / codec.FixedSize));
118120

119-
while (!SegmentedBufferHelper.IsReachedLimit(ref ctx.state))
121+
// if littleEndian treat array as bytes and directly copy from buffer for improved performance
122+
if(TryGetArrayAsSpanPinnedUnsafe(codec, out Span<byte> span, out GCHandle handle))
123+
{
124+
span = span.Slice(count * codec.FixedSize);
125+
Debug.Assert(span.Length >= length);
126+
ParsingPrimitives.ReadPackedFieldLittleEndian(ref ctx.buffer, ref ctx.state, length, span);
127+
count += length / codec.FixedSize;
128+
handle.Free();
129+
}
130+
else
120131
{
121-
// Only FieldCodecs with a fixed size can reach here, and they are all known
122-
// types that don't allow the user to specify a custom reader action.
123-
// reader action will never return null.
124-
array[count++] = reader(ref ctx);
132+
while (!SegmentedBufferHelper.IsReachedLimit(ref ctx.state))
133+
{
134+
// Only FieldCodecs with a fixed size can reach here, and they are all known
135+
// types that don't allow the user to specify a custom reader action.
136+
// reader action will never return null.
137+
array[count++] = reader(ref ctx);
138+
}
125139
}
126140
}
127141
else
@@ -241,9 +255,21 @@ public void WriteTo(ref WriteContext ctx, FieldCodec<T> codec)
241255
int size = CalculatePackedDataSize(codec);
242256
ctx.WriteTag(tag);
243257
ctx.WriteLength(size);
244-
for (int i = 0; i < count; i++)
258+
259+
// if littleEndian and elements has fixed size, treat array as bytes (and write it as bytes to buffer) for improved performance
260+
if(TryGetArrayAsSpanPinnedUnsafe(codec, out Span<byte> span, out GCHandle handle))
245261
{
246-
writer(ref ctx, array[i]);
262+
span = span.Slice(0, Count * codec.FixedSize);
263+
264+
WritingPrimitives.WriteRawBytes(ref ctx.buffer, ref ctx.state, span);
265+
handle.Free();
266+
}
267+
else
268+
{
269+
for (int i = 0; i < count; i++)
270+
{
271+
writer(ref ctx, array[i]);
272+
}
247273
}
248274
}
249275
else
@@ -679,6 +705,24 @@ internal void SetCount(int targetCount)
679705
count = targetCount;
680706
}
681707

708+
[SecuritySafeCritical]
709+
private unsafe bool TryGetArrayAsSpanPinnedUnsafe(FieldCodec<T> codec, out Span<byte> span, out GCHandle handle)
710+
{
711+
// 1. protobuf wire bytes is LittleEndian only
712+
// 2. validate that size of csharp element T is matching the size of protobuf wire size
713+
// NOTE: cannot use bool with this span because csharp marshal it as 4 bytes
714+
if (BitConverter.IsLittleEndian && (codec.FixedSize > 0 && Marshal.SizeOf(typeof(T)) == codec.FixedSize))
715+
{
716+
handle = GCHandle.Alloc(array, GCHandleType.Pinned);
717+
span = new Span<byte>(handle.AddrOfPinnedObject().ToPointer(), array.Length * codec.FixedSize);
718+
return true;
719+
}
720+
721+
span = default;
722+
handle = default;
723+
return false;
724+
}
725+
682726
#region Explicit interface implementation for IList and ICollection.
683727
bool IList.IsFixedSize => false;
684728

csharp/src/Google.Protobuf/ParsingPrimitives.cs

+25-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
using System.Buffers;
1212
using System.Buffers.Binary;
1313
using System.Collections.Generic;
14+
using System.Diagnostics;
1415
using System.IO;
1516
using System.Runtime.CompilerServices;
1617
using System.Runtime.InteropServices;
@@ -45,7 +46,7 @@ public static int ParseLength(ref ReadOnlySpan<byte> buffer, ref ParserInternalS
4546

4647
/// <summary>
4748
/// Parses the next tag.
48-
/// If the end of logical stream was reached, an invalid tag of 0 is returned.
49+
/// If the end of logical stream was reached, an invalid tag of 0 is returned.
4950
/// </summary>
5051
public static uint ParseTag(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
5152
{
@@ -382,7 +383,7 @@ public static float ParseFloat(ref ReadOnlySpan<byte> buffer, ref ParserInternal
382383
// ReadUnaligned uses processor architecture for endianness.
383384
float result = Unsafe.ReadUnaligned<float>(ref MemoryMarshal.GetReference(buffer.Slice(state.bufferPos, length)));
384385
state.bufferPos += length;
385-
return result;
386+
return result;
386387
}
387388

388389
private static unsafe float ParseFloatSlow(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
@@ -737,7 +738,7 @@ public static uint ReadRawVarint32(Stream input)
737738
/// </summary>
738739
/// <remarks>
739740
/// ZigZag encodes signed integers into values that can be efficiently
740-
/// encoded with varint. (Otherwise, negative values must be
741+
/// encoded with varint. (Otherwise, negative values must be
741742
/// sign-extended to 32 bits to be varint encoded, thus always taking
742743
/// 5 bytes on the wire.)
743744
/// </remarks>
@@ -751,7 +752,7 @@ public static int DecodeZigZag32(uint n)
751752
/// </summary>
752753
/// <remarks>
753754
/// ZigZag encodes signed integers into values that can be efficiently
754-
/// encoded with varint. (Otherwise, negative values must be
755+
/// encoded with varint. (Otherwise, negative values must be
755756
/// sign-extended to 64 bits to be varint encoded, thus always taking
756757
/// 10 bytes on the wire.)
757758
/// </remarks>
@@ -810,5 +811,25 @@ private static void ReadRawBytesIntoSpan(ref ReadOnlySpan<byte> buffer, ref Pars
810811
state.bufferPos += unreadSpan.Length;
811812
}
812813
}
814+
815+
/// <summary>
816+
/// Read LittleEndian packed field from buffer of specified length into a span.
817+
/// The amount of data available and the current limit should be checked before calling this method.
818+
/// </summary>
819+
internal static void ReadPackedFieldLittleEndian(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int length, Span<byte> outBuffer)
820+
{
821+
Debug.Assert(BitConverter.IsLittleEndian);
822+
823+
if (length <= state.bufferSize - state.bufferPos)
824+
{
825+
buffer.Slice(state.bufferPos, length).CopyTo(outBuffer);
826+
state.bufferPos += length;
827+
}
828+
else
829+
{
830+
ReadRawBytesIntoSpan(ref buffer, ref state, length, outBuffer);
831+
}
832+
}
833+
813834
}
814835
}

0 commit comments

Comments
 (0)