Skip to content

CQ: Big Value Type Suboptimal Inlining Code Generation #39732

Closed
@nietras

Description

@nietras

It would be great if the JIT's code generation for big value types could be improved. Doing so would improve support for the "functor" pattern in .NET, reduce code duplication, and help resolve an issue in #39543, which tries to consolidate the sorting code on a single generic TComparer code path.

Benchmark Code

using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Running;

public class Program
{
    /// <summary>
    /// Entry point: forwards the command-line arguments to BenchmarkDotNet's
    /// <c>BenchmarkSwitcher</c>, built over the assembly that contains <see cref="Program"/>.
    /// </summary>
    /// <param name="args">Command-line arguments passed through to the switcher.</param>
    static void Main(string[] args)
    {
        var benchmarkAssemblies = new[] { typeof(Program).Assembly };
        BenchmarkSwitcher.FromAssemblies(benchmarkAssemblies).Run(args);
    }

    /// <summary>
    /// Benchmarks five ways of comparing the same two <see cref="BigStruct"/> values:
    /// two delegates, the default <see cref="Comparer{T}"/>, and the two struct comparers
    /// (<see cref="StructComparer{T}"/> and <see cref="StructComparisonComparer{T}"/>).
    /// The <c>DisassemblyDiagnoser</c> attribute asks BenchmarkDotNet to report the generated code.
    /// </summary>
    [DisassemblyDiagnoser]
    public class CompareBigStruct
    {
        // Created in Setup with a fixed seed, so X and Y are built from the same seeded sequence on every run.
        Random _random;
        // Delegate wrapping a lambda that calls BigStruct.CompareTo.
        readonly Comparison<BigStruct> _comparison;
        // Delegate bound to the Compare method of _defaultComparer.
        readonly Comparison<BigStruct> _comparisonDefaultComparer;
        // Comparer<BigStruct>.Default, called directly by ComparerDefault.
        readonly Comparer<BigStruct> _defaultComparer;
        // Struct comparer that calls CompareTo on the value itself (no delegate involved).
        readonly StructComparer<BigStruct> _structComparer;
        // Struct comparer that forwards to the _comparison delegate.
        readonly StructComparisonComparer<BigStruct> _structComparisonComparer;

        /// <summary>
        /// Builds every comparer and delegate under test. <c>_defaultComparer</c> and
        /// <c>_comparison</c> are assigned before the members that are derived from them.
        /// </summary>
        public CompareBigStruct()
        {
            _defaultComparer = Comparer<BigStruct>.Default;
            _structComparer = new StructComparer<BigStruct>();
            _comparison = (x, y) => x.CompareTo(y);
            _comparisonDefaultComparer = _defaultComparer.Compare;
            _structComparisonComparer = new StructComparisonComparer<BigStruct>(_comparison);
        }

        /// <summary>First operand passed to every benchmark; assigned in <see cref="Setup"/>.</summary>
        public BigStruct X { get; set; }

        /// <summary>Second operand passed to every benchmark; assigned in <see cref="Setup"/>.</summary>
        public BigStruct Y { get; set; }

        /// <summary>
        /// Creates the seeded random source and draws one value each for <see cref="X"/> and <see cref="Y"/>.
        /// </summary>
        [GlobalSetup]
        public void Setup()
        {
            _random = new Random(21317834);
            X = new BigStruct(_random.Next());
            Y = new BigStruct(_random.Next());
        }

        /// <summary>Baseline: invokes the lambda-backed delegate.</summary>
        [Benchmark(Baseline = true)]
        public int Comparison() => _comparison(X, Y);

        /// <summary>Invokes the delegate bound to the default comparer's Compare method.</summary>
        [Benchmark()]
        public int ComparisonDefaultComparer() => _comparisonDefaultComparer(X, Y);

        /// <summary>Calls Compare on the default comparer instance directly.</summary>
        [Benchmark]
        public int ComparerDefault() => _defaultComparer.Compare(X, Y);

        /// <summary>Calls the struct comparer that forwards to <see cref="BigStruct.CompareTo"/>.</summary>
        [Benchmark]
        public int ComparerStruct() => _structComparer.Compare(X, Y);

        /// <summary>
        /// Calls the struct comparer that wraps the <c>_comparison</c> delegate; this is the case
        /// the issue reports as generating the largest and slowest code.
        /// </summary>
        [Benchmark]
        public int ComparerStructComparison() => _structComparisonComparer.Compare(X, Y);
    }

    /// <summary>
    /// Stateless value-type comparer that orders items by calling their own
    /// <see cref="IComparable{T}.CompareTo"/> implementation.
    /// </summary>
    /// <typeparam name="T">Element type; must be comparable to itself.</typeparam>
    internal readonly struct StructComparer<T> : IComparer<T> where T : IComparable<T>
    {
        /// <summary>Compares <paramref name="x"/> with <paramref name="y"/>.</summary>
        /// <returns>Whatever <c>x.CompareTo(y)</c> returns.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int Compare(T x, T y)
        {
            return x.CompareTo(y);
        }
    }

    /// <summary>
    /// Value-type comparer that adapts a <see cref="Comparison{T}"/> delegate to
    /// <see cref="IComparer{T}"/>.
    /// </summary>
    /// <typeparam name="T">Element type accepted by the wrapped delegate.</typeparam>
    internal readonly struct StructComparisonComparer<T> : IComparer<T>
    {
        private readonly Comparison<T> _comparison;

        /// <summary>Stores the delegate that every <see cref="Compare"/> call forwards to.</summary>
        /// <param name="comparison">Delegate to invoke; stored as given, without validation.</param>
        public StructComparisonComparer(Comparison<T> comparison)
        {
            _comparison = comparison;
        }

        /// <summary>Invokes the wrapped delegate with <paramref name="x"/> and <paramref name="y"/>.</summary>
        /// <returns>The delegate's result, unchanged.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int Compare(T x, T y)
        {
            return _comparison.Invoke(x, y);
        }
    }

    /// <summary>
    /// Multi-field value type used as the benchmark payload. All fields are filled from a single
    /// integer, but only <c>_int1</c> takes part in ordering.
    /// </summary>
    public readonly struct BigStruct : IComparable<BigStruct>
    {
        // Declaration order is kept as-is; the type exists to be large, and reordering
        // fields could change its layout.
        private readonly long _long;
        private readonly int _int0;
        private readonly int _int1;
        private readonly short _short0;
        private readonly short _short1;
        private readonly short _short2;
        private readonly short _short3;
        private readonly double _double;

        /// <summary>Initialises every field from <paramref name="value"/>, converting to each field's type.</summary>
        /// <param name="value">Source value; the short fields receive it cast to <see cref="short"/>.</param>
        public BigStruct(int value)
        {
            short narrowed = (short)value;

            _long = value;
            _double = value;

            _int0 = value;
            _int1 = value;

            _short0 = narrowed;
            _short1 = narrowed;
            _short2 = narrowed;
            _short3 = narrowed;
        }

        /// <summary>Orders two instances by their <c>_int1</c> field only.</summary>
        /// <param name="other">Instance to compare against.</param>
        /// <returns>The result of comparing this instance's <c>_int1</c> with <paramref name="other"/>'s.</returns>
        public int CompareTo(BigStruct other)
        {
            return _int1.CompareTo(other._int1);
        }
    }
}

Benchmark Results

BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.388 (2004/?/20H1)
Intel Core i7-8700 CPU 3.20GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores
.NET Core SDK=5.0.100-rc.1.20367.2
  [Host]     : .NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT
  DefaultJob : .NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT

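| Method                    |     Mean |     Error |    StdDev | Ratio | Code Size |
|-------------------------- |---------:|----------:|----------:|------:|----------:|
| Comparison                | 3.500 ns | 0.0065 ns | 0.0051 ns |  1.00 |     144 B |
| ComparisonDefaultComparer | 3.366 ns | 0.0071 ns | 0.0067 ns |  0.96 |     144 B |
| ComparerDefault           | 3.661 ns | 0.0059 ns | 0.0049 ns |  1.05 |     160 B |
| ComparerStruct            | 2.232 ns | 0.0066 ns | 0.0058 ns |  0.64 |     142 B |
| ComparerStructComparison  | 5.269 ns | 0.0071 ns | 0.0060 ns |  1.51 |     219 B |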

Benchmark Disassembly

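The problem is easy to see in the code generated for CompareBigStruct.ComparerStructComparison: after the struct comparer is inlined, both BigStruct arguments are copied three times (fields to stack, then stack to stack twice) before the delegate is called, whereas the plain delegate call in Comparison copies them only twice. The extra round of copies grows the method from 144 to 219 bytes and makes it the slowest of the five benchmarks.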

.NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT

; Program+CompareBigStruct.Comparison()
; Shape: load a pointer from [rcx+10], copy two 32-byte values twice, then make one indirect call.
; NOTE(review): rcx is presumably `this` and [rcx+10] the _comparison delegate field - confirm against the object layout.
       sub       rsp,0A8
       vzeroupper
       mov       rax,[rcx+10]
; Copy round 1: 32 bytes from [rcx+38..] and 32 bytes from [rcx+58..] (presumably X and Y) into stack temporaries.
       vmovdqu   xmm0,xmmword ptr [rcx+38]
       vmovdqu   xmmword ptr [rsp+88],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+48]
       vmovdqu   xmmword ptr [rsp+98],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+58]
       vmovdqu   xmmword ptr [rsp+68],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+68]
       vmovdqu   xmmword ptr [rsp+78],xmm0
; Copy round 2: stack-to-stack copy of both temporaries into [rsp+48] and [rsp+28], whose addresses are passed below.
       vmovdqu   xmm0,xmmword ptr [rsp+88]
       vmovdqu   xmmword ptr [rsp+48],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+98]
       vmovdqu   xmmword ptr [rsp+58],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+68]
       vmovdqu   xmmword ptr [rsp+28],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+78]
       vmovdqu   xmmword ptr [rsp+38],xmm0
; Call setup: rcx = [rax+8], rdx/r8 = addresses of the two copies; target is read from [rax+18]
; (presumably the delegate's target object and function pointer).
       mov       rcx,[rax+8]
       lea       rdx,[rsp+48]
       lea       r8,[rsp+28]
       call      qword ptr [rax+18]
       nop
       add       rsp,0A8
       ret
; Total bytes of code 144

.NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT

; Program+CompareBigStruct.ComparisonDefaultComparer()
; Instruction-for-instruction the same as Comparison(), except the pointer is loaded from [rcx+18]
; (presumably the _comparisonDefaultComparer delegate field) instead of [rcx+10].
       sub       rsp,0A8
       vzeroupper
       mov       rax,[rcx+18]
; Copy round 1: 32 bytes from [rcx+38..] and 32 bytes from [rcx+58..] (presumably X and Y) into stack temporaries.
       vmovdqu   xmm0,xmmword ptr [rcx+38]
       vmovdqu   xmmword ptr [rsp+88],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+48]
       vmovdqu   xmmword ptr [rsp+98],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+58]
       vmovdqu   xmmword ptr [rsp+68],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+68]
       vmovdqu   xmmword ptr [rsp+78],xmm0
; Copy round 2: stack-to-stack copy of both temporaries into [rsp+48] and [rsp+28], whose addresses are passed below.
       vmovdqu   xmm0,xmmword ptr [rsp+88]
       vmovdqu   xmmword ptr [rsp+48],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+98]
       vmovdqu   xmmword ptr [rsp+58],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+68]
       vmovdqu   xmmword ptr [rsp+28],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+78]
       vmovdqu   xmmword ptr [rsp+38],xmm0
; Call setup: rcx = [rax+8], rdx/r8 = addresses of the two copies; indirect call through [rax+18].
       mov       rcx,[rax+8]
       lea       rdx,[rsp+48]
       lea       r8,[rsp+28]
       call      qword ptr [rax+18]
       nop
       add       rsp,0A8
       ret
; Total bytes of code 144

.NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT

; Program+CompareBigStruct.ComparerDefault()
; Same two rounds of copies as the delegate cases, followed by a call whose target is found through
; three dependent loads starting at the object in rdx (looks like virtual dispatch on the comparer - confirm).
       sub       rsp,0A8
       vzeroupper
       mov       rdx,[rcx+20]
; Copy round 1: 32 bytes from [rcx+38..] and 32 bytes from [rcx+58..] (presumably X and Y) into stack temporaries.
       vmovdqu   xmm0,xmmword ptr [rcx+38]
       vmovdqu   xmmword ptr [rsp+88],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+48]
       vmovdqu   xmmword ptr [rsp+98],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+58]
       vmovdqu   xmmword ptr [rsp+68],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+68]
       vmovdqu   xmmword ptr [rsp+78],xmm0
; Copy round 2: stack-to-stack copy of both temporaries into [rsp+48] and [rsp+28], whose addresses are passed below.
       vmovdqu   xmm0,xmmword ptr [rsp+88]
       vmovdqu   xmmword ptr [rsp+48],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+98]
       vmovdqu   xmmword ptr [rsp+58],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+68]
       vmovdqu   xmmword ptr [rsp+28],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+78]
       vmovdqu   xmmword ptr [rsp+38],xmm0
; The value loaded from [rcx+20] is spilled to [rsp+20] and immediately reloaded into rax,
; although it is still live in rcx.
       mov       [rsp+20],rdx
       mov       rcx,rdx
       lea       rdx,[rsp+48]
       lea       r8,[rsp+28]
       mov       rax,[rsp+20]
; Call target resolved via [[[rax]+40]+20].
       mov       rax,[rax]
       mov       rax,[rax+40]
       call      qword ptr [rax+20]
       nop
       add       rsp,0A8
       ret
; Total bytes of code 160

.NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT

; Program+CompareBigStruct.ComparerStruct()
; No call instruction: the comparison is fully inlined. Both 32-byte values are nevertheless still
; copied twice, even though only one 32-bit field of each is read afterwards.
       sub       rsp,88
       vzeroupper
; Copy round 1: 32 bytes from [rcx+38..] and 32 bytes from [rcx+58..] (presumably X and Y) into stack temporaries.
       vmovdqu   xmm0,xmmword ptr [rcx+38]
       vmovdqu   xmmword ptr [rsp+68],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+48]
       vmovdqu   xmmword ptr [rsp+78],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+58]
       vmovdqu   xmmword ptr [rsp+48],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+68]
       vmovdqu   xmmword ptr [rsp+58],xmm0
; Copy round 2: stack-to-stack copy into [rsp+28] and [rsp+8].
       vmovdqu   xmm0,xmmword ptr [rsp+68]
       vmovdqu   xmmword ptr [rsp+28],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+78]
       vmovdqu   xmmword ptr [rsp+38],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+48]
       vmovdqu   xmmword ptr [rsp+8],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+58]
       vmovdqu   xmmword ptr [rsp+18],xmm0
; Three-way compare of the 32-bit value at offset 0C inside each copy ([rsp+28]+0C vs [rsp+8]+0C;
; presumably the _int1 field): result is -1, 1 or 0 in eax.
       mov       eax,[rsp+14]
       cmp       [rsp+34],eax
       jge       short M00_L00
       mov       eax,0FFFFFFFF
       jmp       short M00_L02
M00_L00:
       cmp       [rsp+34],eax
       jle       short M00_L01
       mov       eax,1
       jmp       short M00_L02
M00_L01:
       xor       eax,eax
M00_L02:
       add       rsp,88
       ret
; Total bytes of code 142

.NET Core 5.0.0 (CoreCLR 5.0.20.36102, CoreFX 5.0.20.36102), X64 RyuJIT

; Program+CompareBigStruct.ComparerStructComparison()
; Same indirect call as Comparison(), but with THREE rounds of 32-byte copies per value instead of two
; and a larger stack frame (0E8 vs 0A8).
       sub       rsp,0E8
       vzeroupper
; rdx = address of the field at [rcx+30] (presumably the _structComparisonComparer struct, whose
; only field is the delegate); it is dereferenced after copy round 2.
       lea       rdx,[rcx+30]
; Copy round 1: 32 bytes from [rcx+38..] and 32 bytes from [rcx+58..] (presumably X and Y) into stack temporaries.
       vmovdqu   xmm0,xmmword ptr [rcx+38]
       vmovdqu   xmmword ptr [rsp+0C8],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+48]
       vmovdqu   xmmword ptr [rsp+0D8],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+58]
       vmovdqu   xmmword ptr [rsp+0A8],xmm0
       vmovdqu   xmm0,xmmword ptr [rcx+68]
       vmovdqu   xmmword ptr [rsp+0B8],xmm0
; Copy round 2: stack-to-stack copy into [rsp+88] and [rsp+68].
       vmovdqu   xmm0,xmmword ptr [rsp+0C8]
       vmovdqu   xmmword ptr [rsp+88],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+0D8]
       vmovdqu   xmmword ptr [rsp+98],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+0A8]
       vmovdqu   xmmword ptr [rsp+68],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+0B8]
       vmovdqu   xmmword ptr [rsp+78],xmm0
       mov       rax,[rdx]
; Copy round 3 (not present in Comparison()): another stack-to-stack copy into [rsp+48] and [rsp+28],
; whose addresses are passed below.
       vmovdqu   xmm0,xmmword ptr [rsp+88]
       vmovdqu   xmmword ptr [rsp+48],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+98]
       vmovdqu   xmmword ptr [rsp+58],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+68]
       vmovdqu   xmmword ptr [rsp+28],xmm0
       vmovdqu   xmm0,xmmword ptr [rsp+78]
       vmovdqu   xmmword ptr [rsp+38],xmm0
; Call setup: rcx = [rax+8], rdx/r8 = addresses of the final copies; indirect call through [rax+18].
       mov       rcx,[rax+8]
       lea       rdx,[rsp+48]
       lea       r8,[rsp+28]
       call      qword ptr [rax+18]
       nop
       add       rsp,0E8
       ret
; Total bytes of code 219

cc: @jkotas @dotnet/jit-contrib

category:cq
theme:structs
skill-level:expert
cost:large
impact:medium

Metadata

Metadata

Assignees

Labels

area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
tenet-performance: Performance related issue

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions