Skip to content

Commit 4fbd317

Browse files
aclements and gopherbot
authored and committed
internal/simdgen: support masks and immediates
This significantly reworks the XED loader and tweaks the operand representation to support more than just vector registers. In particular, each operand now has a "class" string that determines the meaning of several other fields. We add AVX-512 == and < to demonstrate both masks and immediates. Change-Id: I6d025dbcb66e5914472b60697b3a7e4cc6174d78 Reviewed-on: https://go-review.googlesource.com/c/arch/+/667435 LUCI-TryBot-Result: Go LUCI <[email protected]> Auto-Submit: Austin Clements <[email protected]> Reviewed-by: Junyang Shao <[email protected]>
1 parent 097aeb1 commit 4fbd317

File tree

6 files changed

+691
-173
lines changed

6 files changed

+691
-173
lines changed

internal/simdgen/asm.yaml.toy

Lines changed: 45 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,87 +6,102 @@
66
feature: "SSE2"
77
in:
88
- asmPos: 0
9+
class: vreg
910
base: float
10-
bits: 32
11-
w: 128
11+
elemBits: 32
12+
bits: 128
1213
- asmPos: 1
14+
class: vreg
1315
base: float
14-
bits: 32
15-
w: 128
16+
elemBits: 32
17+
bits: 128
1618
out:
1719
- asmPos: 0
20+
class: vreg
1821
base: float
19-
bits: 32
20-
w: 128
22+
elemBits: 32
23+
bits: 128
2124

2225
- asm: ADDPD
2326
goarch: amd64
2427
feature: "SSE2"
2528
in:
2629
- asmPos: 0
30+
class: vreg
2731
base: float
28-
bits: 64
29-
w: 128
32+
elemBits: 64
33+
bits: 128
3034
- asmPos: 1
35+
class: vreg
3136
base: float
32-
bits: 64
33-
w: 128
37+
elemBits: 64
38+
bits: 128
3439
out:
3540
- asmPos: 0
41+
class: vreg
3642
base: float
37-
bits: 64
38-
w: 128
43+
elemBits: 64
44+
bits: 128
3945

4046
- asm: PADDB
4147
goarch: amd64
4248
feature: "SSE2"
4349
in:
4450
- asmPos: 0
51+
class: vreg
4552
base: int|uint
46-
bits: 32
47-
w: 128
53+
elemBits: 32
54+
bits: 128
4855
- asmPos: 1
56+
class: vreg
4957
base: int|uint
50-
bits: 32
51-
w: 128
58+
elemBits: 32
59+
bits: 128
5260
out:
5361
- asmPos: 0
62+
class: vreg
5463
base: int|uint
55-
bits: 32
56-
w: 128
64+
elemBits: 32
65+
bits: 128
5766

5867
- asm: VPADDB
5968
goarch: amd64
6069
feature: "AVX"
6170
in:
6271
- asmPos: 1
72+
class: vreg
6373
base: int|uint
64-
bits: 8
65-
w: 128
74+
elemBits: 8
75+
bits: 128
6676
- asmPos: 2
77+
class: vreg
6778
base: int|uint
68-
bits: 8
69-
w: 128
79+
elemBits: 8
80+
bits: 128
7081
out:
7182
- asmPos: 0
83+
class: vreg
7284
base: int|uint
73-
bits: 8
74-
w: 128
85+
elemBits: 8
86+
bits: 128
7587

7688
- asm: VPADDB
7789
goarch: amd64
7890
feature: "AVX2"
7991
in:
8092
- asmPos: 1
93+
class: vreg
8194
base: int|uint
82-
bits: 8
83-
w: 256
95+
elemBits: 8
96+
bits: 256
8497
- asmPos: 2
98+
class: vreg
8599
base: int|uint
86-
bits: 8
87-
w: 256
100+
elemBits: 8
101+
bits: 256
88102
out:
89103
- asmPos: 0
104+
class: vreg
90105
base: int|uint
91-
bits: 8
92-
w: 256
106+
elemBits: 8
107+
bits: 256

internal/simdgen/go.yaml

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,207 @@
1717
- go: $t
1818
out:
1919
- go: $t
20+
21+
#
22+
# AVX-512 Comparisons
23+
#
24+
25+
# TODO(austin): I'm not happy with how much copy-pasting this requires. We could
26+
# do a functional abstraction, but that feels bolted on. Unification is *almost*
27+
# capable of collapsing all of this.
28+
#
29+
# One thing that might work is having a !let node that lets you extend the
30+
# nonDetEnv with explicit values:
31+
#
32+
# !let
33+
# - {$go: Equal, $imm: 0, $mask: K0}
34+
# - {$go: EqualMasked, $imm: 0, $mask: _}
35+
# - {$go: Less, $imm: 1, $mask: K0}
36+
# - {$go: LessMasked, $imm: 1, $mask: _}
37+
# - !let
38+
# - {$asm: "VPCMP[BWDQ]", $base: int}
39+
# - {$asm: "VPCMPU[BWDQ]", $base: uint}
40+
# - go: $go
41+
# asm: $asm
42+
# in:
43+
# - const: $mask
44+
# - base: $base
45+
# go: $t
46+
# - base: $base
47+
# go: $t
48+
# - class: immediate
49+
# const: $imm
50+
# out:
51+
# - class: mask
52+
#
53+
# That's not bad, but it's very hierarchical. CUE has a "mixin" approach to this.
54+
#
55+
# - !unify
56+
# # All AVX-512 comparisons have the same basic operand shape
57+
# - {in: [_, {go: $t}, {go: $t}, _], out: [{class: mask}]}
58+
# # There are signed and unsigned variants
59+
# - !sum
60+
# - {asm: "VPCMP[BWDQ]", in: [_, {base: int}, {base: int}, _]}
61+
# - {asm: "VPCMPU[BWDQ]", in: [_, {base: uint}, {base: uint}, _]}
62+
# # Finally, list out the operations.
63+
# - !let
64+
# - $equal: {in: [_, _, _, {class: immediate, const: 0}]}
65+
# $less: {in: [_, _, _, {class: immediate, const: 1}]}
66+
# $masked: _
67+
# $unmasked: {in: [const: K0, _, _, _]}
68+
# - !sum
69+
# - !unify [go: Equal, $equal, $unmasked]
70+
# - !unify [go: EqualMasked, $equal, $masked]
71+
# - !unify [go: Less, $less, $unmasked]
72+
# - !unify [go: LessMasked, $less, $masked]
73+
#
74+
# Maybe !let is just a feature of !sum that introduces an environment factor for
75+
# all following branches? That would let me do the above in-line with the big
76+
# top-level !sum:
77+
#
78+
# - !sum
79+
# ...
80+
# - !let # Adds a factor that is the sum of the following terms:
81+
# - {$go: Equal, $imm: 0, $mask: K0}
82+
# - {$go: EqualMasked, $imm: 0, $mask: _}
83+
# - {$go: Less, $imm: 1, $mask: K0}
84+
# - {$go: LessMasked, $imm: 1, $mask: _}
85+
# - !let # Adds another factor:
86+
# - {$asm: "VPCMP[BWDQ]", $base: int}
87+
# - {$asm: "VPCMPU[BWDQ]", $base: uint}
88+
# - go: $go
89+
# asm: $asm
90+
# in:
91+
# - const: $mask
92+
# - base: $base
93+
# go: $t
94+
# - base: $base
95+
# go: $t
96+
# - class: immediate
97+
# const: $imm
98+
# out:
99+
# - class: mask
100+
#
101+
# I may need to choose names more carefully in that case. This is a general
102+
# problem with names being file-global. (This is less of a problem with the
103+
# mixin style because those names tend to be more specific anyway.) Or maybe it
104+
# makes sense for each !let to introduce fresh idents, even if the string names
105+
# are the same?
106+
107+
- go: Equal
108+
goarch: amd64
109+
asm: "VPCMP[BWDQ]" # Signed comparison
110+
in:
111+
- const: K0
112+
- base: int
113+
go: $t
114+
- base: int
115+
go: $t
116+
- class: immediate
117+
const: 0
118+
out:
119+
- class: mask
120+
121+
- go: EqualMasked
122+
goarch: amd64
123+
asm: "VPCMP[BWDQ]" # Signed comparison
124+
in:
125+
- _
126+
- base: int
127+
go: $t
128+
- base: int
129+
go: $t
130+
- class: immediate
131+
const: 0
132+
out:
133+
- class: mask
134+
135+
- go: Equal
136+
goarch: amd64
137+
asm: "VPCMPU[BWDQ]" # Unsigned comparison
138+
in:
139+
- const: K0
140+
- base: uint
141+
go: $t
142+
- base: uint
143+
go: $t
144+
- class: immediate
145+
const: 0
146+
out:
147+
- class: mask
148+
149+
- go: EqualMasked
150+
goarch: amd64
151+
asm: "VPCMPU[BWDQ]" # Unsigned comparison
152+
in:
153+
- _
154+
- base: uint
155+
go: $t
156+
- base: uint
157+
go: $t
158+
- class: immediate
159+
const: 0
160+
out:
161+
- class: mask
162+
163+
- go: Less
164+
goarch: amd64
165+
asm: "VPCMP[BWDQ]" # Signed comparison
166+
in:
167+
- const: K0
168+
- base: int
169+
go: $t
170+
- base: int
171+
go: $t
172+
- class: immediate
173+
const: 1
174+
out:
175+
- class: mask
176+
177+
- go: LessMasked
178+
goarch: amd64
179+
asm: "VPCMP[BWDQ]" # Signed comparison
180+
in:
181+
- _
182+
- base: int
183+
go: $t
184+
- base: int
185+
go: $t
186+
- class: immediate
187+
const: 1
188+
out:
189+
- class: mask
190+
191+
- go: Less
192+
goarch: amd64
193+
asm: "VPCMPU[BWDQ]" # Unsigned comparison
194+
in:
195+
- const: K0
196+
- base: uint
197+
go: $t
198+
- base: uint
199+
go: $t
200+
- class: immediate
201+
const: 1
202+
out:
203+
- class: mask
204+
205+
- go: LessMasked
206+
goarch: amd64
207+
asm: "VPCMPU[BWDQ]" # Unsigned comparison
208+
in:
209+
- _
210+
- base: uint
211+
go: $t
212+
- base: uint
213+
go: $t
214+
- class: immediate
215+
const: 1
216+
out:
217+
- class: mask
218+
219+
# TODO:
220+
# 2: OP := LE;
221+
# 4: OP := NEQ;
222+
# 5: OP := NLT;
223+
# 6: OP := NLE;

0 commit comments

Comments
 (0)