Skip to content

Commit 41d6520

Browse files
chleroympe
authored and committed
powerpc/atomics: Use immediate operand when possible
Today we get the following code generation for atomic operations: c001bb2c: 39 20 00 01 li r9,1 c001bb30: 7d 40 18 28 lwarx r10,0,r3 c001bb34: 7d 09 50 50 subf r8,r9,r10 c001bb38: 7d 00 19 2d stwcx. r8,0,r3 c001c7a8: 39 40 00 01 li r10,1 c001c7ac: 7d 00 18 28 lwarx r8,0,r3 c001c7b0: 7c ea 42 14 add r7,r10,r8 c001c7b4: 7c e0 19 2d stwcx. r7,0,r3 By allowing GCC to choose between immediate or regular operation, we get: c001bb2c: 7d 20 18 28 lwarx r9,0,r3 c001bb30: 39 49 ff ff addi r10,r9,-1 c001bb34: 7d 40 19 2d stwcx. r10,0,r3 -- c001c7a4: 7d 40 18 28 lwarx r10,0,r3 c001c7a8: 39 0a 00 01 addi r8,r10,1 c001c7ac: 7d 00 19 2d stwcx. r8,0,r3 For "and", the dot form has to be used because "andi" doesn't exist. For logical operations we use unsigned 16 bits immediate. For arithmetic operations we use signed 16 bits immediate. On pmac32_defconfig, it reduces the text by approx another 8 kbytes. Signed-off-by: Christophe Leroy <[email protected]> Acked-by: Segher Boessenkool <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/2ec558d44db8045752fe9dbd29c9ba84bab6030b.1632236981.git.christophe.leroy@csgroup.eu
1 parent fb35078 commit 41d6520

File tree

1 file changed

+28
-28
lines changed

1 file changed

+28
-28
lines changed

arch/powerpc/include/asm/atomic.h

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -37,62 +37,62 @@ static __inline__ void arch_atomic_set(atomic_t *v, int i)
3737
__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
3838
}
3939

40-
#define ATOMIC_OP(op, asm_op) \
40+
#define ATOMIC_OP(op, asm_op, suffix, sign, ...) \
4141
static __inline__ void arch_atomic_##op(int a, atomic_t *v) \
4242
{ \
4343
int t; \
4444
\
4545
__asm__ __volatile__( \
4646
"1: lwarx %0,0,%3 # atomic_" #op "\n" \
47-
#asm_op " %0,%2,%0\n" \
47+
#asm_op "%I2" suffix " %0,%0,%2\n" \
4848
" stwcx. %0,0,%3 \n" \
4949
" bne- 1b\n" \
5050
: "=&r" (t), "+m" (v->counter) \
51-
: "r" (a), "r" (&v->counter) \
52-
: "cc"); \
51+
: "r"#sign (a), "r" (&v->counter) \
52+
: "cc", ##__VA_ARGS__); \
5353
} \
5454

55-
#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
55+
#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...) \
5656
static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \
5757
{ \
5858
int t; \
5959
\
6060
__asm__ __volatile__( \
6161
"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
62-
#asm_op " %0,%2,%0\n" \
62+
#asm_op "%I2" suffix " %0,%0,%2\n" \
6363
" stwcx. %0,0,%3\n" \
6464
" bne- 1b\n" \
6565
: "=&r" (t), "+m" (v->counter) \
66-
: "r" (a), "r" (&v->counter) \
67-
: "cc"); \
66+
: "r"#sign (a), "r" (&v->counter) \
67+
: "cc", ##__VA_ARGS__); \
6868
\
6969
return t; \
7070
}
7171

72-
#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
72+
#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...) \
7373
static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
7474
{ \
7575
int res, t; \
7676
\
7777
__asm__ __volatile__( \
7878
"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
79-
#asm_op " %1,%3,%0\n" \
79+
#asm_op "%I3" suffix " %1,%0,%3\n" \
8080
" stwcx. %1,0,%4\n" \
8181
" bne- 1b\n" \
8282
: "=&r" (res), "=&r" (t), "+m" (v->counter) \
83-
: "r" (a), "r" (&v->counter) \
84-
: "cc"); \
83+
: "r"#sign (a), "r" (&v->counter) \
84+
: "cc", ##__VA_ARGS__); \
8585
\
8686
return res; \
8787
}
8888

89-
#define ATOMIC_OPS(op, asm_op) \
90-
ATOMIC_OP(op, asm_op) \
91-
ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
92-
ATOMIC_FETCH_OP_RELAXED(op, asm_op)
89+
#define ATOMIC_OPS(op, asm_op, suffix, sign, ...) \
90+
ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__) \
91+
ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\
92+
ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)
9393

94-
ATOMIC_OPS(add, add)
95-
ATOMIC_OPS(sub, subf)
94+
ATOMIC_OPS(add, add, "c", I, "xer")
95+
ATOMIC_OPS(sub, sub, "c", I, "xer")
9696

9797
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
9898
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
@@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf)
101101
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
102102

103103
#undef ATOMIC_OPS
104-
#define ATOMIC_OPS(op, asm_op) \
105-
ATOMIC_OP(op, asm_op) \
106-
ATOMIC_FETCH_OP_RELAXED(op, asm_op)
104+
#define ATOMIC_OPS(op, asm_op, suffix, sign) \
105+
ATOMIC_OP(op, asm_op, suffix, sign) \
106+
ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign)
107107

108-
ATOMIC_OPS(and, and)
109-
ATOMIC_OPS(or, or)
110-
ATOMIC_OPS(xor, xor)
108+
ATOMIC_OPS(and, and, ".", K)
109+
ATOMIC_OPS(or, or, "", K)
110+
ATOMIC_OPS(xor, xor, "", K)
111111

112112
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
113113
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
@@ -241,15 +241,15 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
241241
"1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\
242242
cmpw 0,%0,%3 \n\
243243
beq 2f \n\
244-
add %0,%2,%0 \n"
244+
add%I2c %0,%0,%2 \n"
245245
" stwcx. %0,0,%1 \n\
246246
bne- 1b \n"
247247
PPC_ATOMIC_EXIT_BARRIER
248-
" subf %0,%2,%0 \n\
248+
" sub%I2c %0,%0,%2 \n\
249249
2:"
250250
: "=&r" (t)
251-
: "r" (&v->counter), "r" (a), "r" (u)
252-
: "cc", "memory");
251+
: "r" (&v->counter), "rI" (a), "r" (u)
252+
: "cc", "memory", "xer");
253253

254254
return t;
255255
}

0 commit comments

Comments
 (0)