Skip to content

Commit 5650c93

Browse files
authored
codegen: add optimizations for swapfield and replacefield (#41275)
1 parent 42da3d4 commit 5650c93

File tree

8 files changed

+510
-190
lines changed

8 files changed

+510
-190
lines changed

src/builtins.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -967,7 +967,9 @@ JL_CALLABLE(jl_f_replacefield)
967967
JL_TYPECHK(replacefield!, symbol, args[5]);
968968
failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[5], 1, 0);
969969
}
970-
// TODO: filter more invalid ordering combinations
970+
if (failure_order > success_order)
971+
jl_atomic_error("invalid atomic ordering");
972+
// TODO: filter more invalid ordering combinations?
971973
jl_value_t *v = args[0];
972974
jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
973975
size_t idx = get_checked_fieldindex("replacefield!", st, v, args[1], 1);
@@ -978,8 +980,6 @@ JL_CALLABLE(jl_f_replacefield)
978980
if (isatomic == (failure_order == jl_memory_order_notatomic))
979981
jl_atomic_error(isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
980982
: "replacefield!: non-atomic field cannot be accessed atomically");
981-
if (failure_order > success_order)
982-
jl_atomic_error("invalid atomic ordering");
983983
v = replace_nth_field(st, v, idx, args[2], args[3], isatomic); // always seq_cst, if isatomic needed at all
984984
return v;
985985
}

src/cgutils.cpp

Lines changed: 356 additions & 67 deletions
Large diffs are not rendered by default.

src/codegen.cpp

Lines changed: 67 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,8 @@ static const std::map<jl_fptr_args_t, JuliaFunction*> builtin_func_map = {
879879
{ &jl_f_isdefined, new JuliaFunction{"jl_f_isdefined", get_func_sig, get_func_attrs} },
880880
{ &jl_f_getfield, new JuliaFunction{"jl_f_getfield", get_func_sig, get_func_attrs} },
881881
{ &jl_f_setfield, new JuliaFunction{"jl_f_setfield", get_func_sig, get_func_attrs} },
882+
{ &jl_f_swapfield, new JuliaFunction{"jl_f_swapfield", get_func_sig, get_func_attrs} },
883+
{ &jl_f_modifyfield, new JuliaFunction{"jl_f_modifyfield", get_func_sig, get_func_attrs} },
882884
{ &jl_f_fieldtype, new JuliaFunction{"jl_f_fieldtype", get_func_sig, get_func_attrs} },
883885
{ &jl_f_nfields, new JuliaFunction{"jl_f_nfields", get_func_sig, get_func_attrs} },
884886
{ &jl_f__expr, new JuliaFunction{"jl_f__expr", get_func_sig, get_func_attrs} },
@@ -1160,6 +1162,8 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
11601162
jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
11611163
static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
11621164
jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
1165+
static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
1166+
Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
11631167

11641168
static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
11651169
static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
@@ -1441,6 +1445,17 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
14411445

14421446
// --- utilities ---
14431447

1448+
static Constant *undef_value_for_type(Type *T) {
1449+
auto tracked = CountTrackedPointers(T);
1450+
Constant *undef;
1451+
if (tracked.count)
1452+
// make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
1453+
undef = Constant::getNullValue(T);
1454+
else
1455+
undef = UndefValue::get(T);
1456+
return undef;
1457+
}
1458+
14441459
static void CreateTrap(IRBuilder<> &irbuilder)
14451460
{
14461461
Function *f = irbuilder.GetInsertBlock()->getParent();
@@ -1472,6 +1487,7 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test)
14721487
#endif
14731488
#endif
14741489

1490+
14751491
#include "cgutils.cpp"
14761492

14771493
static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip)
@@ -2351,62 +2367,6 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *
23512367
return emit_checked_var(ctx, bp, name, false, tbaa_binding);
23522368
}
23532369

2354-
template<typename Func>
2355-
static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Constant *defval, Func &&func)
2356-
{
2357-
if (auto Cond = dyn_cast<ConstantInt>(ifnot)) {
2358-
if (Cond->isZero())
2359-
return defval;
2360-
return func();
2361-
}
2362-
BasicBlock *currBB = ctx.builder.GetInsertBlock();
2363-
BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "guard_pass", ctx.f);
2364-
BasicBlock *exitBB = BasicBlock::Create(jl_LLVMContext, "guard_exit", ctx.f);
2365-
ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
2366-
ctx.builder.SetInsertPoint(passBB);
2367-
auto res = func();
2368-
passBB = ctx.builder.GetInsertBlock();
2369-
ctx.builder.CreateBr(exitBB);
2370-
ctx.builder.SetInsertPoint(exitBB);
2371-
if (defval == nullptr)
2372-
return nullptr;
2373-
PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2);
2374-
phi->addIncoming(defval, currBB);
2375-
phi->addIncoming(res, passBB);
2376-
return phi;
2377-
}
2378-
2379-
template<typename Func>
2380-
static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, bool defval, Func &&func)
2381-
{
2382-
return emit_guarded_test(ctx, ifnot, ConstantInt::get(T_int1, defval), func);
2383-
}
2384-
2385-
template<typename Func>
2386-
static Value *emit_nullcheck_guard(jl_codectx_t &ctx, Value *nullcheck, Func &&func)
2387-
{
2388-
if (!nullcheck)
2389-
return func();
2390-
return emit_guarded_test(ctx, null_pointer_cmp(ctx, nullcheck), false, func);
2391-
}
2392-
2393-
template<typename Func>
2394-
static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1,
2395-
Value *nullcheck2, Func &&func)
2396-
{
2397-
if (!nullcheck1)
2398-
return emit_nullcheck_guard(ctx, nullcheck2, func);
2399-
if (!nullcheck2)
2400-
return emit_nullcheck_guard(ctx, nullcheck1, func);
2401-
nullcheck1 = null_pointer_cmp(ctx, nullcheck1);
2402-
nullcheck2 = null_pointer_cmp(ctx, nullcheck2);
2403-
// If both are NULL, return true.
2404-
return emit_guarded_test(ctx, ctx.builder.CreateOr(nullcheck1, nullcheck2), true, [&] {
2405-
return emit_guarded_test(ctx, ctx.builder.CreateAnd(nullcheck1, nullcheck2),
2406-
false, func);
2407-
});
2408-
}
2409-
24102370
static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
24112371
Value *nullcheck1, Value *nullcheck2)
24122372
{
@@ -2443,8 +2403,6 @@ static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const
24432403
}
24442404

24452405
static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2);
2446-
static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
2447-
Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
24482406

24492407
static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2)
24502408
{
@@ -3011,13 +2969,18 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
30112969
else {
30122970
typed_store(ctx,
30132971
emit_arrayptr(ctx, ary, ary_ex, isboxed),
3014-
idx, val, ety,
2972+
idx, val, jl_cgval_t(), ety,
30152973
isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
30162974
ctx.aliasscope,
30172975
data_owner,
30182976
isboxed,
30192977
isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
3020-
0);
2978+
isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
2979+
0,
2980+
false,
2981+
true,
2982+
false,
2983+
false);
30212984
}
30222985
}
30232986
*ret = ary;
@@ -3158,19 +3121,34 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
31583121
return false;
31593122
}
31603123

3161-
else if (f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) {
3124+
else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
3125+
(f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
3126+
(f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6))) {
3127+
bool issetfield = f == jl_builtin_setfield;
3128+
bool isreplacefield = f == jl_builtin_replacefield;
3129+
const jl_cgval_t undefval;
31623130
const jl_cgval_t &obj = argv[1];
31633131
const jl_cgval_t &fld = argv[2];
3164-
const jl_cgval_t &val = argv[3];
3132+
const jl_cgval_t &val = argv[isreplacefield ? 4 : 3];
3133+
const jl_cgval_t &cmp = isreplacefield ? argv[3] : undefval;
31653134
enum jl_memory_order order = jl_memory_order_notatomic;
3166-
if (nargs == 4) {
3167-
const jl_cgval_t &ord = argv[4];
3168-
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "setfield!");
3135+
if (nargs >= (isreplacefield ? 5 : 4)) {
3136+
const jl_cgval_t &ord = argv[isreplacefield ? 5 : 4];
3137+
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type,
3138+
issetfield ? "setfield!" : isreplacefield ? "replacefield!" : "swapfield!");
31693139
if (!ord.constant)
31703140
return false;
3171-
order = jl_get_atomic_order((jl_sym_t*)ord.constant, false, true);
3141+
order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
31723142
}
3173-
if (order == jl_memory_order_invalid) {
3143+
enum jl_memory_order fail_order = order;
3144+
if (isreplacefield && nargs == 6) {
3145+
const jl_cgval_t &ord = argv[6];
3146+
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "replacefield!");
3147+
if (!ord.constant)
3148+
return false;
3149+
fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
3150+
}
3151+
if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
31743152
emit_atomic_error(ctx, "invalid atomic ordering");
31753153
*ret = jl_cgval_t(); // unreachable
31763154
return true;
@@ -3189,27 +3167,39 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
31893167
}
31903168
if (idx != -1) {
31913169
jl_value_t *ft = jl_svecref(uty->types, idx);
3192-
if (jl_subtype(val.typ, ft)) {
3170+
if (!jl_has_free_typevars(ft) && jl_subtype(val.typ, ft)) {
31933171
// TODO: attempt better codegen for approximate types
31943172
bool isboxed = jl_field_isptr(uty, idx);
31953173
bool isatomic = jl_field_isatomic(uty, idx);
31963174
bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE;
31973175
if (isatomic == (order == jl_memory_order_notatomic)) {
31983176
emit_atomic_error(ctx,
3199-
isatomic ? "setfield!: atomic field cannot be written non-atomically"
3200-
: "setfield!: non-atomic field cannot be written atomically");
3177+
issetfield ?
3178+
(isatomic ? "setfield!: atomic field cannot be written non-atomically"
3179+
: "setfield!: non-atomic field cannot be written atomically") :
3180+
isreplacefield ?
3181+
(isatomic ? "replacefield!: atomic field cannot be written non-atomically"
3182+
: "replacefield!: non-atomic field cannot be written atomically") :
3183+
(isatomic ? "swapfield!: atomic field cannot be written non-atomically"
3184+
: "swapfield!: non-atomic field cannot be written atomically"));
3185+
*ret = jl_cgval_t();
3186+
return true;
3187+
}
3188+
if (isatomic == (fail_order == jl_memory_order_notatomic)) {
3189+
emit_atomic_error(ctx,
3190+
(isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
3191+
: "replacefield!: non-atomic field cannot be accessed atomically"));
32013192
*ret = jl_cgval_t();
32023193
return true;
32033194
}
3204-
if (needlock)
3205-
emit_lockstate_value(ctx, obj, true);
3206-
emit_setfield(ctx, uty, obj, idx, val, true, true,
3195+
*ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true, true,
32073196
(needlock || order <= jl_memory_order_notatomic)
32083197
? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
3209-
: get_llvm_atomic_order(order));
3210-
if (needlock)
3211-
emit_lockstate_value(ctx, obj, false);
3212-
*ret = val;
3198+
: get_llvm_atomic_order(order),
3199+
(needlock || fail_order <= jl_memory_order_notatomic)
3200+
? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
3201+
: get_llvm_atomic_order(fail_order),
3202+
needlock, issetfield, isreplacefield);
32133203
return true;
32143204
}
32153205
}
@@ -7233,17 +7223,6 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
72337223
ctx.builder.SetCurrentDebugLocation(noDbg);
72347224
ctx.builder.ClearInsertionPoint();
72357225

7236-
auto undef_value_for_type = [&](Type *T) {
7237-
auto tracked = CountTrackedPointers(T);
7238-
Constant *undef;
7239-
if (tracked.count)
7240-
// make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
7241-
undef = Constant::getNullValue(T);
7242-
else
7243-
undef = UndefValue::get(T);
7244-
return undef;
7245-
};
7246-
72477226
// Codegen Phi nodes
72487227
std::map<std::pair<BasicBlock*, BasicBlock*>, BasicBlock*> BB_rewrite_map;
72497228
std::vector<llvm::PHINode*> ToDelete;

src/intrinsics.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,8 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
690690
assert(!isboxed);
691691
if (!type_is_ghost(ptrty)) {
692692
thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
693-
typed_store(ctx, thePtr, im1, x, ety, tbaa_data, nullptr, nullptr, isboxed, AtomicOrdering::NotAtomic, align_nb);
693+
typed_store(ctx, thePtr, im1, x, jl_cgval_t(), ety, tbaa_data, nullptr, nullptr, isboxed,
694+
AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false);
694695
}
695696
}
696697
return e;

src/llvm-gc-invariant-verifier.cpp

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,17 @@ struct GCInvariantVerifier : public FunctionPass, public InstVisitor<GCInvariant
5555

5656
bool runOnFunction(Function &F) override;
5757
void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
58-
void visitStoreInst(StoreInst &SI);
5958
void visitLoadInst(LoadInst &LI);
59+
void visitStoreInst(StoreInst &SI);
60+
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI);
61+
void visitAtomicRMWInst(AtomicRMWInst &SI);
6062
void visitReturnInst(ReturnInst &RI);
6163
void visitGetElementPtrInst(GetElementPtrInst &GEP);
6264
void visitIntToPtrInst(IntToPtrInst &IPI);
6365
void visitPtrToIntInst(PtrToIntInst &PII);
6466
void visitCallInst(CallInst &CI);
67+
68+
void checkStoreInst(Type *VTy, unsigned AS, Value &SI);
6569
};
6670

6771
void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
@@ -80,8 +84,7 @@ void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
8084
"Illegal address space cast from decayed ptr", &I);
8185
}
8286

83-
void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
84-
Type *VTy = SI.getValueOperand()->getType();
87+
void GCInvariantVerifier::checkStoreInst(Type *VTy, unsigned AS, Value &SI) {
8588
if (VTy->isPointerTy()) {
8689
/* We currently don't obey this for arguments. That's ok - they're
8790
externally rooted. */
@@ -90,12 +93,23 @@ void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
9093
AS != AddressSpace::Derived,
9194
"Illegal store of decayed value", &SI);
9295
}
93-
VTy = SI.getPointerOperand()->getType();
94-
if (VTy->isPointerTy()) {
95-
unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
96-
Check(AS != AddressSpace::CalleeRooted,
97-
"Illegal store to callee rooted value", &SI);
98-
}
96+
Check(AS != AddressSpace::CalleeRooted,
97+
"Illegal store to callee rooted value", &SI);
98+
}
99+
100+
void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
101+
Type *VTy = SI.getValueOperand()->getType();
102+
checkStoreInst(VTy, SI.getPointerAddressSpace(), SI);
103+
}
104+
105+
void GCInvariantVerifier::visitAtomicRMWInst(AtomicRMWInst &SI) {
106+
Type *VTy = SI.getValOperand()->getType();
107+
checkStoreInst(VTy, SI.getPointerAddressSpace(), SI);
108+
}
109+
110+
void GCInvariantVerifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
111+
Type *VTy = SI.getNewValOperand()->getType();
112+
checkStoreInst(VTy, SI.getPointerAddressSpace(), SI);
99113
}
100114

101115
void GCInvariantVerifier::visitLoadInst(LoadInst &LI) {

0 commit comments

Comments
 (0)