Skip to content

Commit 3ce74c3

Browse files
authored
[CIR] Upstream support for string literals (#140796)
This adds the minimal support needed to handle string literals.
1 parent e4e7a7e commit 3ce74c3

File tree

10 files changed

+231
-0
lines changed

10 files changed

+231
-0
lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
1414
#include "clang/CIR/Dialect/IR/CIRDialect.h"
1515
#include "clang/CIR/Dialect/IR/CIRTypes.h"
16+
#include "clang/CIR/MissingFeatures.h"
1617
#include "llvm/ADT/STLForwardCompat.h"
1718
#include "llvm/Support/ErrorHandling.h"
1819

@@ -177,6 +178,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
177178
return create<cir::AllocaOp>(loc, addrType, type, name, alignment);
178179
}
179180

181+
/// Build a cir::GetGlobalOp that refers to \p global by its symbol name.
///
/// \param loc    Location attached to the new operation.
/// \param global The global whose address is requested.
/// \returns The created operation's result, typed as a pointer to the
///          global's symbol type.
mlir::Value createGetGlobal(mlir::Location loc, cir::GlobalOp global) {
  // Marker for the address-space entry in cir::MissingFeatures.
  assert(!cir::MissingFeatures::addressSpace());
  auto globalPtrTy = getPointerTo(global.getSymType());
  return create<cir::GetGlobalOp>(loc, globalPtrTy, global.getSymName());
}
186+
180187
cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
181188
bool isVolatile = false, uint64_t alignment = 0) {
182189
mlir::IntegerAttr intAttr;

clang/include/clang/CIR/MissingFeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ struct MissingFeatures {
3838
static bool opGlobalWeakRef() { return false; }
3939
static bool opGlobalLinkage() { return false; }
4040
static bool opGlobalSetVisitibility() { return false; }
41+
static bool opGlobalUnnamedAddr() { return false; }
4142

4243
static bool supportIFuncAttr() { return false; }
4344
static bool supportVisibility() { return false; }

clang/lib/CIR/CodeGen/CIRGenBuilder.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,34 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
2626
CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
2727
: CIRBaseBuilderTy(mlirContext), typeCache(tc) {}
2828

29+
/// Get a cir::ConstArrayAttr for a string literal.
30+
/// Note: This is different from what is returned by
31+
/// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
32+
mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy,
33+
std::optional<size_t> size) {
34+
size_t finalSize = size.value_or(str.size());
35+
36+
size_t lastNonZeroPos = str.find_last_not_of('\0');
37+
// If the string is full of null bytes, emit a #cir.zero rather than
38+
// a #cir.const_array.
39+
if (lastNonZeroPos == llvm::StringRef::npos) {
40+
auto arrayTy = cir::ArrayType::get(eltTy, finalSize);
41+
return cir::ZeroAttr::get(arrayTy);
42+
}
43+
// We emit trailing zeros only if there are multiple trailing zeros.
44+
size_t trailingZerosNum = 0;
45+
if (finalSize > lastNonZeroPos + 2)
46+
trailingZerosNum = finalSize - lastNonZeroPos - 1;
47+
auto truncatedArrayTy =
48+
cir::ArrayType::get(eltTy, finalSize - trailingZerosNum);
49+
auto fullArrayTy = cir::ArrayType::get(eltTy, finalSize);
50+
return cir::ConstArrayAttr::get(
51+
fullArrayTy,
52+
mlir::StringAttr::get(str.drop_back(trailingZerosNum),
53+
truncatedArrayTy),
54+
trailingZerosNum);
55+
}
56+
2957
std::string getUniqueAnonRecordName() { return getUniqueRecordName("anon"); }
3058

3159
std::string getUniqueRecordName(const std::string &baseName) {

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,16 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
743743
return lv;
744744
}
745745

746+
/// Emit an lvalue for the string literal \p e.
///
/// The literal is materialized as a global via
/// CIRGenModule::getGlobalForStringLiteral, and the returned lvalue addresses
/// that global through a get-global operation.
LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) {
  cir::GlobalOp strGlobal = cgm.getGlobalForStringLiteral(e);
  assert(!cir::MissingFeatures::opGlobalAlignment());

  mlir::Location loc = getLoc(e->getSourceRange());
  mlir::Value globalAddr = builder.createGetGlobal(loc, strGlobal);

  // The address is built with a fixed alignment of one; see the
  // opGlobalAlignment marker above.
  Address strAddr(globalAddr, strGlobal.getSymType(),
                  CharUnits::fromQuantity(1));
  return makeAddrLValue(strAddr, e->getType(), AlignmentSource::Decl);
}
755+
746756
/// Casts are never lvalues unless that cast is to a reference type. If the cast
747757
/// is to a reference, we can have the usual lvalue result, otherwise if a cast
748758
/// is needed by the code generator in an lvalue context, then it must mean that

clang/lib/CIR/CodeGen/CIRGenFunction.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
531531
return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
532532
case Expr::UnaryOperatorClass:
533533
return emitUnaryOpLValue(cast<UnaryOperator>(e));
534+
case Expr::StringLiteralClass:
535+
return emitStringLiteralLValue(cast<StringLiteral>(e));
534536
case Expr::MemberExprClass:
535537
return emitMemberExpr(cast<MemberExpr>(e));
536538
case Expr::BinaryOperatorClass:

clang/lib/CIR/CodeGen/CIRGenFunction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,8 @@ class CIRGenFunction : public CIRGenTypeCache {
695695

696696
mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
697697

698+
LValue emitStringLiteralLValue(const StringLiteral *e);
699+
698700
mlir::LogicalResult emitSwitchBody(const clang::Stmt *s);
699701
mlir::LogicalResult emitSwitchCase(const clang::SwitchCase &s,
700702
bool buildingTopLevelCase);

clang/lib/CIR/CodeGen/CIRGenModule.cpp

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,30 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd,
562562
llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition");
563563
}
564564

565+
/// Build the constant-array initializer for the string literal \p e.
///
/// The string data itself is emitted as an inline array rather than as the
/// address of the string. Only literals with one-byte characters are handled;
/// for any other width an NYI error is reported and a null attribute is
/// returned.
mlir::Attribute
CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) {
  assert(!e->getType()->isPointerType() && "Strings are always arrays");

  if (e->getCharByteWidth() != 1) {
    errorNYI(e->getSourceRange(),
             "getConstantArrayFromStringLiteral: wide characters");
    return mlir::Attribute();
  }

  // The literal's array type dictates how many elements the data must have.
  const ConstantArrayType *literalTy =
      astContext.getAsConstantArrayType(e->getType());
  const uint64_t numElts = literalTy->getZExtSize();

  // Copy the bytes and resize the copy to exactly that element count.
  SmallString<64> bytes(e->getString());
  bytes.resize(numElts);

  return builder.getString(bytes, convertType(literalTy->getElementType()),
                           numElts);
}
588+
565589
static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) {
566590
assert(!cir::MissingFeatures::supportComdat());
567591

@@ -749,6 +773,84 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) {
749773
return getCIRLinkageForDeclarator(vd, linkage, isConstant);
750774
}
751775

776+
/// Create the cir::GlobalOp that backs a string literal and attach its
/// initializer.
///
/// \param loc        Location for the new global.
/// \param c          Initializer; its type becomes the global's type.
/// \param cgm        Module generator used to create the global.
/// \param globalName Symbol name for the global.
/// \returns The newly created global with \p c set as its initializer.
static cir::GlobalOp generateStringLiteral(mlir::Location loc,
                                           mlir::TypedAttr c,
                                           CIRGenModule &cgm,
                                           StringRef globalName) {
  assert(!cir::MissingFeatures::addressSpace());

  // FIXME(cir): check for insertion point in module level.
  mlir::Type initTy = c.getType();
  cir::GlobalOp strGlobal =
      CIRGenModule::createGlobalOp(cgm, loc, globalName, initTy);

  // Additional properties of the global are tracked as missing features.
  assert(!cir::MissingFeatures::opGlobalAlignment());
  assert(!cir::MissingFeatures::opGlobalLinkage());
  assert(!cir::MissingFeatures::opGlobalThreadLocal());
  assert(!cir::MissingFeatures::opGlobalUnnamedAddr());

  CIRGenModule::setInitializer(strGlobal, c);

  assert(!cir::MissingFeatures::supportComdat());
  assert(!cir::MissingFeatures::opGlobalDSOLocal());
  return strGlobal;
}
796+
797+
// LLVM IR automatically uniques names when new llvm::GlobalVariables are
798+
// created. This is handy, for example, when creating globals for string
799+
// literals. Since we don't do that when creating cir::GlobalOp's, we need
800+
// a mechanism to generate a unique name in advance.
801+
//
802+
// For now, this mechanism is only used in cases where we know that the
803+
// name is compiler-generated, so we don't use the MLIR symbol table for
804+
// the lookup.
805+
std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) {
806+
// If this is the first time we've generated a name for this basename, use
807+
// it as is and start a counter for this base name.
808+
auto it = cgGlobalNames.find(baseName);
809+
if (it == cgGlobalNames.end()) {
810+
cgGlobalNames[baseName] = 1;
811+
return baseName;
812+
}
813+
814+
std::string result =
815+
baseName + "." + std::to_string(cgGlobalNames[baseName]++);
816+
// There should not be any symbol with this name in the module.
817+
assert(!mlir::SymbolTable::lookupSymbolIn(theModule, result));
818+
return result;
819+
}
820+
821+
/// Return a pointer to a constant array for the given string literal.
822+
cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s,
823+
StringRef name) {
824+
mlir::Attribute c = getConstantArrayFromStringLiteral(s);
825+
826+
if (getLangOpts().WritableStrings) {
827+
errorNYI(s->getSourceRange(),
828+
"getGlobalForStringLiteral: Writable strings");
829+
}
830+
831+
// Mangle the string literal if that's how the ABI merges duplicate strings.
832+
// Don't do it if they are writable, since we don't want writes in one TU to
833+
// affect strings in another.
834+
if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) &&
835+
!getLangOpts().WritableStrings) {
836+
errorNYI(s->getSourceRange(),
837+
"getGlobalForStringLiteral: mangle string literals");
838+
}
839+
840+
// Unlike LLVM IR, CIR doesn't automatically unique names for globals, so
841+
// we need to do that explicitly.
842+
std::string uniqueName = getUniqueGlobalName(name.str());
843+
mlir::Location loc = getLoc(s->getSourceRange());
844+
auto typedC = llvm::cast<mlir::TypedAttr>(c);
845+
assert(!cir::MissingFeatures::opGlobalAlignment());
846+
cir::GlobalOp gv = generateStringLiteral(loc, typedC, *this, uniqueName);
847+
assert(!cir::MissingFeatures::opGlobalDSOLocal());
848+
849+
assert(!cir::MissingFeatures::sanitizers());
850+
851+
return gv;
852+
}
853+
752854
void CIRGenModule::emitDeclContext(const DeclContext *dc) {
753855
for (Decl *decl : dc->decls()) {
754856
// Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope

clang/lib/CIR/CodeGen/CIRGenModule.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ class CIRGenModule : public CIRGenTypeCache {
126126
llvm::StringRef name, mlir::Type t,
127127
mlir::Operation *insertPoint = nullptr);
128128

129+
llvm::StringMap<unsigned> cgGlobalNames;
130+
std::string getUniqueGlobalName(const std::string &baseName);
131+
129132
/// Return the mlir::Value for the address of the given global variable.
130133
/// If Ty is non-null and if the global doesn't exist, then it will be created
131134
/// with the specified type instead of whatever the normal requested type
@@ -136,6 +139,14 @@ class CIRGenModule : public CIRGenTypeCache {
136139
getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty = {},
137140
ForDefinition_t isForDefinition = NotForDefinition);
138141

142+
/// Return a constant array for the given string.
143+
mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e);
144+
145+
/// Return a global symbol reference to a constant array for the given string
146+
/// literal.
147+
cir::GlobalOp getGlobalForStringLiteral(const StringLiteral *s,
148+
llvm::StringRef name = ".str");
149+
139150
const TargetCIRGenInfo &getTargetCIRGenInfo();
140151

141152
/// Helpers to convert the presumed location of Clang's SourceLocation to an

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,18 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) {
270270
result =
271271
rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
272272
}
273+
} else if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attr.getElts())) {
274+
// TODO(cir): this diverges from traditional lowering. Normally the string
275+
// would be a global constant that is memcopied.
276+
auto arrayTy = mlir::dyn_cast<cir::ArrayType>(strAttr.getType());
277+
assert(arrayTy && "String attribute must have an array type");
278+
mlir::Type eltTy = arrayTy.getElementType();
279+
for (auto [idx, elt] : llvm::enumerate(strAttr)) {
280+
auto init = rewriter.create<mlir::LLVM::ConstantOp>(
281+
loc, converter->convertType(eltTy), elt);
282+
result =
283+
rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
284+
}
273285
} else {
274286
llvm_unreachable("unexpected ConstArrayAttr elements");
275287
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll
4+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
6+
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
7+
8+
// LLVM: @[[STR1_GLOBAL:.*]] = dso_local global [2 x i8] c"1\00"
9+
// LLVM: @[[STR2_GLOBAL:.*]] = dso_local global [1 x i8] zeroinitializer
10+
// LLVM: @[[STR3_GLOBAL:.*]] = dso_local global [2 x i8] zeroinitializer
11+
12+
// OGCG: @[[STR1_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] c"1\00"
13+
// OGCG: @[[STR2_GLOBAL:.*]] = private unnamed_addr constant [1 x i8] zeroinitializer
14+
// OGCG: @[[STR3_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] zeroinitializer
15+
16+
// Non-empty literal: the checks expect a two-element array ("1" plus the
// terminating null) emitted as a const_array.
char *f1() {
17+
return "1";
18+
}
19+
20+
// CIR: cir.global external @[[STR1_GLOBAL:.*]] = #cir.const_array<"1\00" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
21+
// CIR: cir.func @f1()
22+
// CIR: %[[STR:.*]] = cir.get_global @[[STR1_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>>
23+
24+
// LLVM: define ptr @f1()
25+
// LLVM: store ptr @[[STR1_GLOBAL]], ptr {{.*}}
26+
27+
// OGCG: define {{.*}}ptr @f1()
28+
// OGCG: ret ptr @[[STR1_GLOBAL]]
29+
30+
// Empty literal: only the terminating null remains, so the checks expect a
// one-element zero-initialized array.
char *f2() {
31+
return "";
32+
}
33+
34+
// CIR: cir.global external @[[STR2_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 1>
35+
// CIR: cir.func @f2()
36+
// CIR: %[[STR2:.*]] = cir.get_global @[[STR2_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 1>>
37+
38+
// LLVM: define ptr @f2()
39+
// LLVM: store ptr @[[STR2_GLOBAL]], ptr {{.*}}
40+
41+
// OGCG: define {{.*}}ptr @f2()
42+
// OGCG: ret ptr @[[STR2_GLOBAL]]
43+
44+
// Literal holding an explicit null escape plus the terminating null: the
// checks expect a two-element zero-initialized array.
char *f3() {
45+
return "\00";
46+
}
47+
48+
// CIR: cir.global external @[[STR3_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 2>
49+
// CIR: cir.func @f3()
50+
// CIR: %[[STR3:.*]] = cir.get_global @[[STR3_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>>
51+
52+
// LLVM: define ptr @f3()
53+
// LLVM: store ptr @[[STR3_GLOBAL]], ptr {{.*}}
54+
55+
// OGCG: define {{.*}}ptr @f3()
56+
// OGCG: ret ptr @[[STR3_GLOBAL]]

0 commit comments

Comments
 (0)