Skip to content

Commit 4b89d19

Browse files
committed
Add scope filtering for symbol extraction
Added specific filters for the Avro, Protobuf and Wire libraries. It is not useful to have symbols for the classes generated by those libraries.
1 parent bed9326 commit 4b89d19

File tree

12 files changed

+323
-11
lines changed

12 files changed

+323
-11
lines changed

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/agent/DebuggerAgent.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,12 @@
1111
import com.datadog.debugger.sink.ProbeStatusSink;
1212
import com.datadog.debugger.sink.SnapshotSink;
1313
import com.datadog.debugger.sink.SymbolSink;
14+
import com.datadog.debugger.symbol.AvroFilter;
15+
import com.datadog.debugger.symbol.ProtoFilter;
16+
import com.datadog.debugger.symbol.ScopeFilter;
1417
import com.datadog.debugger.symbol.SymDBEnablement;
1518
import com.datadog.debugger.symbol.SymbolAggregator;
19+
import com.datadog.debugger.symbol.WireFilter;
1620
import com.datadog.debugger.uploader.BatchUploader;
1721
import com.datadog.debugger.util.ClassNameFiltering;
1822
import com.datadog.debugger.util.DebuggerMetrics;
@@ -41,7 +45,9 @@
4145
import java.nio.file.Path;
4246
import java.nio.file.Paths;
4347
import java.time.Duration;
48+
import java.util.Arrays;
4449
import java.util.Collections;
50+
import java.util.List;
4551
import java.util.concurrent.atomic.AtomicBoolean;
4652
import java.util.stream.Collectors;
4753
import java.util.zip.ZipOutputStream;
@@ -155,9 +161,14 @@ public static void startDynamicInstrumentation() {
155161
if (configurationPoller != null) {
156162
if (config.isSymbolDatabaseEnabled()) {
157163
initClassNameFilter();
164+
List<ScopeFilter> scopeFilters =
165+
Arrays.asList(new AvroFilter(), new ProtoFilter(), new WireFilter());
158166
SymbolAggregator symbolAggregator =
159167
new SymbolAggregator(
160-
classNameFilter, sink.getSymbolSink(), config.getSymbolDatabaseFlushThreshold());
168+
classNameFilter,
169+
scopeFilters,
170+
sink.getSymbolSink(),
171+
config.getSymbolDatabaseFlushThreshold());
161172
symbolAggregator.start();
162173
symDBEnablement =
163174
new SymDBEnablement(instrumentation, config, symbolAggregator, classNameFilter);
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.datadog.debugger.symbol;
2+
3+
public class AvroFilter implements ScopeFilter {
4+
@Override
5+
public boolean filterOut(Scope scope) {
6+
if (scope == null) {
7+
return false;
8+
}
9+
LanguageSpecifics languageSpecifics = scope.getLanguageSpecifics();
10+
if (languageSpecifics != null) {
11+
String superClass = languageSpecifics.getSuperClass();
12+
// Allow Avro data classes that extend SpecificRecordBase.
13+
if ("org.apache.avro.specific.SpecificRecordBase".equals(superClass)) {
14+
return false;
15+
}
16+
}
17+
// Filter out classes that appear to be just schema wrappers.
18+
if (scope.getScopeType() == ScopeType.CLASS
19+
&& scope.getSymbols() != null
20+
&& scope.getSymbols().stream()
21+
.anyMatch(
22+
it ->
23+
it.getSymbolType() == SymbolType.STATIC_FIELD
24+
&& "SCHEMA$".equals(it.getName())
25+
&& it.getType() != null
26+
&& it.getType().contains("org.apache.avro.Schema"))) {
27+
return true;
28+
}
29+
// Otherwise, do not filter.
30+
return false;
31+
}
32+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import java.util.List;
4+
5+
public class ProtoFilter implements ScopeFilter {
6+
@Override
7+
public boolean filterOut(Scope scope) {
8+
if (scope == null) {
9+
return false;
10+
}
11+
LanguageSpecifics languageSpecifics = scope.getLanguageSpecifics();
12+
if (languageSpecifics != null) {
13+
List<String> interfaces = languageSpecifics.getInterfaces();
14+
if (interfaces != null) {
15+
if (interfaces.contains("com.google.protobuf.MessageOrBuilder")) {
16+
// MessageOrBuilder is an interface implemented by both message classes and their
17+
// builders.
18+
// Scopes implementing this interface are filtered out because they do not represent
19+
// concrete data structures but rather interfaces for accessing or building messages.
20+
return true;
21+
}
22+
}
23+
String superClass = languageSpecifics.getSuperClass();
24+
if ("com.google.protobuf.AbstractParser".equals(superClass)) {
25+
// AbstractParser is a base class for parsing protobuf messages. Scopes with this super
26+
// class are filtered out because they are utility classes for parsing and do not contain
27+
// actual data fields.
28+
return true;
29+
}
30+
if ("com.google.protobuf.GeneratedMessageV3$Builder".equals(superClass)) {
31+
// GeneratedMessageV3$Builder is a builder class for constructing GeneratedMessageV3
32+
// instances. These scopes are filtered out because they are used for building messages and
33+
// do not represent the final data structure.
34+
return true;
35+
}
36+
}
37+
// If none of the above matched, see if the class has a proto descriptor field. This is the case
38+
// for wrapper
39+
// classes (`OuterClass`) and `Enum` classes. They contain metadata, not data.
40+
if (hasProtoDescriptorField(scope)) {
41+
return true;
42+
}
43+
// Probably no protobuf, pass
44+
return false;
45+
}
46+
47+
private boolean hasProtoDescriptorField(Scope scope) {
48+
return scope.getScopeType() == ScopeType.CLASS
49+
&& scope.getSymbols() != null
50+
&& scope.getSymbols().stream()
51+
.anyMatch(
52+
it ->
53+
it.getSymbolType() == SymbolType.STATIC_FIELD
54+
&& it.getType() != null
55+
&& it.getType().contains("com.google.protobuf.Descriptors"));
56+
}
57+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
package com.datadog.debugger.symbol;
2+
3+
public interface ScopeFilter {
4+
/** returns true if the scope should be excluded */
5+
boolean filterOut(Scope scope);
6+
}

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/symbol/SymbolAggregator.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public class SymbolAggregator {
3838
private static final int CLASSFILE_BUFFER_SIZE = 8192;
3939

4040
private final DebuggerContext.ClassNameFilter classNameFilter;
41+
private final List<ScopeFilter> scopeFilters;
4142
private final SymbolSink sink;
4243
private final int symbolFlushThreshold;
4344
private final Map<String, Scope> jarScopesByName = new HashMap<>();
@@ -51,8 +52,12 @@ public class SymbolAggregator {
5152
private final Set<String> alreadyScannedJars = ConcurrentHashMap.newKeySet();
5253

5354
public SymbolAggregator(
54-
DebuggerContext.ClassNameFilter classNameFilter, SymbolSink sink, int symbolFlushThreshold) {
55+
DebuggerContext.ClassNameFilter classNameFilter,
56+
List<ScopeFilter> scopeFilters,
57+
SymbolSink sink,
58+
int symbolFlushThreshold) {
5559
this.classNameFilter = classNameFilter;
60+
this.scopeFilters = scopeFilters;
5661
this.sink = sink;
5762
this.symbolFlushThreshold = symbolFlushThreshold;
5863
}
@@ -119,10 +124,18 @@ public void parseClass(
119124
}
120125
LOGGER.debug("Extracting Symbols from: {}, located in: {}", className, jarName);
121126
Scope jarScope = SymbolExtractor.extract(classfileBuffer, jarName);
127+
jarScope = applyFilters(jarScope);
122128
addJarScope(jarScope, false);
123129
symDBReport.incClassCount(jarName);
124130
}
125131

132+
private Scope applyFilters(Scope jarScope) {
133+
for (ScopeFilter filter : scopeFilters) {
134+
jarScope.getScopes().removeIf(filter::filterOut);
135+
}
136+
return jarScope;
137+
}
138+
126139
private void flushRemainingScopes(SymbolAggregator symbolAggregator) {
127140
synchronized (jarScopeLock) {
128141
if (jarScopesByName.isEmpty()) {
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import java.util.List;
4+
5+
public class WireFilter implements ScopeFilter {
6+
@Override
7+
public boolean filterOut(Scope scope) {
8+
// Filter out classes generated by Square Wire: https://square.github.io/wire/
9+
if (scope == null) {
10+
return false;
11+
}
12+
LanguageSpecifics languageSpecifics = scope.getLanguageSpecifics();
13+
if (languageSpecifics == null) {
14+
return false;
15+
}
16+
List<String> interfaces = languageSpecifics.getInterfaces();
17+
if (interfaces != null) {
18+
if (interfaces.contains("com.squareup.wire.Message")) {
19+
// Pass-through for Message since it contains data
20+
return false;
21+
}
22+
if (interfaces.stream().anyMatch(it -> it.startsWith("com.squareup.wire"))) {
23+
return true;
24+
}
25+
}
26+
String superClass = languageSpecifics.getSuperClass();
27+
if (superClass != null) {
28+
if (superClass.startsWith("com.squareup.wire")) {
29+
return true;
30+
}
31+
}
32+
// Probably no protobuf, pass
33+
return false;
34+
}
35+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import static java.util.Arrays.asList;
4+
import static org.junit.jupiter.api.Assertions.*;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
class AvroFilterTest {
9+
@Test
10+
void filterOut() {
11+
AvroFilter avroFilter = new AvroFilter();
12+
assertFalse(avroFilter.filterOut(null));
13+
Scope scope = Scope.builder(ScopeType.CLASS, "", 0, 0).build();
14+
assertFalse(avroFilter.filterOut(scope));
15+
scope =
16+
Scope.builder(ScopeType.CLASS, "", 0, 0)
17+
.languageSpecifics(
18+
new LanguageSpecifics.Builder()
19+
.superClass("org.apache.avro.specific.SpecificRecordBase")
20+
.build())
21+
.build();
22+
assertFalse(avroFilter.filterOut(scope));
23+
scope =
24+
Scope.builder(ScopeType.CLASS, "", 0, 0)
25+
.symbols(
26+
asList(
27+
new Symbol(
28+
SymbolType.STATIC_FIELD, "SCHEMA$", 0, "org.apache.avro.Schema", null)))
29+
.build();
30+
assertTrue(avroFilter.filterOut(scope));
31+
}
32+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package com.datadog.debugger.symbol;
2+
3+
import static java.util.Arrays.asList;
4+
import static org.junit.jupiter.api.Assertions.*;
5+
6+
import org.junit.jupiter.api.Test;
7+
8+
class ProtoFilterTest {
9+
@Test
10+
void filterOut() {
11+
ProtoFilter protoFilter = new ProtoFilter();
12+
assertFalse(protoFilter.filterOut(null));
13+
Scope scope = Scope.builder(ScopeType.CLASS, "", 0, 0).build();
14+
assertFalse(protoFilter.filterOut(scope));
15+
scope =
16+
Scope.builder(ScopeType.CLASS, "", 0, 0)
17+
.languageSpecifics(
18+
new LanguageSpecifics.Builder()
19+
.addInterfaces(asList("com.google.protobuf.MessageOrBuilder"))
20+
.build())
21+
.build();
22+
assertTrue(protoFilter.filterOut(scope));
23+
scope =
24+
Scope.builder(ScopeType.CLASS, "", 0, 0)
25+
.languageSpecifics(
26+
new LanguageSpecifics.Builder()
27+
.superClass("com.google.protobuf.AbstractParser")
28+
.build())
29+
.build();
30+
assertTrue(protoFilter.filterOut(scope));
31+
scope =
32+
Scope.builder(ScopeType.CLASS, "", 0, 0)
33+
.languageSpecifics(
34+
new LanguageSpecifics.Builder()
35+
.superClass("com.google.protobuf.GeneratedMessageV3$Builder")
36+
.build())
37+
.build();
38+
assertTrue(protoFilter.filterOut(scope));
39+
scope =
40+
Scope.builder(ScopeType.CLASS, "", 0, 0)
41+
.symbols(
42+
asList(
43+
new Symbol(
44+
SymbolType.STATIC_FIELD,
45+
"SCHEMA$",
46+
0,
47+
"com.google.protobuf.Descriptors",
48+
null)))
49+
.build();
50+
assertTrue(protoFilter.filterOut(scope));
51+
}
52+
}

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/symbol/SymDBEnablementTest.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.datadog.debugger.symbol;
22

3+
import static java.util.Collections.emptyList;
34
import static org.junit.jupiter.api.Assertions.assertEquals;
45
import static org.junit.jupiter.api.Assertions.assertFalse;
56
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -63,7 +64,7 @@ public void enableDisableSymDBThroughRC() throws Exception {
6364
new SymDBEnablement(
6465
instr,
6566
config,
66-
new SymbolAggregator(classNameFiltering, symbolSink, 1),
67+
new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1),
6768
classNameFiltering);
6869
symDBEnablement.accept(ParsedConfigKey.parse(CONFIG_KEY), UPlOAD_SYMBOL_TRUE, null);
6970
waitForUpload(symDBEnablement);
@@ -79,7 +80,7 @@ public void removeSymDBConfig() throws Exception {
7980
new SymDBEnablement(
8081
instr,
8182
config,
82-
new SymbolAggregator(classNameFiltering, symbolSink, 1),
83+
new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1),
8384
classNameFiltering);
8485
symDBEnablement.accept(ParsedConfigKey.parse(CONFIG_KEY), UPlOAD_SYMBOL_TRUE, null);
8586
waitForUpload(symDBEnablement);
@@ -96,7 +97,7 @@ public void noIncludesFilterOutDatadogClass() {
9697
new SymDBEnablement(
9798
instr,
9899
config,
99-
new SymbolAggregator(classNameFiltering, symbolSink, 1),
100+
new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1),
100101
classNameFiltering);
101102
symDBEnablement.startSymbolExtraction();
102103
ArgumentCaptor<SymbolExtractionTransformer> captor =
@@ -122,7 +123,7 @@ public void parseLoadedClass() throws ClassNotFoundException, IOException {
122123
.collect(Collectors.toSet()));
123124
ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll();
124125
SymbolAggregator symbolAggregator =
125-
spy(new SymbolAggregator(classNameFiltering, symbolSink, 1));
126+
spy(new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1));
126127
SymDBEnablement symDBEnablement =
127128
new SymDBEnablement(instr, config, symbolAggregator, classNameFiltering);
128129
symDBEnablement.startSymbolExtraction();
@@ -150,7 +151,7 @@ public void parseLoadedClassFromDirectory()
150151
.collect(Collectors.toSet()));
151152
ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll();
152153
SymbolAggregator symbolAggregator =
153-
spy(new SymbolAggregator(classNameFiltering, symbolSink, 1));
154+
spy(new SymbolAggregator(classNameFiltering, emptyList(), symbolSink, 1));
154155
SymDBEnablement symDBEnablement =
155156
new SymDBEnablement(instr, config, symbolAggregator, classNameFiltering);
156157
symDBEnablement.startSymbolExtraction();
@@ -171,7 +172,8 @@ public void noDuplicateSymbolExtraction() {
171172
Collections.singleton("org.springframework."),
172173
Collections.singleton("com.datadog.debugger."),
173174
Collections.emptySet());
174-
SymbolAggregator symbolAggregator = new SymbolAggregator(classNameFiltering, mockSymbolSink, 1);
175+
SymbolAggregator symbolAggregator =
176+
new SymbolAggregator(classNameFiltering, emptyList(), mockSymbolSink, 1);
175177
SymDBEnablement symDBEnablement =
176178
new SymDBEnablement(instr, config, symbolAggregator, classNameFiltering);
177179
doAnswer(

0 commit comments

Comments
 (0)