Skip to content

Commit 6f86bfa

Browse files
ericastor authored and copybara-github committed
[XLS] Apply our area & delay models for our LUT conversion pass
By estimating when it's beneficial to merge logic into an existing select, we can apply our LUT identification & conversion logic more often. PiperOrigin-RevId: 691566073
1 parent 0686143 commit 6f86bfa

24 files changed

+741
-226
lines changed

xls/build_rules/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,13 @@ bzl_library(
216216
visibility = ["//visibility:private"],
217217
deps = [
218218
":xls_codegen_rules_bzl",
219+
":xls_common_rules_bzl",
219220
":xls_config_rules_bzl",
221+
":xls_dslx_rules_bzl",
220222
":xls_ir_macros_bzl",
221223
":xls_ir_rules_bzl",
222224
":xls_rules_bzl",
225+
":xls_toolchains_bzl",
223226
":xls_type_check_utils_bzl",
224227
"@bazel_skylib//rules:build_test",
225228
"@bazel_skylib//rules:diff_test",

xls/build_rules/xls_config_rules.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ This module exposes the configuration parameters for the XLS build rules. It
2121
load(
2222
"//xls/build_rules:xls_oss_config_rules.bzl",
2323
_CONFIG = "CONFIG",
24+
_DEFAULT_BENCHMARK_SYNTH_AREA_MODEL = "DEFAULT_BENCHMARK_SYNTH_AREA_MODEL",
2425
_DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL = "DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL",
2526
_delay_model_to_standard_cells = "delay_model_to_standard_cells",
2627
_enable_generated_file_wrapper = "enable_generated_file_wrapper",
2728
)
2829

2930
CONFIG = _CONFIG
3031
DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL = _DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL
32+
DEFAULT_BENCHMARK_SYNTH_AREA_MODEL = _DEFAULT_BENCHMARK_SYNTH_AREA_MODEL
3133
delay_model_to_standard_cells = _delay_model_to_standard_cells
3234
enable_generated_file_wrapper = _enable_generated_file_wrapper

xls/build_rules/xls_ir_rules.bzl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ def _optimize_ir(ctx, src, original_input_files):
225225
"optimize_for_best_case_throughput",
226226
"enable_resource_sharing",
227227
"top",
228+
"delay_model",
229+
"area_model",
228230
)
229231

230232
is_args_valid(opt_ir_args, IR_OPT_FLAGS)
@@ -462,6 +464,8 @@ def get_benchmark_ir_cmd(ctx, src, append_cmd_line_args = True):
462464
"use_context_narrowing_analysis",
463465
"optimize_for_best_case_throughput",
464466
"enable_resource_sharing",
467+
"delay_model",
468+
"area_model",
465469
"run_evaluators",
466470
] + _CODEGEN_FLAGS + _SCHEDULING_FLAGS
467471

xls/build_rules/xls_macros.bzl

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ load(
3333
)
3434
load(
3535
"//xls/build_rules:xls_config_rules.bzl",
36+
"DEFAULT_BENCHMARK_SYNTH_AREA_MODEL",
3637
"DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL",
3738
"delay_model_to_standard_cells",
3839
"enable_generated_file_wrapper",
@@ -557,12 +558,14 @@ Examples:
557558
# Setup shared arguments
558559
SHARED_FLAGS = (
559560
"top",
561+
"delay_model",
560562
)
561563
IR_OPT_FLAGS = (
562564
"ir_dump_path",
563565
"passes",
564566
"skip_passes",
565567
"opt_level",
568+
"area_model",
566569
"convert_array_index_to_select",
567570
"use_context_narrowing_analysis",
568571
"optimize_for_best_case_throughput",
@@ -579,8 +582,14 @@ Examples:
579582
if k not in IR_OPT_FLAGS or k in SHARED_FLAGS
580583
}
581584

582-
# Add default opt args (currently empty)
583-
full_opt_args = dict()
585+
# Add default opt args
586+
full_opt_args = {
587+
"delay_model": DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL,
588+
"area_model": DEFAULT_BENCHMARK_SYNTH_AREA_MODEL,
589+
}
590+
if "delay_model" in opt_ir_args and "area_model" not in opt_ir_args:
591+
# Default to the area model matching the delay model.
592+
opt_ir_args["area_model"] = opt_ir_args["delay_model"]
584593
full_opt_args.update(opt_ir_args)
585594

586595
# Add default codegen args

xls/build_rules/xls_oss_config_rules.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ CONFIG = {
2323
}
2424

2525
DEFAULT_BENCHMARK_SYNTH_DELAY_MODEL = "asap7"
26+
DEFAULT_BENCHMARK_SYNTH_AREA_MODEL = "asap7"
2627

2728
def enable_generated_file_wrapper(**kwargs): # @unused
2829
"""The function is a placeholder for enable_generated_file_wrapper.

xls/dev_tools/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,8 @@ cc_binary(
621621
"//xls/common/status:ret_check",
622622
"//xls/common/status:status_macros",
623623
"//xls/data_structures:binary_decision_diagram",
624+
"//xls/estimators/area_model:area_estimator",
625+
"//xls/estimators/area_model:area_estimators",
624626
"//xls/estimators/delay_model:analyze_critical_path",
625627
"//xls/estimators/delay_model:delay_estimator",
626628
"//xls/estimators/delay_model:delay_estimators",

xls/dev_tools/benchmark_main.cc

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
#include "xls/common/status/status_macros.h"
5151
#include "xls/data_structures/binary_decision_diagram.h"
5252
#include "xls/dev_tools/pipeline_metrics.h"
53+
#include "xls/estimators/area_model/area_estimator.h"
54+
#include "xls/estimators/area_model/area_estimators.h"
5355
#include "xls/estimators/delay_model/analyze_critical_path.h"
5456
#include "xls/estimators/delay_model/delay_estimator.h"
5557
#include "xls/estimators/delay_model/delay_estimators.h"
@@ -129,6 +131,7 @@ ABSL_FLAG(
129131
"but at the cost of constraining the schedule and thus increasing area.");
130132
ABSL_FLAG(bool, enable_resource_sharing, false,
131133
"Enable the resource sharing optimization to save area.");
134+
ABSL_FLAG(std::string, area_model, "", "Area model name to use from registry.");
132135
ABSL_FLAG(bool, run_evaluators, true,
133136
"Whether to run the JIT and interpreter.");
134137
ABSL_FLAG(bool, compare_delay_to_synthesis, false,
@@ -176,7 +179,9 @@ int64_t DurationToMs(absl::Duration duration) {
176179

177180
// Runs the standard pipeline on the given package and prints stats about the
178181
// passes and execution time.
179-
absl::Status RunOptimizationAndPrintStats(Package* package) {
182+
absl::Status RunOptimizationAndPrintStats(Package* package,
183+
DelayEstimator* delay_estimator,
184+
AreaEstimator* area_estimator) {
180185
std::unique_ptr<OptimizationCompoundPass> pipeline =
181186
CreateOptimizationPassPipeline();
182187

@@ -197,6 +202,8 @@ absl::Status RunOptimizationAndPrintStats(Package* package) {
197202
: std::make_optional(split_next_value_selects);
198203
pass_options.use_context_narrowing_analysis =
199204
absl::GetFlag(FLAGS_use_context_narrowing_analysis);
205+
pass_options.delay_estimator = delay_estimator;
206+
pass_options.area_estimator = area_estimator;
200207
PassResults pass_results;
201208
OptimizationContext context;
202209
XLS_RETURN_IF_ERROR(
@@ -285,6 +292,18 @@ absl::Status PrintTotalDelay(FunctionBase* f,
285292
return absl::OkStatus();
286293
}
287294

295+
// Prints the total estimated area of `f` to stdout: the sum, over every node
// in `f`, of the per-node area reported by `area_estimator` (in square
// microns).
//
// Returns the estimator's error status if it fails for any node; in that case
// nothing is printed.
absl::Status PrintTotalArea(FunctionBase* f,
                            const AreaEstimator& area_estimator) {
  // Accumulate in floating point. Per-node areas are doubles; summing them
  // into an integer would truncate the running total after every addition and
  // silently discard fractional area.
  double total_area = 0.0;
  for (Node* node : f->nodes()) {
    XLS_ASSIGN_OR_RETURN(double op_area,
                         area_estimator.GetOperationAreaInSquareMicrons(node));
    total_area += op_area;
  }
  std::cout << absl::StrFormat("Total area: %.4f um2\n", total_area);
  return absl::OkStatus();
}
306+
288307
// Returns the critical-path delay through each pipeline stage.
289308
absl::StatusOr<std::vector<int64_t>> GetDelayPerStageInPs(
290309
FunctionBase* f, const PipelineSchedule& schedule,
@@ -695,8 +714,9 @@ absl::Status RunInterpreterAndJit(FunctionBase* function_base,
695714

696715
absl::Status AnalyzeAndPrintCriticalPath(
697716
FunctionBase* f, std::optional<int64_t> effective_clock_period_ps,
698-
const DelayEstimator& delay_estimator, const QueryEngine& query_engine,
699-
PipelineScheduleOrGroup* schedules, synthesis::Synthesizer* synthesizer) {
717+
const DelayEstimator& delay_estimator, const AreaEstimator* area_estimator,
718+
const QueryEngine& query_engine, PipelineScheduleOrGroup* schedules,
719+
synthesis::Synthesizer* synthesizer) {
700720
XLS_ASSIGN_OR_RETURN(
701721
std::vector<CriticalPathEntry> critical_path,
702722
AnalyzeCriticalPath(f, effective_clock_period_ps, delay_estimator));
@@ -718,6 +738,9 @@ absl::Status AnalyzeAndPrintCriticalPath(
718738
}
719739
XLS_RETURN_IF_ERROR(PrintCriticalPath(f, query_engine, delay_diff));
720740
XLS_RETURN_IF_ERROR(PrintTotalDelay(f, delay_estimator));
741+
if (area_estimator != nullptr) {
742+
XLS_RETURN_IF_ERROR(PrintTotalArea(f, *area_estimator));
743+
}
721744
return absl::OkStatus();
722745
}
723746

@@ -747,7 +770,19 @@ absl::Status RealMain(std::string_view path) {
747770
XLS_RETURN_IF_ERROR(
748771
RunInterpreterAndJit(package->GetTop().value(), "unoptimized"));
749772
}
750-
XLS_RETURN_IF_ERROR(RunOptimizationAndPrintStats(package.get()));
773+
DelayEstimator* delay_estimator = nullptr;
774+
if (delay_model_flag_passed) {
775+
XLS_ASSIGN_OR_RETURN(
776+
delay_estimator,
777+
GetDelayEstimator(scheduling_options_flags_proto.delay_model()));
778+
}
779+
AreaEstimator* area_estimator = nullptr;
780+
if (std::string area_model = absl::GetFlag(FLAGS_area_model);
781+
!area_model.empty()) {
782+
XLS_ASSIGN_OR_RETURN(area_estimator, GetAreaEstimator(area_model));
783+
}
784+
XLS_RETURN_IF_ERROR(RunOptimizationAndPrintStats(
785+
package.get(), delay_estimator, area_estimator));
751786

752787
FunctionBase* f = package->GetTop().value();
753788
BddQueryEngine query_engine(BddQueryEngine::kDefaultPathLimit);
@@ -768,15 +803,8 @@ absl::Status RealMain(std::string_view path) {
768803
100;
769804
}
770805
}
771-
const DelayEstimator* pdelay_estimator;
772-
if (!delay_model_flag_passed) {
773-
pdelay_estimator = &GetStandardDelayEstimator();
774-
} else {
775-
XLS_ASSIGN_OR_RETURN(
776-
pdelay_estimator,
777-
GetDelayEstimator(scheduling_options_flags_proto.delay_model()));
778-
}
779-
const auto& delay_estimator = *pdelay_estimator;
806+
const DelayEstimator& defaulted_delay_estimator =
807+
delay_estimator ? *delay_estimator : GetStandardDelayEstimator();
780808
std::unique_ptr<synthesis::Synthesizer> synthesizer;
781809
if (absl::GetFlag(FLAGS_compare_delay_to_synthesis)) {
782810
synthesis::GrpcSynthesizerParameters parameters(
@@ -794,7 +822,8 @@ absl::Status RealMain(std::string_view path) {
794822
scheduling_options_flags_proto.pipeline_stages() > 0;
795823
if (!f->IsProc() && !benchmark_codegen) {
796824
XLS_RETURN_IF_ERROR(AnalyzeAndPrintCriticalPath(
797-
f, effective_clock_period_ps, delay_estimator, query_engine,
825+
f, effective_clock_period_ps, defaulted_delay_estimator, area_estimator,
826+
query_engine,
798827
/*schedules=*/nullptr, synthesizer.get()));
799828
} else if (benchmark_codegen) {
800829
PipelineScheduleOrGroup schedules = PackagePipelineSchedules();
@@ -803,17 +832,17 @@ absl::Status RealMain(std::string_view path) {
803832
SetUpSchedulingOptions(
804833
scheduling_options_flags_proto, package.get()));
805834
absl::Duration scheduling_time;
806-
XLS_ASSIGN_OR_RETURN(schedules,
807-
Schedule(package.get(), scheduling_options,
808-
&delay_estimator, &scheduling_time));
835+
XLS_ASSIGN_OR_RETURN(
836+
schedules, Schedule(package.get(), scheduling_options,
837+
&defaulted_delay_estimator, &scheduling_time));
809838
std::cout << absl::StreamFormat("Scheduling time: %dms\n",
810839
scheduling_time / absl::Milliseconds(1));
811840
XLS_RETURN_IF_ERROR(AnalyzeAndPrintCriticalPath(
812-
f, effective_clock_period_ps, delay_estimator, query_engine,
813-
&schedules, synthesizer.get()));
841+
f, effective_clock_period_ps, defaulted_delay_estimator,
842+
area_estimator, query_engine, &schedules, synthesizer.get()));
814843

815844
XLS_RETURN_IF_ERROR(PrintScheduleInfo(
816-
f, schedules, query_engine, delay_estimator,
845+
f, schedules, query_engine, defaulted_delay_estimator,
817846
scheduling_options_flags_proto.has_clock_period_ps()
818847
? std::make_optional(
819848
scheduling_options_flags_proto.clock_period_ps())

xls/ir/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,7 @@ cc_library(
10081008
":verifier",
10091009
"//xls/common/status:ret_check",
10101010
"//xls/common/status:status_macros",
1011+
"//xls/estimators/area_model:area_estimator",
10111012
"//xls/estimators/delay_model:delay_estimator",
10121013
"@com_google_absl//absl/container:flat_hash_map",
10131014
"@com_google_absl//absl/log",

xls/ir/function_base.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,8 @@ absl::StatusOr<Node*> FunctionBase::GetNode(
135135
}
136136

137137
absl::Status FunctionBase::RemoveNode(Node* node) {
138-
XLS_RET_CHECK(node->users().empty()) << node->GetName();
138+
XLS_RET_CHECK(node->users().empty())
139+
<< node->GetName() << ", users " << node->GetUsersString();
139140
XLS_RET_CHECK(!HasImplicitUse(node)) << node->GetName();
140141
VLOG(4) << absl::StrFormat("Removing node from FunctionBase %s: %s", name(),
141142
node->ToString());

xls/ir/ir_test_base.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "absl/status/statusor.h"
2828
#include "absl/strings/str_format.h"
2929
#include "absl/strings/str_replace.h"
30+
#include "xls/estimators/area_model/area_estimator.h"
3031
#include "xls/estimators/delay_model/delay_estimator.h"
3132
#include "xls/ir/function.h"
3233
#include "xls/ir/function_base.h"
@@ -148,6 +149,41 @@ class TestDelayEstimator : public DelayEstimator {
148149
int64_t base_delay_;
149150
};
150151

152+
class TestAreaEstimator : public AreaEstimator {
153+
public:
154+
explicit TestAreaEstimator(double base_area = 1)
155+
: AreaEstimator("test"), base_area_(base_area) {}
156+
157+
absl::StatusOr<double> GetOperationAreaInSquareMicrons(
158+
Node* node) const override {
159+
switch (node->op()) {
160+
case Op::kAfterAll:
161+
case Op::kMinDelay:
162+
case Op::kBitSlice:
163+
case Op::kConcat:
164+
case Op::kLiteral:
165+
case Op::kParam:
166+
case Op::kNext:
167+
case Op::kReceive:
168+
case Op::kSend:
169+
case Op::kTupleIndex:
170+
return 0.0;
171+
case Op::kUDiv:
172+
case Op::kSDiv:
173+
return 2 * base_area_;
174+
default:
175+
return base_area_;
176+
}
177+
}
178+
179+
private:
180+
absl::StatusOr<double> GetOneBitRegisterAreaInSquareMicrons() const override {
181+
return base_area_;
182+
}
183+
184+
double base_area_;
185+
};
186+
151187
// Helper to record IR before and after some test event which changes it.
152188
struct ScopedRecordIr {
153189
public:

0 commit comments

Comments
 (0)