Open
Description
When sliding a producer backwards, values beyond the end of the currently computed block are used. They are valid because they were computed on a previous loop iteration of the consumer.
However, if you split the producer with the PredicateLoads tail strategy (TailStrategy::PredicateLoads), these needed beyond-the-end values can be clobbered with garbage. Here's a failure case:
#include "Halide.h"
using namespace Halide;
// Reproduction case: g reads a three-wide window of f at coordinates that
// decrease as x increases, while f is scheduled with store_root() and
// compute_at(g, x) and vectorized by 8 with TailStrategy::PredicateLoads.
// Returns 0 when every output element equals 3 * (w - x), and 1 (after
// printing the first mismatch) otherwise.
int main(int argc, char **argv) {
    // Extent of the realized output and the mirror point for g's indexing into f.
    const int w = 1024;

    Func input, f, g;
    Var x;

    // Pipeline: input is the identity, f copies it, and g sums three
    // neighbouring f values at mirrored coordinates.
    input(x) = x;
    f(x) = input(x);
    g(x) = f(w - x - 1) + f(w - x) + f(w - x + 1);

    // Schedule under test.
    input.compute_root();
    f.store_root().compute_at(g, x);
    f.vectorize(x, 8, TailStrategy::PredicateLoads);

    Buffer<int> buf = g.realize({w});

    // f(k) == k, so the three taps sum to 3 * (w - i) at output index i.
    for (int i = 0; i < w; i++) {
        const int expected = 3 * (w - i);
        if (buf(i) == expected) {
            continue;
        }
        printf("buf(%d) = %d instead of %d\n", i, buf(i), expected);
        return 1;
    }

    return 0;
}