Skip to content

Enable more Python types to be supported by the DALI python function #5598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dali/benchmark/dali_bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class DALIBenchmark : public benchmark::Fixture {
tl->Resize(shape, DALI_UINT8);

for (int i = 0; i < n; ++i) {
std::memcpy(tl->template mutable_tensor<uint8>(i),
std::memcpy(tl->template mutable_tensor<uint8_t>(i),
jpegs_.data_[i % nImgs], jpegs_.sizes_[i % nImgs]);
tl->SetSourceInfo(i, jpeg_names_[i % nImgs] + "_" + std::to_string(i));
}
Expand Down
2 changes: 1 addition & 1 deletion dali/fuzzing/dali_harness.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class FileListHarness {

for (int i = 0; i < batch_size_; ++i) {
std::memcpy(
input_data_.template mutable_tensor<uint8>(i),
input_data_.template mutable_tensor<uint8_t>(i),
images_.data_[i],
images_.sizes_[i]);
input_data_.SetSourceInfo(i, image_names_[i] + "_" + std::to_string(i));
Expand Down
16 changes: 8 additions & 8 deletions dali/kernels/transpose/transpose_gpu_impl_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,10 @@ TEST(TransposeTiled, BuildDescVectorized) {
TEST(TransposeTiled, BuildDescAndForceMisalignment) {
TensorShape<> shape = { 57, 37, 52, 4 }; // a bunch of primes, just to make it harder
int size = volume(shape);
vector<uint8> in_cpu(size + 4), out_cpu(size + 4);
vector<uint8> ref(size + 4);
vector<uint8_t> in_cpu(size + 4), out_cpu(size + 4);
vector<uint8_t> ref(size + 4);

DeviceBuffer<uint8> in_gpu, out_gpu;
DeviceBuffer<uint8_t> in_gpu, out_gpu;
in_gpu.resize(size + 4);
out_gpu.resize(size + 4);

Expand All @@ -208,7 +208,7 @@ TEST(TransposeTiled, BuildDescAndForceMisalignment) {
SmallVector<int, 6> perm = { 1, 2, 0, 3 };

int grid_size = 1024;
TiledTransposeDesc<uint8> desc;
TiledTransposeDesc<uint8_t> desc;
memset(&desc, 0xCC, sizeof(desc));
InitTiledTranspose(desc, shape, make_span(perm), out_gpu.data() + offset,
in_gpu.data() + offset, grid_size);
Expand Down Expand Up @@ -258,10 +258,10 @@ TEST(TransposeTiled, BuildDescVectorized16BitOpt) {
TEST(TransposeTiled, HighDimensionTest) {
TensorShape<> shape = {3, 3, 5, 7, 23, 3, 37, 4 }; // a bunch of primes, just to make it harder
int size = volume(shape);
vector<uint8> in_cpu(size), out_cpu(size);
vector<uint8> ref(size);
vector<uint8_t> in_cpu(size), out_cpu(size);
vector<uint8_t> ref(size);

DeviceBuffer<uint8> in_gpu, out_gpu;
DeviceBuffer<uint8_t> in_gpu, out_gpu;
in_gpu.resize(size);
out_gpu.resize(size);

Expand All @@ -276,7 +276,7 @@ TEST(TransposeTiled, HighDimensionTest) {
SmallVector<int, 8> perm = { 1, 0, 4, 2, 6, 3, 5, 7 };

int grid_size = 1024;
TiledTransposeDesc<uint8> desc;
TiledTransposeDesc<uint8_t> desc;
memset(&desc, 0xCC, sizeof(desc));
InitTiledTranspose(desc, shape, make_span(perm), out_gpu.data(), in_gpu.data(), grid_size);

Expand Down
4 changes: 2 additions & 2 deletions dali/kernels/transpose/transpose_gpu_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ TEST(TransposeGPU, PerfDeinterleave) {

std::cerr << "Permuting 1-byte data; permutation 2 0 1\ninput shape = \n" << shape << "\n";

RunPerfTest<uint8>(rng, shape, make_span(perm));
RunPerfTest<uint8_t>(rng, shape, make_span(perm));
}


Expand All @@ -226,7 +226,7 @@ TEST(TransposeGPU, PerfInterleave) {

std::cerr << "Permuting 1-byte data; permutation 1 2 0\ninput shape = \n" << shape << "\n";

RunPerfTest<uint8>(rng, shape, make_span(perm));
RunPerfTest<uint8_t>(rng, shape, make_span(perm));
}


Expand Down
2 changes: 1 addition & 1 deletion dali/operators/debug/dump_image.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void DumpImage<CPUBackend>::RunImpl(SampleWorkspace &ws) {
make_string("Only 3-channel and gray images are supported, got input with `", c,
"` channels."));

WriteHWCImage(input.template data<uint8>(),
WriteHWCImage(input.template data<uint8_t>(),
h, w, c, std::to_string(ws.data_idx()) + "-" + suffix_ + "-" + std::to_string(0));

// Forward the input
Expand Down
4 changes: 2 additions & 2 deletions dali/operators/decoder/host/host_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ void HostDecoder::RunImpl(SampleWorkspace &ws) {

// Verify input
DALI_ENFORCE(input.ndim() == 1, "Input must be 1D encoded jpeg string.");
DALI_ENFORCE(IsType<uint8>(input.type()), "Input must be stored as uint8 data.");
DALI_ENFORCE(IsType<uint8_t>(input.type()), "Input must be stored as uint8 data.");

std::unique_ptr<Image> img;
try {
DomainTimeRange tr(make_string("Decode #", ws.data_idx(), " fast_idct=", use_fast_idct_),
DomainTimeRange::kBlue1);
img = ImageFactory::CreateImage(input.data<uint8>(), input.size(), output_type_);
img = ImageFactory::CreateImage(input.data<uint8_t>(), input.size(), output_type_);
img->SetCropWindowGenerator(GetCropWindowGenerator(ws.data_idx()));
img->SetUseFastIdct(use_fast_idct_);
img->Decode();
Expand Down
2 changes: 1 addition & 1 deletion dali/operators/decoder/image/image_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace dali {

namespace {

bool CheckIsJPEG(const uint8 *jpeg, int) {
bool CheckIsJPEG(const uint8_t *jpeg, int) {
DALI_ENFORCE(jpeg);
return (jpeg[0] == 255) && (jpeg[1] == 216);
}
Expand Down
4 changes: 2 additions & 2 deletions dali/operators/decoder/image/jpeg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ JpegImage::JpegImage(const uint8_t *encoded_buffer,
}

#ifndef DALI_USE_JPEG_TURBO
bool get_jpeg_size(const uint8 *data, size_t data_size, int *height, int *width, int *nchannels) {
bool get_jpeg_size(const uint8_t *data, size_t data_size, int *height, int *width, int *nchannels) {
unsigned int i = 0;
if (!(data[i] == 0xFF && data[i + 1] == 0xD8))
return false; // Not a valid SOI header
Expand Down Expand Up @@ -59,7 +59,7 @@ bool get_jpeg_size(const uint8 *data, size_t data_size, int *height, int *width,
#endif

std::pair<std::shared_ptr<uint8_t>, Image::Shape>
JpegImage::DecodeImpl(DALIImageType type, const uint8 *jpeg, size_t length) const {
JpegImage::DecodeImpl(DALIImageType type, const uint8_t *jpeg, size_t length) const {
const auto shape = PeekShapeImpl(jpeg, length);
const auto h = shape[0];
const auto w = shape[1];
Expand Down
2 changes: 1 addition & 1 deletion dali/operators/decoder/image/tiff_libtiff.cc
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ Image::Shape TiffImage_Libtiff::PeekShapeImpl(const uint8_t *encoded_buffer,

std::pair<std::shared_ptr<uint8_t>, Image::Shape>
TiffImage_Libtiff::DecodeImpl(DALIImageType image_type,
const uint8 *encoded_buffer,
const uint8_t *encoded_buffer,
size_t length) const {
// This decoder only handles bitdepth=8, non-tiled and top-left orientation
// Other cases go to OpenCV's based decoder
Expand Down
6 changes: 3 additions & 3 deletions dali/operators/decoder/jpeg/jpeg_handle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,12 @@ boolean MemFillInputBuffer(j_decompress_ptr cinfo) {
void MemTermSource(j_decompress_ptr cinfo) {}

// -----------------------------------------------------------------------------
void MemSkipInputData(j_decompress_ptr cinfo, int64 jump) {
void MemSkipInputData(j_decompress_ptr cinfo, int64_t jump) {
MemSourceMgr *src = reinterpret_cast<MemSourceMgr *>(cinfo->src);
if (jump < 0) {
return;
}
if (jump > static_cast<int64>(src->pub.bytes_in_buffer)) {
if (jump > static_cast<int64_t>(src->pub.bytes_in_buffer)) {
src->pub.bytes_in_buffer = 0;
(void)MemFillInputBuffer(cinfo); // warn with a fake EOI or error
} else {
Expand All @@ -202,7 +202,7 @@ void MemSkipInputData(j_decompress_ptr cinfo, int64 jump) {

// -----------------------------------------------------------------------------
void SetSrc(j_decompress_ptr cinfo, const void *data,
uint64 datasize, bool try_recover_truncated_jpeg) {
uint64_t datasize, bool try_recover_truncated_jpeg) {
MemSourceMgr *src;

cinfo->src = reinterpret_cast<struct jpeg_source_mgr *>(
Expand Down
4 changes: 2 additions & 2 deletions dali/operators/decoder/jpeg/jpeg_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@ typedef struct {
typedef struct {
struct jpeg_source_mgr pub;
const unsigned char *data;
uint64 datasize;
uint64_t datasize;
bool try_recover_truncated_jpeg;
} MemSourceMgr;

void SetSrc(j_decompress_ptr cinfo, const void *data,
uint64 datasize, bool try_recover_truncated_jpeg);
uint64_t datasize, bool try_recover_truncated_jpeg);

// JPEG destination: we will store all the data in a buffer "buffer" of total
// size "bufsize"; if the buffer overflows, we will be in trouble.
Expand Down
22 changes: 11 additions & 11 deletions dali/operators/decoder/jpeg/jpeg_mem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ bool IsCropWindowValid(const UncompressFlags& flags, int input_image_width,
flags.crop_x + flags.crop_width <= input_image_width;
}

std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) {
std::unique_ptr<uint8_t[]> UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) {
// unpack the argball
const int datasize = argball->datasize_;
const auto& flags = argball->flags_;
Expand All @@ -97,7 +97,7 @@ std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler

// Declare buffers here so that we can free on error paths
std::unique_ptr<JSAMPLE[]> temp;
std::unique_ptr<uint8[]> dstdata;
std::unique_ptr<uint8_t[]> dstdata;
JSAMPLE *tempdata = nullptr;

// Initialize libjpeg structures to have a memory source
Expand Down Expand Up @@ -170,9 +170,9 @@ std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler
// OOM'ing doing the decompress
jpeg_calc_output_dimensions(&cinfo);

int64 total_size = static_cast<int64>(cinfo.output_height) *
static_cast<int64>(cinfo.output_width) *
static_cast<int64>(cinfo.num_components);
int64_t total_size = static_cast<int64_t>(cinfo.output_height) *
static_cast<int64_t>(cinfo.output_width) *
static_cast<int64_t>(cinfo.num_components);
// Some of the internal routines do not gracefully handle ridiculously
// large images, so fail fast.
if (cinfo.output_width <= 0 || cinfo.output_height <= 0) {
Expand Down Expand Up @@ -391,7 +391,7 @@ std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler
if (components == 4) {
// Start on the last line.
JSAMPLE* scanlineptr = static_cast<JSAMPLE*>(
dstdata.get() + static_cast<int64>(target_output_height - 1) * stride);
dstdata.get() + static_cast<int64_t>(target_output_height - 1) * stride);
const JSAMPLE kOpaque = -1; // All ones appropriate for JSAMPLE.
const int right_rgb = (target_output_width - 1) * 3;
const int right_rgba = (target_output_width - 1) * 4;
Expand Down Expand Up @@ -471,7 +471,7 @@ std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler
}

const auto full_image = std::move(dstdata);
dstdata = std::unique_ptr<uint8[]>(
dstdata = std::unique_ptr<uint8_t[]>(
new JSAMPLE[target_output_width, target_output_height, components]);
if (dstdata == nullptr) {
return nullptr;
Expand All @@ -490,8 +490,8 @@ std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler
argball->height_read_ = target_output_height;
}
const int crop_offset = flags.crop_x * components * sizeof(JSAMPLE);
const uint8* full_image_ptr = full_image.get() + flags.crop_y * full_image_stride;
uint8* crop_image_ptr = dstdata.get();
const uint8_t* full_image_ptr = full_image.get() + flags.crop_y * full_image_stride;
uint8_t* crop_image_ptr = dstdata.get();
for (int i = 0; i < argball->height_read_; i++) {
memcpy(crop_image_ptr, full_image_ptr + crop_offset, min_stride);
crop_image_ptr += stride;
Expand All @@ -513,7 +513,7 @@ std::unique_ptr<uint8[]> UncompressLow(const void* srcdata, FewerArgsForCompiler
// associated libraries aren't good enough to guarantee that 7
// parameters won't get clobbered by the longjmp. So we help
// it out a little.
std::unique_ptr<uint8[]> Uncompress(const void* srcdata, int datasize,
std::unique_ptr<uint8_t[]> Uncompress(const void* srcdata, int datasize,
const UncompressFlags& flags) {
FewerArgsForCompiler argball(datasize, flags);
auto dstdata = UncompressLow(srcdata, &argball);
Expand All @@ -532,7 +532,7 @@ std::unique_ptr<uint8[]> Uncompress(const void* srcdata, int datasize,
// set the unread pixels to black
if (argball.height_read_ != argball.height_) {
const int first_bad_line = argball.height_read_;
uint8* start = dstdata.get() + first_bad_line * argball.stride_;
uint8_t* start = dstdata.get() + first_bad_line * argball.stride_;
const int nbytes = (argball.height_ - first_bad_line) * argball.stride_;
memset(static_cast<void*>(start), 0, nbytes);
}
Expand Down
2 changes: 1 addition & 1 deletion dali/operators/decoder/jpeg/jpeg_mem.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ struct UncompressFlags {
// datasize.
// The function returns a unique pointer (std::unique_ptr) to the uncompressed data or a null
// pointer if there was an error.
std::unique_ptr<uint8[]> Uncompress(const void* srcdata, int datasize,
std::unique_ptr<uint8_t[]> Uncompress(const void* srcdata, int datasize,
const UncompressFlags& flags);

// Read jpeg header and get image information. Returns true on success.
Expand Down
2 changes: 1 addition & 1 deletion dali/operators/decoder/peek_shape/peek_image_shape.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class PeekImageShape : public StatelessOperator<CPUBackend> {
thread_pool.AddWork([sample_id, &input, &output, this] (int tid) {
const auto& image = input[sample_id];
auto img =
ImageFactory::CreateImage(image.data<uint8>(), image.shape().num_elements(), {});
ImageFactory::CreateImage(image.data<uint8_t>(), image.shape().num_elements(), {});
auto shape = img->PeekShape();
TYPE_SWITCH(output_type_, type2id, type,
(int32_t, uint32_t, int64_t, uint64_t, float, double),
Expand Down
6 changes: 4 additions & 2 deletions dali/operators/generic/flip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

#include <vector>
#include "dali/core/static_switch.h"
#include "dali/core/error_handling.h"
#include "dali/operators/generic/flip.h"
#include "dali/kernels/imgproc/flip_cpu.h"
#include "dali/kernels/kernel_params.h"
Expand Down Expand Up @@ -42,7 +44,7 @@ Flip<CPUBackend>::Flip(const OpSpec &spec)
void RunFlip(Tensor<CPUBackend> &output, const Tensor<CPUBackend> &input,
const TensorLayout &layout,
bool horizontal, bool vertical, bool depthwise) {
DALI_TYPE_SWITCH(input.type(), DType,
TYPE_SWITCH(input.type(), type2id, DType, (DALI_NUMERIC_TYPES), (
auto output_ptr = output.mutable_data<DType>();
auto input_ptr = input.data<DType>();
auto kernel = kernels::FlipCPU<DType>();
Expand All @@ -54,7 +56,7 @@ void RunFlip(Tensor<CPUBackend> &output, const Tensor<CPUBackend> &input,
auto out_shape = reqs.output_shapes[0][0].to_static<flip_ndim>();
auto out_view = kernels::OutTensorCPU<DType, flip_ndim>(output_ptr, out_shape);
kernel.Run(ctx, out_view, in_view, depthwise, vertical, horizontal);
)
), (DALI_FAIL(make_string("The element type ", input.type(), " is not supported.")))); // NOLINT
}

template <>
Expand Down
11 changes: 6 additions & 5 deletions dali/operators/generic/flip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include <cuda_runtime_api.h>
#include <vector>
#include "dali/core/static_switch.h"
#include "dali/core/error_handling.h"
#include "dali/kernels/imgproc/flip_gpu.cuh"
#include "dali/operators/generic/flip.h"
#include "dali/operators/generic/flip_util.h"
Expand All @@ -25,10 +27,9 @@ template <>
Flip<GPUBackend>::Flip(const OpSpec &spec) : StatelessOperator<GPUBackend>(spec) {}

void RunKernel(TensorList<GPUBackend> &output, const TensorList<GPUBackend> &input,
const std::vector<int32> &depthwise, const std::vector<int32> &horizontal,
const std::vector<int32> &vertical, cudaStream_t stream) {
DALI_TYPE_SWITCH(
input.type(), DType,
const std::vector<int32_t> &depthwise, const std::vector<int32_t> &horizontal,
const std::vector<int32_t> &vertical, cudaStream_t stream) {
TYPE_SWITCH(input.type(), type2id, DType, (DALI_NUMERIC_TYPES), (
auto in_shape = TransformShapes(input.shape(), input.GetLayout());
auto in_view = reshape<flip_ndim>(view<const DType>(input), in_shape);
kernels::KernelContext ctx;
Expand All @@ -38,7 +39,7 @@ void RunKernel(TensorList<GPUBackend> &output, const TensorList<GPUBackend> &inp
auto out_shape = reqs.output_shapes[0].to_static<flip_ndim>();
auto out_view = reshape<flip_ndim>(view<DType>(output), out_shape);
kernel.Run(ctx, out_view, in_view, depthwise, vertical, horizontal);
)
), (DALI_FAIL(make_string("The element type ", input.type(), " is not supported.")))); // NOLINT
}

template <>
Expand Down
26 changes: 13 additions & 13 deletions dali/operators/image/paste/paste.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ __launch_bounds__(PASTE_BLOCKSIZE, 1)
void BatchedPaste(
const int N,
const int C,
const uint8* const __restrict__ fill_value,
const uint8* const * const __restrict__ in_batch,
uint8* const* const __restrict__ out_batch,
const uint8_t* const __restrict__ fill_value,
const uint8_t* const * const __restrict__ in_batch,
uint8_t* const* const __restrict__ out_batch,
const int* const __restrict__ in_out_dims_paste_yx) {
const int n = blockIdx.x;

Expand All @@ -40,7 +40,7 @@ void BatchedPaste(
constexpr int nWaves = blockSize / nThreadsPerWave;
constexpr int MAX_C = 1024;

__shared__ uint8 rgb[MAX_C];
__shared__ uint8_t rgb[MAX_C];
__shared__ int jump[MAX_C];
for (int i = threadIdx.x; i < C; i += blockDim.x) {
rgb[i] = fill_value[i % C];
Expand All @@ -55,8 +55,8 @@ void BatchedPaste(
const int paste_y = in_out_dims_paste_yx[offset + 4];
const int paste_x = in_out_dims_paste_yx[offset + 5];

const uint8* const input_ptr = in_batch[n];
uint8 * const output_ptr = out_batch[n];
const uint8_t* const input_ptr = in_batch[n];
uint8_t * const output_ptr = out_batch[n];

__syncthreads();

Expand Down Expand Up @@ -112,9 +112,9 @@ void Paste<GPUBackend>::RunHelper(Workspace &ws) {
BatchedPaste<<<curr_batch_size, PASTE_BLOCKSIZE, 0, ws.stream()>>>(
curr_batch_size,
C_,
fill_value_.template data<uint8>(),
input_ptrs_gpu_.template data<const uint8*>(),
output_ptrs_gpu_.template data<uint8*>(),
fill_value_.template data<uint8_t>(),
input_ptrs_gpu_.template data<const uint8_t*>(),
output_ptrs_gpu_.template data<uint8_t*>(),
in_out_dims_paste_yx_gpu_.template data<int>());
}

Expand Down Expand Up @@ -173,10 +173,10 @@ void Paste<GPUBackend>::SetupSampleParams(Workspace &ws) {
output.SetLayout("HWC");

for (int i = 0; i < curr_batch_size; ++i) {
input_ptrs_.template mutable_data<const uint8*>()[i] =
input.template tensor<uint8>(i);
output_ptrs_.template mutable_data<uint8*>()[i] =
output.template mutable_tensor<uint8>(i);
input_ptrs_.template mutable_data<const uint8_t*>()[i] =
input.template tensor<uint8_t>(i);
output_ptrs_.template mutable_data<uint8_t*>()[i] =
output.template mutable_tensor<uint8_t>(i);
}

// Copy pointers on the GPU for fast access
Expand Down
Loading
Loading