Skip to content

Commit 3ac730d

Browse files
committed
Bump to AcceleratedKernels v0.3.0
1 parent 250baeb commit 3ac730d

10 files changed

+124
-6
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,8 @@ docs/site/
2222
# committed for packages, but should be committed for applications that require a static
2323
# environment.
2424
Manifest.toml
25+
26+
# Files generated while profiling
27+
*.log
28+
*.pb.gz
29+
*.obj

Project.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "ImplicitBVH"
22
uuid = "932a18dc-bb55-4cd5-bdd6-1368ec9cea29"
33
authors = ["Andrei Leonard Nicusan <[email protected]> and contributors"]
4-
version = "0.5.1"
4+
version = "0.5.2"
55

66
[deps]
77
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
@@ -13,7 +13,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
1313
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1414

1515
[compat]
16-
AcceleratedKernels = "0.2"
16+
AcceleratedKernels = "0.3"
1717
ArgCheck = "2"
1818
Atomix = "0.1, 1"
1919
DocStringExtensions = "0.9"

prototype/Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
33
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
44
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
5-
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
65
Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f"
76
ImplicitBVH = "932a18dc-bb55-4cd5-bdd6-1368ec9cea29"
87
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
8+
Metal = "dde4c033-4e86-420c-a63e-0dd931031962"
99
PProf = "e4faabce-9ead-11e9-39d9-4379958e3056"
1010
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
1111
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"

prototype/interacting_particles.jl

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
using ImplicitBVH
2+
using ImplicitBVH: BBox, BSphere
3+
import AcceleratedKernels as AK
4+
5+
using Profile
6+
using PProf
7+
8+
using Metal
9+
# using AtomixMetal
10+
11+
using Random
12+
Random.seed!(0)
13+
14+
15+
"""
    get_interacting_pairs(particle_centers::AbstractMatrix, cutoff::AbstractFloat)

Build one `BSphere{Float32}` of radius `cutoff` per column of `particle_centers` (rows 1-3
are read as the sphere centre), construct a `BVH` over them and return the `contacts`
field of its self-traversal.
"""
function get_interacting_pairs(particle_centers::AbstractMatrix, cutoff::AbstractFloat)

    # One bounding sphere per particle, allocated with `similar` on the input array
    n = size(particle_centers, 2)
    spheres = similar(particle_centers, BSphere{Float32}, n)
    AK.foreachindex(spheres) do idx
        center = (
            particle_centers[1, idx],
            particle_centers[2, idx],
            particle_centers[3, idx],
        )
        spheres[idx] = BSphere{Float32}(center, cutoff)
    end

    # BSphere leaves are merged into BBox{Float32} nodes; Morton indices are 32-bit
    tree = BVH(spheres, BBox{Float32}, UInt32, default_start_level(n))

    # Traversing the BVH yields a BVHTraversal; its `contacts` hold the particle index pairs
    traversal = traverse(tree)
    return traversal.contacts
end
36+
37+
38+
particle_centers = rand(Float32, 3, 12_486)
39+
# particle_centers = MtlArray(particle_centers)
40+
interacting_pairs = get_interacting_pairs(particle_centers, 0.0312f0)
41+
42+
43+
44+
45+
# Collect an allocation profile
46+
Profile.Allocs.clear()
47+
Profile.Allocs.@profile get_interacting_pairs(particle_centers, 0.0312f0)
48+
PProf.Allocs.pprof()
49+
50+
51+
52+
# Example output:
53+
# julia> @show interacting_pairs
54+
# interacting_pairs = Tuple{Int32, Int32}[(369, 667), (427, 974), ...]
55+
56+
57+
# using BenchmarkTools
58+
# @benchmark get_interacting_pairs(particle_centers, 0.0312f0)
59+
60+
61+

prototype/mtl_test.jl

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
using KernelAbstractions
2+
using Atomix: @atomic
3+
using Metal
4+
5+
6+
# Probe kernel: atomically increments `dst[level]`, then stores in `dst[1]` a code for the
# type of the local linear thread index (1 = Int32, 2 = Int64, 3 = UInt32, otherwise the
# index's size in bytes), and finally synchronises the workgroup.
@kernel cpu=false inbounds=true function _traverse_nodes_gpu!(dst, level)
    @atomic dst[level] += 1

    ithread = @index(Local, Linear)

    # Every thread writes the same code, so the last writer does not matter
    dst[1] = if ithread isa Int32
        1
    elseif ithread isa Int64
        2
    elseif ithread isa UInt32
        3
    else
        sizeof(ithread)
    end

    @synchronize()
end
25+
26+
27+
dst = MtlArray{UInt32}([0,0,0])
28+
level = 2
29+
30+
kernel = _traverse_nodes_gpu!(get_backend(dst), 128)
31+
kernel(dst, level, ndrange=length(dst))
32+
KernelAbstractions.synchronize(get_backend(dst))
33+
34+

src/morton.jl

+2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ function _compute_extrema(bounding_volumes::AbstractGPUVector, options)
150150
min_centers, # Reduce to the 3D minimum
151151
bounding_volumes,
152152
init=(floatmax(T), floatmax(T), floatmax(T)),
153+
neutral=(floatmax(T), floatmax(T), floatmax(T)),
153154
block_size=options.block_size,
154155
)
155156

@@ -158,6 +159,7 @@ function _compute_extrema(bounding_volumes::AbstractGPUVector, options)
158159
max_centers,
159160
bounding_volumes,
160161
# `floatmin(T)` is the smallest *positive* normal float, not the lowest value, so it is
# not an identity for a max reduction (all-negative centres would yield ~0). Use the
# lowest finite value instead, mirroring `floatmax(T)` in the min reduction.
init=(-floatmax(T), -floatmax(T), -floatmax(T)),
162+
neutral=(-floatmax(T), -floatmax(T), -floatmax(T)),
161163
block_size=options.block_size,
162164
)
163165

src/traverse/traverse_pair.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ function traverse(
8888
extra = if bvtt1 isa AbstractGPUVector
8989
# For GPUs we need an additional global offset to coordinate writing results
9090
backend = get_backend(bvtt1)
91-
KernelAbstractions.zeros(backend, index_type, bvh1.tree.levels * bvh2.tree.levels)
91+
KernelAbstractions.zeros(backend, index_type, Int(bvh1.tree.levels * bvh2.tree.levels))
9292
else
9393
# For CPUs we need a vector of spawned tasks and a contact counter for each task
9494
tasks = Vector{Task}(undef, options.num_threads)

src/traverse/traverse_single.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ function fill_initial_bvtt_single!(bvtt1, num_levels, start_level, level_nodes,
149149
if backend isa GPU
150150

151151
# Convert linear index k to upper-triangular (i, j) indices for a matrix of side n; 0-indexed
152-
@fastmath function tri_ij(n::I, k::I) where I <: Integer
152+
function tri_ij(n::I, k::I) where I <: Integer
153153
a = Float32(-8 * k + 4 * n * (n - 1) - 7)
154154
b = unsafe_trunc(I, sqrt(a) / 2.0f0 - 0.5f0)
155155
i = n - 2 - b

test/gputests.jl

+15
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,21 @@ function array_from_host(h_arr::AbstractArray, dtype=nothing)
2121
end
2222

2323

24+
@testset "mortons_gpu_$(backend)" begin
25+
# This tests BV bounds computation too
26+
Random.seed!(42)
27+
for num_entities in 1:200
28+
bvs = map(BSphere{Float32}, [6 * rand(3) .+ rand(3, 3) for _ in 1:num_entities])
29+
bvs_gpu = array_from_host(bvs)
30+
31+
mortons = ImplicitBVH.morton_encode(bvs)
32+
mortons_gpu = ImplicitBVH.morton_encode(bvs_gpu)
33+
34+
@test all(mortons .== Array(mortons_gpu))
35+
end
36+
end
37+
38+
2439
@testset "bvh_gpu_$(backend)_single_randomised" begin
2540
# Random bounding volumes of different densities; BSphere leaves, BSphere nodes
2641
Random.seed!(42)

test/runtests.jl

+2-1
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,6 @@ end
333333
point = [0., 0., 0.]
334334
direction = [0., 0., -1.]
335335
@test isintersection(sphere, point, direction) == true
336-
337336
end
338337

339338

@@ -693,6 +692,8 @@ end
693692
end
694693

695694

695+
696+
696697
@testset "bvh_single_randomised" begin
697698
# Random bounding volumes of different densities; BSphere leaves, BSphere nodes
698699
Random.seed!(42)

0 commit comments

Comments
 (0)