Skip to content

Commit 0fd1ab6

Browse files
authored
GPU processor refactoring (#1787)
* coroutine code prettification
* asc queues submission refactoring
* better asc ring context handling
* final touches and review notes
* even more simplification for context saving
1 parent af26c94 commit 0fd1ab6

File tree

12 files changed

+234
-146
lines changed

12 files changed

+234
-146
lines changed

src/common/debug.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,6 @@ enum MarkersPalette : int {
5757
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};
5858

5959
#define FRAME_END FrameMark
60+
61+
// Fiber profiling markers. FIBER_ENTER(name) forwards `name` to TracyFiberEnter.
#define FIBER_ENTER(name) TracyFiberEnter(name)
62+
// FIBER_EXIT expands to TracyFiberLeave; it takes no argument.
#define FIBER_EXIT TracyFiberLeave

src/core/debug_state.cpp

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -142,41 +142,61 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
142142
frame.queues.push_back(std::move(dump));
143143
}
144144

145-
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
146-
const AmdGpu::Liverpool::Regs& regs, bool is_compute) {
147-
std::scoped_lock lock{frame_dump_list_mutex};
145+
// Looks up the pending register dump registered under `header_addr`.
//
// Returns std::nullopt when `header_addr` is not present in `waiting_reg_dumps`.
// Otherwise removes the entry from both `waiting_reg_dumps` and
// `waiting_reg_dumps_dbg` and returns a pointer to the frame's RegDump slot keyed
// by `header_addr - base_addr`.
//
// This helper takes no lock itself; the callers visible in this diff
// (PushRegsDump / PushRegsDumpCompute) acquire `frame_dump_list_mutex` first.
//
// NOTE: lines tagged `-` below are the removed body of the previous
// PushRegsDump implementation shown by the diff; lines tagged `+` are new.
std::optional<RegDump*> DebugStateImpl::GetRegDump(uintptr_t base_addr, uintptr_t header_addr) {
148146
const auto it = waiting_reg_dumps.find(header_addr);
149147
if (it == waiting_reg_dumps.end()) {
150-
return;
148+
return std::nullopt;
151149
}
152150
auto& frame = *it->second;
153151
waiting_reg_dumps.erase(it);
154152
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
155-
auto& dump = frame.regs[header_addr - base_addr];
156-
dump.regs = regs;
157-
if (is_compute) {
158-
dump.is_compute = true;
159-
const auto& cs = dump.regs.cs_program;
160-
dump.cs_data = PipelineComputerProgramDump{
161-
.cs_program = cs,
162-
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
163-
};
164-
} else {
165-
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
166-
if (regs.stage_enable.IsStageEnabled(i)) {
167-
auto stage = regs.ProgramForStage(i);
168-
if (stage->address_lo != 0) {
169-
auto code = stage->Code();
170-
dump.stages[i] = PipelineShaderProgramDump{
171-
.user_data = *stage,
172-
.code = std::vector<u32>{code.begin(), code.end()},
173-
};
174-
}
153+
// The slot is indexed by the header's offset from `base_addr`.
return &frame.regs[header_addr - base_addr];
154+
}
155+
156+
// Records a graphics register snapshot for the pending dump at `header_addr`.
//
// Holds `frame_dump_list_mutex` for the whole call. If GetRegDump finds no
// pending dump for `header_addr`, the call is a no-op. Otherwise `regs` is
// copied into the dump and, for every enabled shader stage whose `address_lo`
// is non-zero, the stage's user data and a copy of its code are stored in
// `stages[i]`.
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
157+
const AmdGpu::Liverpool::Regs& regs) {
158+
std::scoped_lock lock{frame_dump_list_mutex};
159+
160+
auto dump = GetRegDump(base_addr, header_addr);
161+
if (!dump) {
162+
return;
163+
}
164+
165+
(*dump)->regs = regs;
166+
167+
// From here on the stage queries read the copy stored in the dump, not the
// caller's `regs`.
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
168+
if ((*dump)->regs.stage_enable.IsStageEnabled(i)) {
169+
auto stage = (*dump)->regs.ProgramForStage(i);
170+
// Stages with a zero `address_lo` are skipped.
if (stage->address_lo != 0) {
171+
auto code = stage->Code();
172+
(*dump)->stages[i] = PipelineShaderProgramDump{
173+
.user_data = *stage,
174+
.code = std::vector<u32>{code.begin(), code.end()},
175+
};
175176
}
176177
}
177178
}
178179
}
179180

181+
// Records a compute-program snapshot for the pending dump at `header_addr`.
//
// Holds `frame_dump_list_mutex` for the whole call. If GetRegDump finds no
// pending dump for `header_addr`, the call is a no-op. Otherwise the dump is
// flagged as compute, `cs_state` is copied into the dump's `regs.cs_program`,
// and `cs_data` is filled with that program state plus a copy of its code.
void DebugStateImpl::PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr,
182+
const CsState& cs_state) {
183+
std::scoped_lock lock{frame_dump_list_mutex};
184+
185+
auto dump = GetRegDump(base_addr, header_addr);
186+
if (!dump) {
187+
return;
188+
}
189+
190+
(*dump)->is_compute = true;
191+
// `cs` aliases the dump's own cs_program; the assignment below overwrites it
// with the caller-provided state.
auto& cs = (*dump)->regs.cs_program;
192+
cs = cs_state;
193+
194+
// The code vector is built from the range returned by `cs.Code()`.
(*dump)->cs_data = PipelineComputerProgramDump{
195+
.cs_program = cs,
196+
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
197+
};
198+
}
199+
180200
void DebugStateImpl::CollectShader(const std::string& name, Shader::LogicalStage l_stage,
181201
vk::ShaderModule module, std::span<const u32> spv,
182202
std::span<const u32> raw_code, std::span<const u32> patch_spv,

src/core/debug_state.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include <queue>
1212

1313
#include "common/types.h"
14-
#include "video_core/amdgpu/liverpool.h"
1514
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
1615

1716
#ifdef _WIN32
@@ -204,12 +203,17 @@ class DebugStateImpl {
204203
void PushQueueDump(QueueDump dump);
205204

206205
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
207-
const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
206+
const AmdGpu::Liverpool::Regs& regs);
207+
using CsState = AmdGpu::Liverpool::ComputeProgram;
208+
void PushRegsDumpCompute(uintptr_t base_addr, uintptr_t header_addr, const CsState& cs_state);
208209

209210
void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
210211
vk::ShaderModule module, std::span<const u32> spv,
211212
std::span<const u32> raw_code, std::span<const u32> patch_spv,
212213
bool is_patched);
214+
215+
private:
216+
std::optional<RegDump*> GetRegDump(uintptr_t base_addr, uintptr_t header_addr);
213217
};
214218
} // namespace DebugStateType
215219

src/core/libraries/gnmdriver/gnmdriver.cpp

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
296296
// If `submitDone` is issued, we need to block submissions until the GPU is idle
297297
static u32 submission_lock{};
298298
std::condition_variable cv_lock{};
299-
static std::mutex m_submission{};
299+
std::mutex m_submission{};
300300
static u64 frames_submitted{}; // frame counter
301301
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
302302
static int sdk_version{0};
303303

304-
struct AscQueueInfo {
305-
VAddr map_addr;
306-
u32* read_addr;
307-
u32 ring_size_dw;
308-
};
309-
static Common::SlotVector<AscQueueInfo> asc_queues{};
304+
static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
310305
static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
311306
static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;
312307

@@ -506,11 +501,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
506501
}
507502

508503
auto vqid = gnm_vqid - 1;
509-
auto& asc_queue = asc_queues[{vqid}];
510-
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
511-
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
512-
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
513-
const std::span acb_span{acb_ptr, acb_size >> 2u};
504+
auto& asc_queue = liverpool->asc_queues[{vqid}];
505+
506+
const auto& offs_dw = asc_next_offs_dw[vqid];
507+
508+
if (next_offs_dw < offs_dw) {
509+
ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
510+
}
511+
512+
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
513+
const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
514+
const std::span acb_span{acb_ptr, acb_size_dw};
515+
516+
asc_next_offs_dw[vqid] = next_offs_dw;
514517

515518
if (DebugState.DumpingCurrentFrame()) {
516519
static auto last_frame_num = -1LL;
@@ -545,9 +548,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
545548
});
546549
}
547550
liverpool->SubmitAsc(gnm_vqid, acb_span);
548-
549-
*asc_queue.read_addr += acb_size;
550-
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
551551
}
552552

553553
void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
@@ -1266,12 +1266,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
12661266
return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
12671267
}
12681268

1269-
auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
1269+
const auto vqid =
1270+
liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
12701271
// We need to offset index as `dingDong` assumes it to be from the range [1..64]
12711272
const auto gnm_vqid = vqid.index + 1;
12721273
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
12731274
gnm_vqid);
12741275

1276+
const auto& queue = liverpool->asc_queues[vqid];
1277+
*queue.read_addr = 0u;
1278+
12751279
return gnm_vqid;
12761280
}
12771281

src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ void EmitContext::DefineInputs() {
294294
});
295295
// Note that we pass index rather than Id
296296
input_params[attrib.semantic] = SpirvAttribute{
297-
.id = rate_idx,
297+
.id = {rate_idx},
298298
.pointer_type = input_u32,
299299
.component_type = U32[1],
300300
.num_components = std::min<u16>(attrib.num_elements, num_components),

0 commit comments

Comments
 (0)