Skip to content

GPU processor refactoring #1787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/common/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,6 @@ enum MarkersPalette : int {
tracy::SourceLocationData{nullptr, name, TracyFile, (uint32_t)TracyLine, 0};

#define FRAME_END FrameMark

#define FIBER_ENTER(name) TracyFiberEnter(name)
#define FIBER_EXIT TracyFiberLeave
13 changes: 7 additions & 6 deletions src/core/debug_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
using namespace DebugStateType;

DebugStateImpl& DebugState = *Common::Singleton<DebugStateImpl>::Instance();
extern std::unique_ptr<AmdGpu::Liverpool> liverpool;

static ThreadID ThisThreadID() {
#ifdef _WIN32
Expand Down Expand Up @@ -142,8 +143,7 @@ void DebugStateImpl::PushQueueDump(QueueDump dump) {
frame.queues.push_back(std::move(dump));
}

void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs, bool is_compute) {
void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute) {
std::scoped_lock lock{frame_dump_list_mutex};
const auto it = waiting_reg_dumps.find(header_addr);
if (it == waiting_reg_dumps.end()) {
Expand All @@ -153,18 +153,19 @@ void DebugStateImpl::PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
waiting_reg_dumps.erase(it);
waiting_reg_dumps_dbg.erase(waiting_reg_dumps_dbg.find(header_addr));
auto& dump = frame.regs[header_addr - base_addr];
dump.regs = regs;
dump.regs = liverpool->regs;
if (is_compute) {
dump.is_compute = true;
const auto& cs = dump.regs.cs_program;
auto& cs = dump.regs.cs_program;
cs = liverpool->GetCsRegs();
dump.cs_data = PipelineComputerProgramDump{
.cs_program = cs,
.code = std::vector<u32>{cs.Code().begin(), cs.Code().end()},
};
} else {
for (int i = 0; i < RegDump::MaxShaderStages; i++) {
if (regs.stage_enable.IsStageEnabled(i)) {
auto stage = regs.ProgramForStage(i);
if (dump.regs.stage_enable.IsStageEnabled(i)) {
auto stage = dump.regs.ProgramForStage(i);
if (stage->address_lo != 0) {
auto code = stage->Code();
dump.stages[i] = PipelineShaderProgramDump{
Expand Down
4 changes: 1 addition & 3 deletions src/core/debug_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include <queue>

#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"

#ifdef _WIN32
Expand Down Expand Up @@ -203,8 +202,7 @@ class DebugStateImpl {

void PushQueueDump(QueueDump dump);

void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr,
const AmdGpu::Liverpool::Regs& regs, bool is_compute = false);
void PushRegsDump(uintptr_t base_addr, uintptr_t header_addr, bool is_compute = false);

void CollectShader(const std::string& name, Shader::LogicalStage l_stage,
vk::ShaderModule module, std::span<const u32> spv,
Expand Down
36 changes: 20 additions & 16 deletions src/core/libraries/gnmdriver/gnmdriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,17 +296,12 @@ static_assert(CtxInitSequence400.size() == 0x61);
// If `submitDone` is issued, we need to block submissions until the GPU is idle
static u32 submission_lock{};
std::condition_variable cv_lock{};
static std::mutex m_submission{};
std::mutex m_submission{};
static u64 frames_submitted{}; // frame counter
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
static int sdk_version{0};

struct AscQueueInfo {
VAddr map_addr;
u32* read_addr;
u32 ring_size_dw;
};
static Common::SlotVector<AscQueueInfo> asc_queues{};
static u32 asc_next_offs_dw[Liverpool::NumComputeRings];
static constexpr VAddr tessellation_factors_ring_addr = Core::SYSTEM_RESERVED_MAX - 0xFFFFFFF;
static constexpr u32 tessellation_offchip_buffer_size = 0x800000u;

Expand Down Expand Up @@ -506,11 +501,19 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
}

auto vqid = gnm_vqid - 1;
auto& asc_queue = asc_queues[{vqid}];
const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr + *asc_queue.read_addr);
const auto acb_size = next_offs_dw ? (next_offs_dw << 2u) - *asc_queue.read_addr
: (asc_queue.ring_size_dw << 2u) - *asc_queue.read_addr;
const std::span acb_span{acb_ptr, acb_size >> 2u};
auto& asc_queue = liverpool->asc_queues[{vqid}];

const auto& offs_dw = asc_next_offs_dw[vqid];

if (next_offs_dw < offs_dw) {
ASSERT_MSG(next_offs_dw == 0, "ACB submission is split at the end of ring buffer");
}

const auto* acb_ptr = reinterpret_cast<const u32*>(asc_queue.map_addr) + offs_dw;
const auto acb_size_dw = (next_offs_dw ? next_offs_dw : asc_queue.ring_size_dw) - offs_dw;
const std::span acb_span{acb_ptr, acb_size_dw};

asc_next_offs_dw[vqid] = next_offs_dw;

if (DebugState.DumpingCurrentFrame()) {
static auto last_frame_num = -1LL;
Expand Down Expand Up @@ -545,9 +548,6 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
});
}
liverpool->SubmitAsc(gnm_vqid, acb_span);

*asc_queue.read_addr += acb_size;
*asc_queue.read_addr %= asc_queue.ring_size_dw * 4;
}

void PS4_SYSV_ABI sceGnmDingDongForWorkload(u32 gnm_vqid, u32 next_offs_dw, u64 workload_id) {
Expand Down Expand Up @@ -1266,12 +1266,16 @@ int PS4_SYSV_ABI sceGnmMapComputeQueue(u32 pipe_id, u32 queue_id, VAddr ring_bas
return ORBIS_GNM_ERROR_COMPUTEQUEUE_INVALID_READ_PTR_ADDR;
}

auto vqid = asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw);
const auto vqid =
liverpool->asc_queues.insert(VAddr(ring_base_addr), read_ptr_addr, ring_size_dw, pipe_id);
// Offset the index by one, since `dingDong` expects it to be in the range [1..64]
const auto gnm_vqid = vqid.index + 1;
LOG_INFO(Lib_GnmDriver, "ASC pipe {} queue {} mapped to vqueue {}", pipe_id, queue_id,
gnm_vqid);

const auto& queue = liverpool->asc_queues[vqid];
*queue.read_addr = 0u;

return gnm_vqid;
}

Expand Down
2 changes: 1 addition & 1 deletion src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ void EmitContext::DefineInputs() {
});
// Note that we pass index rather than Id
input_params[attrib.semantic] = SpirvAttribute{
.id = rate_idx,
.id = {rate_idx},
.pointer_type = input_u32,
.component_type = U32[1],
.num_components = std::min<u16>(attrib.num_elements, num_components),
Expand Down
Loading
Loading