From aa2c27b22de7237ddb3dc6f4bed90cd8da89ce90 Mon Sep 17 00:00:00 2001 From: Qing Tao <55564570+VickyAtAZ@users.noreply.github.com> Date: Wed, 16 Jun 2021 13:41:27 -0700 Subject: [PATCH] ATOM-15780 Improve cpu profiler allows pause and output profiling data to a file for reference (#1358) - Added a pause button in imgui cpu profiler. - Added a capture button to save cpu profiling data to a data file. - Added some profile marks in both RPI and RHI. --- .../ProfilingCaptureSystemComponent.cpp | 18 +++++-- .../RHI/Code/Include/Atom/RHI/CpuProfiler.h | 3 ++ .../Code/Include/Atom/RHI/CpuProfilerImpl.h | 1 + .../Include/Atom/RHI/MemorySubAllocator.h | 2 + .../Code/Include/Atom/RHI/ObjectCollector.h | 2 + .../Atom/RHI/Code/Source/RHI/CommandQueue.cpp | 2 + .../RHI/Code/Source/RHI/CpuProfilerImpl.cpp | 6 +++ Gems/Atom/RHI/Code/Source/RHI/Device.cpp | 3 ++ Gems/Atom/RHI/Code/Source/RHI/FrameGraph.cpp | 16 +++--- .../Code/Source/RHI/FrameGraphExecuter.cpp | 7 +-- .../RHI/Code/Source/RHI/FrameScheduler.cpp | 16 ++++-- Gems/Atom/RHI/Code/Source/RHI/RHISystem.cpp | 11 ++-- .../DX12/Code/Source/RHI/CommandListPool.cpp | 2 + .../Code/Source/RHI/CommandQueueContext.cpp | 3 +- .../Code/Source/RHI/DescriptorContext.cpp | 2 + .../Source/RHI/StagingMemoryAllocator.cpp | 1 + .../RPI.Public/GpuQuery/GpuQuerySystem.cpp | 2 + .../RPI/Code/Source/RPI.Public/RPISystem.cpp | 29 ++++++----- .../Atom/RPI/Code/Source/RPI.Public/Scene.cpp | 25 ++++++--- .../Include/Atom/Utils/ImGuiCpuProfiler.h | 11 ++++ .../Include/Atom/Utils/ImGuiCpuProfiler.inl | 51 +++++++++++++++++-- 21 files changed, 168 insertions(+), 45 deletions(-) diff --git a/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp b/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp index 9cc98a15ec..295e10a53f 100644 --- a/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp +++ b/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp @@ -458,9 +458,13 @@ namespace AZ bool ProfilingCaptureSystemComponent::CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath) { // Start the cpu profiling - RHI::CpuProfiler::Get()->SetProfilerEnabled(true); + bool wasEnabled = RHI::CpuProfiler::Get()->IsProfilerEnabled(); + if (!wasEnabled) + { + RHI::CpuProfiler::Get()->SetProfilerEnabled(true); + } - const bool captureStarted = m_cpuProfilingStatisticsCapture.StartCapture([this, outputFilePath]() + const bool captureStarted = m_cpuProfilingStatisticsCapture.StartCapture([this, outputFilePath, wasEnabled]() { JsonSerializerSettings serializationSettings; serializationSettings.m_keepDefaults = true; @@ -481,14 +485,22 @@ namespace AZ saveResult.GetError().c_str()); AZ_Warning("ProfilingCaptureSystemComponent", false, captureInfo.c_str()); } + else + { + AZ_Printf("ProfilingCaptureSystemComponent", "Cpu profiling statistics was saved to file [%s]\n", outputFilePath.c_str()); + } // Disable the profiler again - RHI::CpuProfiler::Get()->SetProfilerEnabled(false); + if (!wasEnabled) + { + RHI::CpuProfiler::Get()->SetProfilerEnabled(false); + } // Notify listeners that the pass' PipelineStatistics queries capture has finished. ProfilingCaptureNotificationBus::Broadcast(&ProfilingCaptureNotificationBus::Events::OnCaptureCpuProfilingStatisticsFinished, saveResult.IsSuccess(), captureInfo); + }); // Start the TickBus. diff --git a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h index cf3a31adf6..b5c8f144ba 100644 --- a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h +++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h @@ -12,6 +12,7 @@ #pragma once +#include #include #include #include @@ -87,6 +88,8 @@ namespace AZ //! Enable/Disable the CpuProfiler virtual void SetProfilerEnabled(bool enabled) = 0; + + virtual bool IsProfilerEnabled() const = 0 ; }; } // namespace RPI diff --git a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h index 30523194b6..9685f9be98 100644 --- a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h +++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h @@ -102,6 +102,7 @@ namespace AZ void EndTimeRegion() final; void FlushTimeRegionMap(TimeRegionMap& timeRegionMap) final; void SetProfilerEnabled(bool enabled) final; + bool IsProfilerEnabled() const final; private: // Lazily create and register the local thread data diff --git a/Gems/Atom/RHI/Code/Include/Atom/RHI/MemorySubAllocator.h b/Gems/Atom/RHI/Code/Include/Atom/RHI/MemorySubAllocator.h index 47abf5409d..75fd3035bf 100644 --- a/Gems/Atom/RHI/Code/Include/Atom/RHI/MemorySubAllocator.h +++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/MemorySubAllocator.h @@ -11,6 +11,7 @@ */ #pragma once +#include #include #include #include @@ -160,6 +161,7 @@ namespace AZ template void MemorySubAllocator::GarbageCollect() { + AZ_ATOM_PROFILE_FUNCTION("RHI", "MemorySubAllocator: GarbageCollect"); for (PageContext& pageContext : m_pageContexts) { pageContext.m_allocator.GarbageCollect(); diff --git a/Gems/Atom/RHI/Code/Include/Atom/RHI/ObjectCollector.h b/Gems/Atom/RHI/Code/Include/Atom/RHI/ObjectCollector.h index 98d63accc9..a3a07c1533 100644 --- a/Gems/Atom/RHI/Code/Include/Atom/RHI/ObjectCollector.h +++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/ObjectCollector.h @@ -11,6 +11,7 @@ */ #pragma once +#include #include #include #include @@ -169,6 +170,7 @@ namespace AZ template void ObjectCollector::Collect(bool forceFlush) { + AZ_ATOM_PROFILE_FUNCTION("DX12", "ObjectCollector: Collect"); m_mutex.lock(); if (m_pendingObjects.size()) { diff --git a/Gems/Atom/RHI/Code/Source/RHI/CommandQueue.cpp b/Gems/Atom/RHI/Code/Source/RHI/CommandQueue.cpp index 700d09d7bf..57a2b541f1 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/CommandQueue.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/CommandQueue.cpp @@ -11,6 +11,7 @@ */ #include +#include #include namespace AZ @@ -86,6 +87,7 @@ namespace AZ void CommandQueue::FlushCommands() { + AZ_ATOM_PROFILE_FUNCTION("RHI", "CommandQueue: FlushCommands"); while (!m_isWorkQueueEmpty && !m_isQuitting) { AZStd::this_thread::yield(); diff --git a/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp b/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp index 8242c89886..9b77e7566f 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp @@ -186,6 +186,12 @@ namespace AZ } } + bool CpuProfilerImpl::IsProfilerEnabled() const + { + return m_enabled; + } + + void CpuProfilerImpl::RegisterThreadStorage() { AZStd::unique_lock lock(m_threadRegisterMutex); diff --git a/Gems/Atom/RHI/Code/Source/RHI/Device.cpp b/Gems/Atom/RHI/Code/Source/RHI/Device.cpp index 4c54ce3237..7b7dc2de8c 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/Device.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/Device.cpp @@ -10,6 +10,7 @@ * */ +#include #include #include @@ -151,6 +152,7 @@ namespace AZ { if (ValidateIsInitialized() && ValidateIsInFrame()) { + AZ_ATOM_PROFILE_FUNCTION("RHI", "Device: EndFrame"); EndFrameInternal(); m_isInFrame = false; return ResultCode::Success; @@ -172,6 +174,7 @@ namespace AZ { if (ValidateIsInitialized() && ValidateIsNotInFrame()) { + AZ_ATOM_PROFILE_FUNCTION("RHI", "Device: CompileMemoryStatistics"); MemoryStatisticsBuilder builder; builder.Begin(memoryStatistics, reportFlags); CompileMemoryStatisticsInternal(builder); diff --git a/Gems/Atom/RHI/Code/Source/RHI/FrameGraph.cpp b/Gems/Atom/RHI/Code/Source/RHI/FrameGraph.cpp index 767ab83c47..1788d0e85c 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/FrameGraph.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/FrameGraph.cpp @@ -9,18 +9,19 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * */ -#include -#include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include #include +#include #include +#include #include +#include +#include #include #include @@ -76,6 +77,7 @@ namespace AZ void FrameGraph::Clear() { + AZ_ATOM_PROFILE_FUNCTION("RHI", "FrameGraph: Clear"); for (Scope* scope : m_scopes) { scope->Deactivate(); diff --git a/Gems/Atom/RHI/Code/Source/RHI/FrameGraphExecuter.cpp b/Gems/Atom/RHI/Code/Source/RHI/FrameGraphExecuter.cpp index dff49a9086..06c8791655 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/FrameGraphExecuter.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/FrameGraphExecuter.cpp @@ -9,10 +9,11 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * */ -#include #include -#include +#include #include +#include +#include #include #include @@ -80,7 +81,7 @@ namespace AZ void FrameGraphExecuter::End() { - AZ_TRACE_METHOD(); + AZ_ATOM_PROFILE_FUNCTION("RHI", "FrameGraphExecuter: End"); AZ_Assert(m_pendingGroups.empty(), "Pending contexts in queue."); m_groups.clear(); EndInternal(); diff --git a/Gems/Atom/RHI/Code/Source/RHI/FrameScheduler.cpp b/Gems/Atom/RHI/Code/Source/RHI/FrameScheduler.cpp index 67cad6c4dc..cf8f3aa2ed 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/FrameScheduler.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/FrameScheduler.cpp @@ -181,7 +181,10 @@ namespace AZ m_compileRequest = compileRequest; - FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompile); + { + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "FrameScheduler: Compile: OnFrameCompile"); + FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompile); + } FrameGraphCompileRequest frameGraphCompileRequest; frameGraphCompileRequest.m_frameGraph = m_frameGraph.get(); @@ -193,7 +196,10 @@ namespace AZ const MessageOutcome outcome = m_frameGraphCompiler->Compile(frameGraphCompileRequest); if (outcome.IsSuccess()) { - FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompileEnd, *m_frameGraph); + { + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "FrameScheduler: Compile: OnFrameCompileEnd"); + FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompileEnd, *m_frameGraph); + } FrameGraphLogger::Log(*m_frameGraph, compileRequest.m_logVerbosity); @@ -400,7 +406,11 @@ namespace AZ m_scopeProducers.clear(); m_scopeProducerLookup.clear(); - FrameEventBus::Event(m_device, &FrameEventBus::Events::OnFrameEnd); + + { + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "FrameScheduler: EndFrame: OnFrameEnd"); + FrameEventBus::Event(m_device, &FrameEventBus::Events::OnFrameEnd); + } const AZStd::sys_time_t timeNowTicks = AZStd::GetTimeNowTicks(); m_cpuTimingStatistics.m_frameToFrameTime = timeNowTicks - m_lastFrameEndTime; diff --git a/Gems/Atom/RHI/Code/Source/RHI/RHISystem.cpp b/Gems/Atom/RHI/Code/Source/RHI/RHISystem.cpp index 81409e2c18..e1666549c9 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/RHISystem.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/RHISystem.cpp @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -213,19 +214,23 @@ namespace AZ void RHISystem::FrameUpdate(FrameGraphCallback frameGraphCallback) { AZ_PROFILE_FUNCTION(AZ::Debug::ProfileCategory::AzRender); + AZ_ATOM_PROFILE_FUNCTION("RHI", "RHISystem: FrameUpdate"); { AZ_PROFILE_SCOPE(AZ::Debug::ProfileCategory::AzRender, "main per-frame work"); m_frameScheduler.BeginFrame(); - + frameGraphCallback(m_frameScheduler); /** * This exists as a hook to enable RHI sample tests, which are allowed to queue their * own RHI scopes to the frame scheduler. This happens prior to the RPI pass graph registration. */ - RHISystemNotificationBus::Broadcast(&RHISystemNotificationBus::Events::OnFramePrepare, m_frameScheduler); - + { + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "RHISystem :FrameUpdate: OnFramePrepare"); + RHISystemNotificationBus::Broadcast(&RHISystemNotificationBus::Events::OnFramePrepare, m_frameScheduler); + } + RHI::MessageOutcome outcome = m_frameScheduler.Compile(m_compileRequest); if (outcome.IsSuccess()) { diff --git a/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandListPool.cpp b/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandListPool.cpp index d43129a626..5510b4e924 100644 --- a/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandListPool.cpp +++ b/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandListPool.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include namespace AZ @@ -179,6 +180,7 @@ namespace AZ void CommandListAllocator::Collect() { + AZ_ATOM_PROFILE_FUNCTION("DX12", "CommandListAllocator: Collect"); for (uint32_t queueIdx = 0; queueIdx < RHI::HardwareQueueClassCount; ++queueIdx) { m_commandListSubAllocators[queueIdx].ForEach([](Internal::CommandListSubAllocator& commandListSubAllocator) diff --git a/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandQueueContext.cpp b/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandQueueContext.cpp index a31e105b19..5bf0a84700 100644 --- a/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandQueueContext.cpp +++ b/Gems/Atom/RHI/DX12/Code/Source/RHI/CommandQueueContext.cpp @@ -137,6 +137,7 @@ namespace AZ void CommandQueueContext::End() { AZ_PROFILE_FUNCTION(AZ::Debug::ProfileCategory::AzRender); + AZ_ATOM_PROFILE_FUNCTION("DX12", "CommandQueueContext: End"); QueueGpuSignals(m_frameFences[m_currentFrameIndex]); @@ -150,7 +151,7 @@ namespace AZ { AZ_PROFILE_SCOPE_IDLE(AZ::Debug::ProfileCategory::AzRender, "Wait and Reset Fence"); - AZ_ATOM_PROFILE_FUNCTION("RHI", "CommandQueueContext: Wait on Fences"); + AZ_ATOM_PROFILE_TIME_GROUP_REGION("DX12", "CommandQueueContext: Wait on Fences"); FenceEvent event("FrameFence"); m_frameFences[m_currentFrameIndex].Wait(event); diff --git a/Gems/Atom/RHI/DX12/Code/Source/RHI/DescriptorContext.cpp b/Gems/Atom/RHI/DX12/Code/Source/RHI/DescriptorContext.cpp index e11817a7d8..f83f169f2f 100644 --- a/Gems/Atom/RHI/DX12/Code/Source/RHI/DescriptorContext.cpp +++ b/Gems/Atom/RHI/DX12/Code/Source/RHI/DescriptorContext.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include namespace AZ @@ -345,6 +346,7 @@ namespace AZ void DescriptorContext::GarbageCollect() { + AZ_ATOM_PROFILE_FUNCTION("DX12", "DescriptorContext: GarbageCollect"); for (const auto& itr : m_platformLimitsDescriptor->m_descriptorHeapLimits) { for (uint32_t shaderVisibleIdx = 0; shaderVisibleIdx < PlatformLimitsDescriptor::NumHeapFlags; ++shaderVisibleIdx) diff --git a/Gems/Atom/RHI/DX12/Code/Source/RHI/StagingMemoryAllocator.cpp b/Gems/Atom/RHI/DX12/Code/Source/RHI/StagingMemoryAllocator.cpp index 47ae24f487..df5fc53468 100644 --- a/Gems/Atom/RHI/DX12/Code/Source/RHI/StagingMemoryAllocator.cpp +++ b/Gems/Atom/RHI/DX12/Code/Source/RHI/StagingMemoryAllocator.cpp @@ -60,6 +60,7 @@ namespace AZ void StagingMemoryAllocator::GarbageCollect() { + AZ_ATOM_PROFILE_FUNCTION("DX12", "StagingMemoryAllocator: GarbageCollect"); m_mediumBlockAllocators.ForEach([](MemoryLinearSubAllocator& subAllocator) { subAllocator.GarbageCollect(); diff --git a/Gems/Atom/RPI/Code/Source/RPI.Public/GpuQuery/GpuQuerySystem.cpp b/Gems/Atom/RPI/Code/Source/RPI.Public/GpuQuery/GpuQuerySystem.cpp index 2c9c1faf4a..e6d7aa01c9 100644 --- a/Gems/Atom/RPI/Code/Source/RPI.Public/GpuQuery/GpuQuerySystem.cpp +++ b/Gems/Atom/RPI/Code/Source/RPI.Public/GpuQuery/GpuQuerySystem.cpp @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -78,6 +79,7 @@ namespace AZ void GpuQuerySystem::Update() { + AZ_ATOM_PROFILE_FUNCTION("RPI", "GpuQuerySystem: Update"); for (auto& queryPool : m_queryPoolArray) { if (queryPool) diff --git a/Gems/Atom/RPI/Code/Source/RPI.Public/RPISystem.cpp b/Gems/Atom/RPI/Code/Source/RPI.Public/RPISystem.cpp index 87b6f5d92e..1db209e17a 100644 --- a/Gems/Atom/RPI/Code/Source/RPI.Public/RPISystem.cpp +++ b/Gems/Atom/RPI/Code/Source/RPI.Public/RPISystem.cpp @@ -282,24 +282,27 @@ namespace AZ m_rhiSystem.FrameUpdate( [this](RHI::FrameGraphBuilder& frameGraphBuilder) + { + // Pass system's frame update, which includes the logic of adding scope producers, has to be added here since the + // scope producers only can be added to the frame when frame started which cleans up previous scope producers. + m_passSystem.FrameUpdate(frameGraphBuilder); + + // Update View Srgs + for (auto& scenePtr : m_scenes) + { + scenePtr->UpdateSrgs(); + } + }); + { - // Pass system's frame update, which includes the logic of adding scope producers, has to be added here since the scope producers only can be added to the frame - // when frame started which cleans up previous scope producers. - m_passSystem.FrameUpdate(frameGraphBuilder); + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "RPISystem: FrameEnd"); + m_dynamicDraw.FrameEnd(); + m_passSystem.FrameEnd(); - // Update View Srgs for (auto& scenePtr : m_scenes) { - scenePtr->UpdateSrgs(); + scenePtr->OnFrameEnd(); } - }); - - m_dynamicDraw.FrameEnd(); - m_passSystem.FrameEnd(); - - for (auto& scenePtr : m_scenes) - { - scenePtr->OnFrameEnd(); } m_renderTick++; diff --git a/Gems/Atom/RPI/Code/Source/RPI.Public/Scene.cpp b/Gems/Atom/RPI/Code/Source/RPI.Public/Scene.cpp index 62efc3dccb..2ecbdde417 100644 --- a/Gems/Atom/RPI/Code/Source/RPI.Public/Scene.cpp +++ b/Gems/Atom/RPI/Code/Source/RPI.Public/Scene.cpp @@ -404,6 +404,7 @@ namespace AZ { AZ_PROFILE_SCOPE(Debug::ProfileCategory::AzRender, "WaitForSimulationCompletion"); + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "WaitForSimulationCompletion"); WaitAndCleanCompletionJob(m_simulationCompletion); } @@ -420,12 +421,15 @@ namespace AZ // Get active pipelines which need to be rendered and notify them frame started AZStd::vector activePipelines; - for (auto& pipeline : m_pipelines) { - if (pipeline->NeedsRender()) + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "OnStartFrame"); + for (auto& pipeline : m_pipelines) { - activePipelines.push_back(pipeline); - pipeline->OnStartFrame(tickInfo); + if (pipeline->NeedsRender()) + { + activePipelines.push_back(pipeline); + pipeline->OnStartFrame(tickInfo); + } } } @@ -444,7 +448,7 @@ namespace AZ { - AZ_PROFILE_SCOPE(Debug::ProfileCategory::AzRender, "Setup Views"); + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "Setup Views"); // Collect persistent views from all pipelines to be rendered AZStd::map persistentViews; @@ -490,7 +494,6 @@ namespace AZ { const auto renderLambda = [this, &fp]() { - AZ_PROFILE_SCOPE_DYNAMIC(Debug::ProfileCategory::AzRender, "renderJob - fp:%s", fp->RTTI_GetTypeName()); fp->Render(m_renderPacket); }; @@ -526,12 +529,14 @@ namespace AZ // Add dynamic draw data for all the views if (m_dynamicDrawSystem) { + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "DynamicDraw SubmitDrawData"); m_dynamicDrawSystem->SubmitDrawData(this, m_renderPacket.m_views); } } { AZ_PROFILE_SCOPE(AZ::Debug::ProfileCategory::AzRender, "FinalizeDrawLists"); + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "FinalizeDrawLists"); if (jobPolicy == RHI::JobPolicy::Serial) { for (auto& view : m_renderPacket.m_views) @@ -542,7 +547,6 @@ namespace AZ else { AZ::JobCompletion* finalizeDrawListsCompletion = aznew AZ::JobCompletion(); - AZ_PROFILE_EVENT_BEGIN(Debug::ProfileCategory::AzRender, "StartFinalizeDrawListsJobs"); for (auto& view : m_renderPacket.m_views) { const auto finalizeDrawListsLambda = [view]() @@ -559,11 +563,15 @@ namespace AZ } } - SceneNotificationBus::Event(GetId(), &SceneNotification::OnEndPrepareRender); + { + AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "Scene OnEndPrepareRender"); + SceneNotificationBus::Event(GetId(), &SceneNotification::OnEndPrepareRender); + } } void Scene::OnFrameEnd() { + AZ_ATOM_PROFILE_FUNCTION("RPI", "Scene: OnFrameEnd"); for (auto& pipeline : m_pipelines) { if (pipeline->NeedsRender()) @@ -702,6 +710,7 @@ namespace AZ void Scene::RebuildPipelineStatesLookup() { + AZ_ATOM_PROFILE_FUNCTION("RPI", "Scene: RebuildPipelineStatesLookup"); m_pipelineStatesLookup.clear(); AZStd::queue parents; diff --git a/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.h b/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.h index 4c9ddbe50a..a9ed5b7008 100644 --- a/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.h +++ b/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.h @@ -13,6 +13,7 @@ #pragma once #include +#include namespace AZ { @@ -56,6 +57,16 @@ namespace AZ ImGuiTextFilter m_timedRegionFilter; GroupRegionMap m_groupRegionMap; + + // Pause cpu profiling. The profiler will show the statistics of the last frame before pause + bool m_paused = false; + + // Total frames need to be saved + int m_captureFrameCount = 1; + + AZ::RHI::CpuTimingStatistics m_cpuTimingStatisticsWhenPause; + + AZStd::string m_lastCapturedFilePath; }; } // namespace Render } diff --git a/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl b/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl index 7a453b4597..50ef1a8e66 100644 --- a/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl +++ b/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl @@ -10,7 +10,10 @@ * */ -#include +#include +#include +#include // For AZ_MAX_PATH_LEN +#include namespace AZ { @@ -34,16 +37,42 @@ namespace AZ } } - inline void ImGuiCpuProfiler::Draw(bool& keepDrawing, const AZ::RHI::CpuTimingStatistics& cpuTimingStatistics) + inline void ImGuiCpuProfiler::Draw(bool& keepDrawing, const AZ::RHI::CpuTimingStatistics& currentCpuTimingStatistics) { // Cache the value to detect if it was changed by ImGui(user pressed 'x') const bool cachedShowCpuProfiler = keepDrawing; const ImVec2 windowSize(640.0f, 480.0f); ImGui::SetNextWindowSize(windowSize, ImGuiCond_Once); + bool captureToFile = false; if (ImGui::Begin("Cpu Profiler", &keepDrawing, ImGuiWindowFlags_None)) { - UpdateGroupRegionMap(); + m_paused = !AZ::RHI::CpuProfiler::Get()->IsProfilerEnabled(); + if (ImGui::Button(m_paused?"Resume":"Pause")) + { + m_paused = !m_paused; + AZ::RHI::CpuProfiler::Get()->SetProfilerEnabled(!m_paused); + } + + // Update region map and cache the input cpu timing statistics when the profiling is not paused + if (!m_paused) + { + UpdateGroupRegionMap(); + m_cpuTimingStatisticsWhenPause = currentCpuTimingStatistics; + } + + if (ImGui::Button("Capture")) + { + captureToFile = true; + } + + if (!m_lastCapturedFilePath.empty()) + { + ImGui::SameLine(); + ImGui::Text(m_lastCapturedFilePath.c_str()); + } + + const AZ::RHI::CpuTimingStatistics& cpuTimingStatistics = m_cpuTimingStatisticsWhenPause; const AZStd::sys_time_t ticksPerSecond = AZStd::GetTimeTicksPerSecond(); @@ -51,7 +80,7 @@ namespace AZ { // Note: converting to microseconds integer before converting to milliseconds float const float timeInMs = static_cast((duration * 1000) / (ticksPerSecond / 1000)) / 1000.0f; - ImGui::Text("%.1f ms", timeInMs); + ImGui::Text("%.2f ms", timeInMs); }; const auto ShowRow = [ticksPerSecond, &ShowTimeInMs](const char* regionLabel, AZStd::sys_time_t duration) @@ -158,6 +187,20 @@ namespace AZ } ImGui::End(); + if (captureToFile) + { + AZStd::sys_time_t timeNow = AZStd::GetTimeNowSecond(); + AZStd::string timeString; + AZStd::to_string(timeString, timeNow); + u64 currentTick = AZ::RPI::RPISystemInterface::Get()->GetCurrentTick(); + AZStd::string frameDataFilePath = AZStd::string::format("@user@/CpuProfiler/%s_%llu.json", timeString.c_str(), currentTick); + char resolvedPath[AZ_MAX_PATH_LEN]; + AZ::IO::FileIOBase::GetInstance()->ResolvePath(frameDataFilePath.c_str(), resolvedPath, AZ_MAX_PATH_LEN); + m_lastCapturedFilePath = resolvedPath; + AZ::Render::ProfilingCaptureRequestBus::Broadcast(&AZ::Render::ProfilingCaptureRequestBus::Events::CaptureCpuProfilingStatistics, + frameDataFilePath); + } + // Toggle if the bool isn't the same as the cached value if (cachedShowCpuProfiler != keepDrawing) {