ATOM-15780 Improve cpu profiler allows pause and output profiling data to a file for reference (#1358)

- Added a pause button in imgui cpu profiler.
- Added a capture button to save cpu profiling data to a data file.
- Added some profile marks in both RPI and RHI.
main
Qing Tao 5 years ago committed by GitHub
parent 6d608c2297
commit aa2c27b22d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -458,9 +458,13 @@ namespace AZ
bool ProfilingCaptureSystemComponent::CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath)
{
// Start the cpu profiling
RHI::CpuProfiler::Get()->SetProfilerEnabled(true);
bool wasEnabled = RHI::CpuProfiler::Get()->IsProfilerEnabled();
if (!wasEnabled)
{
RHI::CpuProfiler::Get()->SetProfilerEnabled(true);
}
const bool captureStarted = m_cpuProfilingStatisticsCapture.StartCapture([this, outputFilePath]()
const bool captureStarted = m_cpuProfilingStatisticsCapture.StartCapture([this, outputFilePath, wasEnabled]()
{
JsonSerializerSettings serializationSettings;
serializationSettings.m_keepDefaults = true;
@ -481,14 +485,22 @@ namespace AZ
saveResult.GetError().c_str());
AZ_Warning("ProfilingCaptureSystemComponent", false, captureInfo.c_str());
}
else
{
AZ_Printf("ProfilingCaptureSystemComponent", "Cpu profiling statistics was saved to file [%s]\n", outputFilePath.c_str());
}
// Disable the profiler again
RHI::CpuProfiler::Get()->SetProfilerEnabled(false);
if (!wasEnabled)
{
RHI::CpuProfiler::Get()->SetProfilerEnabled(false);
}
// Notify listeners that the pass' PipelineStatistics queries capture has finished.
ProfilingCaptureNotificationBus::Broadcast(&ProfilingCaptureNotificationBus::Events::OnCaptureCpuProfilingStatisticsFinished,
saveResult.IsSuccess(),
captureInfo);
});
// Start the TickBus.

@ -12,6 +12,7 @@
#pragma once
#include <AzCore/Debug/EventTrace.h>
#include <AzCore/RTTI/RTTI.h>
#include <AzCore/std/containers/unordered_map.h>
#include <AzCore/std/string/string.h>
@ -87,6 +88,8 @@ namespace AZ
//! Enable/Disable the CpuProfiler
virtual void SetProfilerEnabled(bool enabled) = 0;
virtual bool IsProfilerEnabled() const = 0 ;
};
} // namespace RPI

@ -102,6 +102,7 @@ namespace AZ
void EndTimeRegion() final;
void FlushTimeRegionMap(TimeRegionMap& timeRegionMap) final;
void SetProfilerEnabled(bool enabled) final;
bool IsProfilerEnabled() const final;
private:
// Lazily create and register the local thread data

@ -11,6 +11,7 @@
*/
#pragma once
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/FreeListAllocator.h>
#include <Atom/RHI/PoolAllocator.h>
#include <Atom/RHI/MemoryAllocation.h>
@ -160,6 +161,7 @@ namespace AZ
template <class Traits>
void MemorySubAllocator<Traits>::GarbageCollect()
{
AZ_ATOM_PROFILE_FUNCTION("RHI", "MemorySubAllocator: GarbageCollect");
for (PageContext& pageContext : m_pageContexts)
{
pageContext.m_allocator.GarbageCollect();

@ -11,6 +11,7 @@
*/
#pragma once
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/Object.h>
#include <AzCore/std/parallel/mutex.h>
#include <AzCore/std/containers/vector.h>
@ -169,6 +170,7 @@ namespace AZ
template <typename Traits>
void ObjectCollector<Traits>::Collect(bool forceFlush)
{
AZ_ATOM_PROFILE_FUNCTION("DX12", "ObjectCollector: Collect");
m_mutex.lock();
if (m_pendingObjects.size())
{

@ -11,6 +11,7 @@
*/
#include <Atom/RHI/CommandQueue.h>
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/Device.h>
namespace AZ
@ -86,6 +87,7 @@ namespace AZ
void CommandQueue::FlushCommands()
{
AZ_ATOM_PROFILE_FUNCTION("RHI", "CommandQueue: FlushCommands");
while (!m_isWorkQueueEmpty && !m_isQuitting)
{
AZStd::this_thread::yield();

@ -186,6 +186,12 @@ namespace AZ
}
}
bool CpuProfilerImpl::IsProfilerEnabled() const
{
return m_enabled;
}
void CpuProfilerImpl::RegisterThreadStorage()
{
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);

@ -10,6 +10,7 @@
*
*/
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/Device.h>
#include <Atom/RHI/MemoryStatisticsBus.h>
@ -151,6 +152,7 @@ namespace AZ
{
if (ValidateIsInitialized() && ValidateIsInFrame())
{
AZ_ATOM_PROFILE_FUNCTION("RHI", "Device: EndFrame");
EndFrameInternal();
m_isInFrame = false;
return ResultCode::Success;
@ -172,6 +174,7 @@ namespace AZ
{
if (ValidateIsInitialized() && ValidateIsNotInFrame())
{
AZ_ATOM_PROFILE_FUNCTION("RHI", "Device: CompileMemoryStatistics");
MemoryStatisticsBuilder builder;
builder.Begin(memoryStatistics, reportFlags);
CompileMemoryStatisticsInternal(builder);

@ -9,18 +9,19 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
*/
#include <Atom/RHI/FrameGraph.h>
#include <Atom/RHI/SwapChainFrameAttachment.h>
#include <Atom/RHI/BufferFrameAttachment.h>
#include <Atom/RHI/ImageFrameAttachment.h>
#include <Atom/RHI/BufferScopeAttachment.h>
#include <Atom/RHI/ImageScopeAttachment.h>
#include <Atom/RHI/ResolveScopeAttachment.h>
#include <Atom/RHI/SwapChain.h>
#include <Atom/RHI/BufferPoolBase.h>
#include <Atom/RHI/BufferScopeAttachment.h>
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/FrameGraph.h>
#include <Atom/RHI/ImageFrameAttachment.h>
#include <Atom/RHI/ImagePoolBase.h>
#include <Atom/RHI/ImageScopeAttachment.h>
#include <Atom/RHI/QueryPool.h>
#include <Atom/RHI/ResolveScopeAttachment.h>
#include <Atom/RHI/Scope.h>
#include <Atom/RHI/SwapChain.h>
#include <Atom/RHI/SwapChainFrameAttachment.h>
#include <AzCore/Debug/EventTrace.h>
#include <AzCore/std/sort.h>
@ -76,6 +77,7 @@ namespace AZ
void FrameGraph::Clear()
{
AZ_ATOM_PROFILE_FUNCTION("RHI", "FrameGraph: Clear");
for (Scope* scope : m_scopes)
{
scope->Deactivate();

@ -9,10 +9,11 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
*/
#include <Atom/RHI/FrameGraphExecuter.h>
#include <Atom/RHI/Buffer.h>
#include <Atom/RHI/FrameGraph.h>
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/FrameGraphExecuteGroup.h>
#include <Atom/RHI/FrameGraphExecuter.h>
#include <Atom/RHI/FrameGraph.h>
#include <Atom/RHI/Image.h>
#include <AzCore/Debug/EventTrace.h>
@ -80,7 +81,7 @@ namespace AZ
void FrameGraphExecuter::End()
{
AZ_TRACE_METHOD();
AZ_ATOM_PROFILE_FUNCTION("RHI", "FrameGraphExecuter: End");
AZ_Assert(m_pendingGroups.empty(), "Pending contexts in queue.");
m_groups.clear();
EndInternal();

@ -181,7 +181,10 @@ namespace AZ
m_compileRequest = compileRequest;
FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompile);
{
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "FrameScheduler: Compile: OnFrameCompile");
FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompile);
}
FrameGraphCompileRequest frameGraphCompileRequest;
frameGraphCompileRequest.m_frameGraph = m_frameGraph.get();
@ -193,7 +196,10 @@ namespace AZ
const MessageOutcome outcome = m_frameGraphCompiler->Compile(frameGraphCompileRequest);
if (outcome.IsSuccess())
{
FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompileEnd, *m_frameGraph);
{
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "FrameScheduler: Compile: OnFrameCompileEnd");
FrameEventBus::Broadcast(&FrameEventBus::Events::OnFrameCompileEnd, *m_frameGraph);
}
FrameGraphLogger::Log(*m_frameGraph, compileRequest.m_logVerbosity);
@ -400,7 +406,11 @@ namespace AZ
m_scopeProducers.clear();
m_scopeProducerLookup.clear();
FrameEventBus::Event(m_device, &FrameEventBus::Events::OnFrameEnd);
{
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "FrameScheduler: EndFrame: OnFrameEnd");
FrameEventBus::Event(m_device, &FrameEventBus::Events::OnFrameEnd);
}
const AZStd::sys_time_t timeNowTicks = AZStd::GetTimeNowTicks();
m_cpuTimingStatistics.m_frameToFrameTime = timeNowTicks - m_lastFrameEndTime;

@ -10,6 +10,7 @@
*
*/
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/Device.h>
#include <Atom/RHI/Factory.h>
#include <Atom/RHI/RHISystem.h>
@ -213,6 +214,7 @@ namespace AZ
void RHISystem::FrameUpdate(FrameGraphCallback frameGraphCallback)
{
AZ_PROFILE_FUNCTION(AZ::Debug::ProfileCategory::AzRender);
AZ_ATOM_PROFILE_FUNCTION("RHI", "RHISystem: FrameUpdate");
{
AZ_PROFILE_SCOPE(AZ::Debug::ProfileCategory::AzRender, "main per-frame work");
@ -224,7 +226,10 @@ namespace AZ
* This exists as a hook to enable RHI sample tests, which are allowed to queue their
* own RHI scopes to the frame scheduler. This happens prior to the RPI pass graph registration.
*/
RHISystemNotificationBus::Broadcast(&RHISystemNotificationBus::Events::OnFramePrepare, m_frameScheduler);
{
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RHI", "RHISystem :FrameUpdate: OnFramePrepare");
RHISystemNotificationBus::Broadcast(&RHISystemNotificationBus::Events::OnFramePrepare, m_frameScheduler);
}
RHI::MessageOutcome outcome = m_frameScheduler.Compile(m_compileRequest);
if (outcome.IsSuccess())

@ -14,6 +14,7 @@
#include <RHI/CommandList.h>
#include <RHI/Conversions.h>
#include <RHI/DescriptorContext.h>
#include <Atom/RHI/CpuProfiler.h>
#include <AzCore/Debug/EventTrace.h>
namespace AZ
@ -179,6 +180,7 @@ namespace AZ
void CommandListAllocator::Collect()
{
AZ_ATOM_PROFILE_FUNCTION("DX12", "CommandListAllocator: Collect");
for (uint32_t queueIdx = 0; queueIdx < RHI::HardwareQueueClassCount; ++queueIdx)
{
m_commandListSubAllocators[queueIdx].ForEach([](Internal::CommandListSubAllocator& commandListSubAllocator)

@ -137,6 +137,7 @@ namespace AZ
void CommandQueueContext::End()
{
AZ_PROFILE_FUNCTION(AZ::Debug::ProfileCategory::AzRender);
AZ_ATOM_PROFILE_FUNCTION("DX12", "CommandQueueContext: End");
QueueGpuSignals(m_frameFences[m_currentFrameIndex]);
@ -150,7 +151,7 @@ namespace AZ
{
AZ_PROFILE_SCOPE_IDLE(AZ::Debug::ProfileCategory::AzRender, "Wait and Reset Fence");
AZ_ATOM_PROFILE_FUNCTION("RHI", "CommandQueueContext: Wait on Fences");
AZ_ATOM_PROFILE_TIME_GROUP_REGION("DX12", "CommandQueueContext: Wait on Fences");
FenceEvent event("FrameFence");
m_frameFences[m_currentFrameIndex].Wait(event);

@ -15,6 +15,7 @@
#include <RHI/Conversions.h>
#include <RHI/Device.h>
#include <RHI/Image.h>
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI.Reflect/DX12/PlatformLimitsDescriptor.h>
namespace AZ
@ -345,6 +346,7 @@ namespace AZ
void DescriptorContext::GarbageCollect()
{
AZ_ATOM_PROFILE_FUNCTION("DX12", "DescriptorContext: GarbageCollect");
for (const auto& itr : m_platformLimitsDescriptor->m_descriptorHeapLimits)
{
for (uint32_t shaderVisibleIdx = 0; shaderVisibleIdx < PlatformLimitsDescriptor::NumHeapFlags; ++shaderVisibleIdx)

@ -60,6 +60,7 @@ namespace AZ
void StagingMemoryAllocator::GarbageCollect()
{
AZ_ATOM_PROFILE_FUNCTION("DX12", "StagingMemoryAllocator: GarbageCollect");
m_mediumBlockAllocators.ForEach([](MemoryLinearSubAllocator& subAllocator)
{
subAllocator.GarbageCollect();

@ -11,6 +11,7 @@
*/
#include <Atom/RHI/CommandList.h>
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI/Factory.h>
#include <Atom/RHI/FrameGraphInterface.h>
#include <Atom/RHI/RHISystemInterface.h>
@ -78,6 +79,7 @@ namespace AZ
void GpuQuerySystem::Update()
{
AZ_ATOM_PROFILE_FUNCTION("RPI", "GpuQuerySystem: Update");
for (auto& queryPool : m_queryPoolArray)
{
if (queryPool)

@ -282,24 +282,27 @@ namespace AZ
m_rhiSystem.FrameUpdate(
[this](RHI::FrameGraphBuilder& frameGraphBuilder)
{
// Pass system's frame update, which includes the logic of adding scope producers, has to be added here since the
// scope producers only can be added to the frame when frame started which cleans up previous scope producers.
m_passSystem.FrameUpdate(frameGraphBuilder);
// Update View Srgs
for (auto& scenePtr : m_scenes)
{
scenePtr->UpdateSrgs();
}
});
{
// Pass system's frame update, which includes the logic of adding scope producers, has to be added here since the scope producers only can be added to the frame
// when frame started which cleans up previous scope producers.
m_passSystem.FrameUpdate(frameGraphBuilder);
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "RPISystem: FrameEnd");
m_dynamicDraw.FrameEnd();
m_passSystem.FrameEnd();
// Update View Srgs
for (auto& scenePtr : m_scenes)
{
scenePtr->UpdateSrgs();
scenePtr->OnFrameEnd();
}
});
m_dynamicDraw.FrameEnd();
m_passSystem.FrameEnd();
for (auto& scenePtr : m_scenes)
{
scenePtr->OnFrameEnd();
}
m_renderTick++;

@ -404,6 +404,7 @@ namespace AZ
{
AZ_PROFILE_SCOPE(Debug::ProfileCategory::AzRender, "WaitForSimulationCompletion");
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "WaitForSimulationCompletion");
WaitAndCleanCompletionJob(m_simulationCompletion);
}
@ -420,12 +421,15 @@ namespace AZ
// Get active pipelines which need to be rendered and notify them frame started
AZStd::vector<RenderPipelinePtr> activePipelines;
for (auto& pipeline : m_pipelines)
{
if (pipeline->NeedsRender())
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "OnStartFrame");
for (auto& pipeline : m_pipelines)
{
activePipelines.push_back(pipeline);
pipeline->OnStartFrame(tickInfo);
if (pipeline->NeedsRender())
{
activePipelines.push_back(pipeline);
pipeline->OnStartFrame(tickInfo);
}
}
}
@ -444,7 +448,7 @@ namespace AZ
{
AZ_PROFILE_SCOPE(Debug::ProfileCategory::AzRender, "Setup Views");
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "Setup Views");
// Collect persistent views from all pipelines to be rendered
AZStd::map<ViewPtr, RHI::DrawListMask> persistentViews;
@ -490,7 +494,6 @@ namespace AZ
{
const auto renderLambda = [this, &fp]()
{
AZ_PROFILE_SCOPE_DYNAMIC(Debug::ProfileCategory::AzRender, "renderJob - fp:%s", fp->RTTI_GetTypeName());
fp->Render(m_renderPacket);
};
@ -526,12 +529,14 @@ namespace AZ
// Add dynamic draw data for all the views
if (m_dynamicDrawSystem)
{
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "DynamicDraw SubmitDrawData");
m_dynamicDrawSystem->SubmitDrawData(this, m_renderPacket.m_views);
}
}
{
AZ_PROFILE_SCOPE(AZ::Debug::ProfileCategory::AzRender, "FinalizeDrawLists");
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "FinalizeDrawLists");
if (jobPolicy == RHI::JobPolicy::Serial)
{
for (auto& view : m_renderPacket.m_views)
@ -542,7 +547,6 @@ namespace AZ
else
{
AZ::JobCompletion* finalizeDrawListsCompletion = aznew AZ::JobCompletion();
AZ_PROFILE_EVENT_BEGIN(Debug::ProfileCategory::AzRender, "StartFinalizeDrawListsJobs");
for (auto& view : m_renderPacket.m_views)
{
const auto finalizeDrawListsLambda = [view]()
@ -559,11 +563,15 @@ namespace AZ
}
}
SceneNotificationBus::Event(GetId(), &SceneNotification::OnEndPrepareRender);
{
AZ_ATOM_PROFILE_TIME_GROUP_REGION("RPI", "Scene OnEndPrepareRender");
SceneNotificationBus::Event(GetId(), &SceneNotification::OnEndPrepareRender);
}
}
void Scene::OnFrameEnd()
{
AZ_ATOM_PROFILE_FUNCTION("RPI", "Scene: OnFrameEnd");
for (auto& pipeline : m_pipelines)
{
if (pipeline->NeedsRender())
@ -702,6 +710,7 @@ namespace AZ
void Scene::RebuildPipelineStatesLookup()
{
AZ_ATOM_PROFILE_FUNCTION("RPI", "Scene: RebuildPipelineStatesLookup");
m_pipelineStatesLookup.clear();
AZStd::queue<ParentPass*> parents;

@ -13,6 +13,7 @@
#pragma once
#include <Atom/RHI/CpuProfiler.h>
#include <Atom/RHI.Reflect/CpuTimingStatistics.h>
namespace AZ
{
@ -56,6 +57,16 @@ namespace AZ
ImGuiTextFilter m_timedRegionFilter;
GroupRegionMap m_groupRegionMap;
// Pause cpu profiling. The profiler will show the statistics of the last frame before pause
bool m_paused = false;
// Total frames need to be saved
int m_captureFrameCount = 1;
AZ::RHI::CpuTimingStatistics m_cpuTimingStatisticsWhenPause;
AZStd::string m_lastCapturedFilePath;
};
} // namespace Render
}

@ -10,7 +10,10 @@
*
*/
#include <Atom/RHI.Reflect/CpuTimingStatistics.h>
#include <Atom/Feature/Utils/ProfilingCaptureBus.h>
#include <Atom/RPI.Public/RPISystemInterface.h>
#include <AzCore/IO/SystemFile.h> // For AZ_MAX_PATH_LEN
#include <AzCore/std/time.h>
namespace AZ
{
@ -34,16 +37,42 @@ namespace AZ
}
}
inline void ImGuiCpuProfiler::Draw(bool& keepDrawing, const AZ::RHI::CpuTimingStatistics& cpuTimingStatistics)
inline void ImGuiCpuProfiler::Draw(bool& keepDrawing, const AZ::RHI::CpuTimingStatistics& currentCpuTimingStatistics)
{
// Cache the value to detect if it was changed by ImGui(user pressed 'x')
const bool cachedShowCpuProfiler = keepDrawing;
const ImVec2 windowSize(640.0f, 480.0f);
ImGui::SetNextWindowSize(windowSize, ImGuiCond_Once);
bool captureToFile = false;
if (ImGui::Begin("Cpu Profiler", &keepDrawing, ImGuiWindowFlags_None))
{
UpdateGroupRegionMap();
m_paused = !AZ::RHI::CpuProfiler::Get()->IsProfilerEnabled();
if (ImGui::Button(m_paused?"Resume":"Pause"))
{
m_paused = !m_paused;
AZ::RHI::CpuProfiler::Get()->SetProfilerEnabled(!m_paused);
}
// Update region map and cache the input cpu timing statistics when the profiling is not paused
if (!m_paused)
{
UpdateGroupRegionMap();
m_cpuTimingStatisticsWhenPause = currentCpuTimingStatistics;
}
if (ImGui::Button("Capture"))
{
captureToFile = true;
}
if (!m_lastCapturedFilePath.empty())
{
ImGui::SameLine();
ImGui::Text(m_lastCapturedFilePath.c_str());
}
const AZ::RHI::CpuTimingStatistics& cpuTimingStatistics = m_cpuTimingStatisticsWhenPause;
const AZStd::sys_time_t ticksPerSecond = AZStd::GetTimeTicksPerSecond();
@ -51,7 +80,7 @@ namespace AZ
{
// Note: converting to microseconds integer before converting to milliseconds float
const float timeInMs = static_cast<float>((duration * 1000) / (ticksPerSecond / 1000)) / 1000.0f;
ImGui::Text("%.1f ms", timeInMs);
ImGui::Text("%.2f ms", timeInMs);
};
const auto ShowRow = [ticksPerSecond, &ShowTimeInMs](const char* regionLabel, AZStd::sys_time_t duration)
@ -158,6 +187,20 @@ namespace AZ
}
ImGui::End();
if (captureToFile)
{
AZStd::sys_time_t timeNow = AZStd::GetTimeNowSecond();
AZStd::string timeString;
AZStd::to_string(timeString, timeNow);
u64 currentTick = AZ::RPI::RPISystemInterface::Get()->GetCurrentTick();
AZStd::string frameDataFilePath = AZStd::string::format("@user@/CpuProfiler/%s_%llu.json", timeString.c_str(), currentTick);
char resolvedPath[AZ_MAX_PATH_LEN];
AZ::IO::FileIOBase::GetInstance()->ResolvePath(frameDataFilePath.c_str(), resolvedPath, AZ_MAX_PATH_LEN);
m_lastCapturedFilePath = resolvedPath;
AZ::Render::ProfilingCaptureRequestBus::Broadcast(&AZ::Render::ProfilingCaptureRequestBus::Events::CaptureCpuProfilingStatistics,
frameDataFilePath);
}
// Toggle if the bool isn't the same as the cached value
if (cachedShowCpuProfiler != keepDrawing)
{

Loading…
Cancel
Save