[ATOM-16021] Initial continuous capture support (#2624)

* Profiler: Implement continuous capture internals

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: extend ProfileCaptureSystemComponent

Implements dumping of saved CPU profiling data to a local file, blocking
call.

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: Working IO thread for serialization

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: switch to AZ::JobFunction for IO

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: move to a ring buffer for storage

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: switch back to IO thread

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: add TODO

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>

* Profiler: correct thread safety issues

Signed-off-by: Jacob Hilliard <jhlliar@amazon.com>
monroegm-disable-blank-issue-2
Jacob Hilliard 4 years ago committed by GitHub
parent adaeb7c203
commit 7c5120b72f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -28,9 +28,15 @@ namespace AZ
//! Dump the PipelineStatistics from passes to a json file.
virtual bool CapturePassPipelineStatistics(const AZStd::string& outputFilePath) = 0;
//! Dump the Cpu Profiling Statistics to a json file.
//! Dump a single frame of Cpu profiling data
virtual bool CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath) = 0;
//! Start a multiframe capture of CPU profiling data.
virtual bool BeginContinuousCpuProfilingCapture() = 0;
//! End and dump an in-progress continuous capture.
virtual bool EndContinuousCpuProfilingCapture(const AZStd::string& outputFilePath) = 0;
//! Dump the benchmark metadata to a json file.
virtual bool CaptureBenchmarkMetadata(const AZStd::string& benchmarkName, const AZStd::string& outputFilePath) = 0;
};

@ -25,6 +25,7 @@
#include <AzCore/RTTI/BehaviorContext.h>
#include <AzCore/Serialization/Json/JsonSerializationSettings.h>
#include <AzCore/Serialization/SerializeContext.h>
#include <AzCore/std/parallel/thread.h>
namespace AZ
{
@ -157,14 +158,15 @@ namespace AZ
Name m_groupName;
Name m_regionName;
uint16_t m_stackDepth;
AZStd::sys_time_t m_elapsedInNanoseconds;
AZStd::sys_time_t m_startTick;
AZStd::sys_time_t m_endTick;
};
AZ_TYPE_INFO(CpuProfilingStatisticsSerializer, "{D5B02946-0D27-474F-9A44-364C2706DD41}");
static void Reflect(AZ::ReflectContext* context);
CpuProfilingStatisticsSerializer() = default;
CpuProfilingStatisticsSerializer(const RHI::CpuProfiler::TimeRegionMap& timeRegionMap);
CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& continuousData);
AZStd::vector<CpuProfilingStatisticsSerializerEntry> m_cpuProfilingStatisticsSerializerEntries;
};
@ -327,17 +329,20 @@ namespace AZ
// --- CpuProfilingStatisticsSerializer ---
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const RHI::CpuProfiler::TimeRegionMap& timeRegionMap)
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& continuousData)
{
// Create serializable entries
for (auto& threadEntry : timeRegionMap)
for (const auto& timeRegionMap : continuousData)
{
for (auto& cachedRegionEntry : threadEntry.second)
for (const auto& threadEntry : timeRegionMap)
{
m_cpuProfilingStatisticsSerializerEntries.insert(
m_cpuProfilingStatisticsSerializerEntries.end(),
cachedRegionEntry.second.begin(),
cachedRegionEntry.second.end());
for (const auto& cachedRegionEntry : threadEntry.second)
{
m_cpuProfilingStatisticsSerializerEntries.insert(
m_cpuProfilingStatisticsSerializerEntries.end(),
cachedRegionEntry.second.begin(),
cachedRegionEntry.second.end());
}
}
}
}
@ -359,19 +364,11 @@ namespace AZ
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::CpuProfilingStatisticsSerializerEntry(const RHI::CachedTimeRegion& cachedTimeRegion)
{
// Converts ticks to Nanoseconds
static const auto ticksToNanoSeconds = [](AZStd::sys_time_t elapsedInTicks) -> AZStd::sys_time_t
{
const AZStd::sys_time_t ticksPerSecond = AZStd::GetTimeTicksPerSecond();
const AZStd::sys_time_t timeInNanoseconds = (elapsedInTicks * 1000000) / (ticksPerSecond / 1000);
return timeInNanoseconds;
};
m_groupName = cachedTimeRegion.m_groupRegionName->m_groupName;
m_regionName = cachedTimeRegion.m_groupRegionName->m_regionName;
m_stackDepth = cachedTimeRegion.m_stackDepth;
m_elapsedInNanoseconds = ticksToNanoSeconds(cachedTimeRegion.m_endTick - cachedTimeRegion.m_startTick);
m_startTick = cachedTimeRegion.m_startTick;
m_endTick = cachedTimeRegion.m_endTick;
}
void CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::Reflect(AZ::ReflectContext* context)
@ -383,7 +380,8 @@ namespace AZ
->Field("groupName", &CpuProfilingStatisticsSerializerEntry::m_groupName)
->Field("regionName", &CpuProfilingStatisticsSerializerEntry::m_regionName)
->Field("stackDepth", &CpuProfilingStatisticsSerializerEntry::m_stackDepth)
->Field("elapsedInNanoseconds", &CpuProfilingStatisticsSerializerEntry::m_elapsedInNanoseconds)
->Field("startTick", &CpuProfilingStatisticsSerializerEntry::m_startTick)
->Field("endTick", &CpuProfilingStatisticsSerializerEntry::m_endTick)
;
}
}
@ -474,6 +472,12 @@ namespace AZ
TickBus::Handler::BusDisconnect();
ProfilingCaptureRequestBus::Handler::BusDisconnect();
// Block deactivation until the IO thread has finished serializing the CPU data
if (m_cpuDataSerializationThread.joinable())
{
m_cpuDataSerializationThread.join();
}
}
bool ProfilingCaptureSystemComponent::CapturePassTimestamp(const AZStd::string& outputFilePath)
@ -641,6 +645,43 @@ namespace AZ
return captureStarted;
}
bool SerializeCpuProfilingData(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& data, AZStd::string outputFilePath, bool wasEnabled)
{
AZ_TracePrintf("ProfilingCaptureSystemComponent", "Beginning serialization of %zu frames of profiling data\n", data.size());
JsonSerializerSettings serializationSettings;
serializationSettings.m_keepDefaults = true;
CpuProfilingStatisticsSerializer serializer(data);
const auto saveResult = JsonSerializationUtils::SaveObjectToFile(&serializer,
outputFilePath, (CpuProfilingStatisticsSerializer*)nullptr, &serializationSettings);
AZStd::string captureInfo = outputFilePath;
if (!saveResult.IsSuccess())
{
captureInfo = AZStd::string::format("Failed to save Cpu Profiling Statistics data to file '%s'. Error: %s",
outputFilePath.c_str(),
saveResult.GetError().c_str());
AZ_Warning("ProfilingCaptureSystemComponent", false, captureInfo.c_str());
}
else
{
AZ_Printf("ProfilingCaptureSystemComponent", "Cpu profiling statistics was saved to file [%s]\n", outputFilePath.c_str());
}
// Disable the profiler again
if (!wasEnabled)
{
RHI::CpuProfiler::Get()->SetProfilerEnabled(false);
}
// Notify listeners that the pass' PipelineStatistics queries capture has finished.
ProfilingCaptureNotificationBus::Broadcast(&ProfilingCaptureNotificationBus::Events::OnCaptureCpuProfilingStatisticsFinished,
saveResult.IsSuccess(),
captureInfo);
return saveResult.IsSuccess();
}
bool ProfilingCaptureSystemComponent::CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath)
{
// Start the cpu profiling
@ -652,49 +693,66 @@ namespace AZ
const bool captureStarted = m_cpuProfilingStatisticsCapture.StartCapture([this, outputFilePath, wasEnabled]()
{
JsonSerializerSettings serializationSettings;
serializationSettings.m_keepDefaults = true;
// Blocking call for a single frame of data, avoid thread overhead
AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap> singleFrameData;
singleFrameData.push_back(RHI::CpuProfiler::Get()->GetTimeRegionMap());
SerializeCpuProfilingData(singleFrameData, outputFilePath, wasEnabled);
});
// Start the TickBus.
if (captureStarted)
{
TickBus::Handler::BusConnect();
}
// Get time Cpu profiled time regions
const RHI::CpuProfiler::TimeRegionMap& timeRegionMap = RHI::CpuProfiler::Get()->GetTimeRegionMap();
return captureStarted;
}
CpuProfilingStatisticsSerializer serializer(timeRegionMap);
const auto saveResult = JsonSerializationUtils::SaveObjectToFile(&serializer,
outputFilePath, (CpuProfilingStatisticsSerializer*)nullptr, &serializationSettings);
bool ProfilingCaptureSystemComponent::BeginContinuousCpuProfilingCapture()
{
return AZ::RHI::CpuProfiler::Get()->BeginContinuousCapture();
}
AZStd::string captureInfo = outputFilePath;
if (!saveResult.IsSuccess())
{
captureInfo = AZStd::string::format("Failed to save Cpu Profiling Statistics data to file '%s'. Error: %s",
outputFilePath.c_str(),
saveResult.GetError().c_str());
AZ_Warning("ProfilingCaptureSystemComponent", false, captureInfo.c_str());
}
else
bool ProfilingCaptureSystemComponent::EndContinuousCpuProfilingCapture(const AZStd::string& outputFilePath)
{
bool expected = false;
if (m_cpuDataSerializationInProgress.compare_exchange_strong(expected, true))
{
AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap> captureResult;
const bool captureEnded = AZ::RHI::CpuProfiler::Get()->EndContinuousCapture(captureResult);
if (!captureEnded)
{
AZ_Printf("ProfilingCaptureSystemComponent", "Cpu profiling statistics was saved to file [%s]\n", outputFilePath.c_str());
AZ_TracePrintf("ProfilingCaptureSystemComponent", "Could not end the continuous capture, is one in progress?\n");
m_cpuDataSerializationInProgress.store(false);
return false;
}
// Disable the profiler again
if (!wasEnabled)
// cpuProfilingData could be 1GB+ once saved, so use an IO thread to write it to disk.
auto threadIoFunction =
[data = AZStd::move(captureResult), filePath = AZStd::string(outputFilePath), &flag = m_cpuDataSerializationInProgress]()
{
RHI::CpuProfiler::Get()->SetProfilerEnabled(false);
SerializeCpuProfilingData(data, filePath, true);
flag.store(false);
};
// If the thread object already exists (ex. we have already serialized data), join. This will not block since
// m_cpuDataSerializationInProgress was false, meaning the IO thread has already completed execution.
// TODO Use a reusable thread implementation over repeated creation + destruction of threads [ATOM-16214]
if (m_cpuDataSerializationThread.joinable())
{
m_cpuDataSerializationThread.join();
}
// Notify listeners that the pass' PipelineStatistics queries capture has finished.
ProfilingCaptureNotificationBus::Broadcast(&ProfilingCaptureNotificationBus::Events::OnCaptureCpuProfilingStatisticsFinished,
saveResult.IsSuccess(),
captureInfo);
auto thread = AZStd::thread(threadIoFunction);
m_cpuDataSerializationThread = AZStd::move(thread);
});
// Start the TickBus.
if (captureStarted)
{
TickBus::Handler::BusConnect();
return true;
}
return captureStarted;
AZ_TracePrintf(
"ProfilingSystemCaptureComponent",
"Cannot end a continuous capture - another serialization is currently in progress\n");
return false;
}
bool ProfilingCaptureSystemComponent::CaptureBenchmarkMetadata(const AZStd::string& benchmarkName, const AZStd::string& outputFilePath)

@ -12,6 +12,7 @@
#include <AzCore/Component/TickBus.h>
#include <Atom/Feature/Utils/ProfilingCaptureBus.h>
#include <Atom/RHI/CpuProfiler.h>
namespace AZ
{
@ -71,6 +72,8 @@ namespace AZ
bool CaptureCpuFrameTime(const AZStd::string& outputFilePath) override;
bool CapturePassPipelineStatistics(const AZStd::string& outputFilePath) override;
bool CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath) override;
bool BeginContinuousCpuProfilingCapture() override;
bool EndContinuousCpuProfilingCapture(const AZStd::string& outputFilePath) override;
bool CaptureBenchmarkMetadata(const AZStd::string& benchmarkName, const AZStd::string& outputFilePath) override;
private:
@ -86,6 +89,11 @@ namespace AZ
DelayedQueryCaptureHelper m_pipelineStatisticsCapture;
DelayedQueryCaptureHelper m_cpuProfilingStatisticsCapture;
DelayedQueryCaptureHelper m_benchmarkMetadataCapture;
// Flag passed by reference to the CPU profiling data serialization job, blocks new continuous capture requests when set.
AZStd::atomic_bool m_cpuDataSerializationInProgress = false;
AZStd::thread m_cpuDataSerializationThread;
};
}
}

@ -10,6 +10,7 @@
#include <AzCore/Debug/EventTrace.h>
#include <AzCore/RTTI/RTTI.h>
#include <AzCore/std/containers/ring_buffer.h>
#include <AzCore/std/containers/unordered_map.h>
#include <AzCore/std/string/string.h>
@ -82,6 +83,14 @@ namespace AZ
//! Get the last frame's TimeRegionMap
virtual const TimeRegionMap& GetTimeRegionMap() const = 0;
//! Begin a continuous capture. Blocks the profiler from being toggled off until EndContinuousCapture is called.
[[nodiscard]] virtual bool BeginContinuousCapture() = 0;
//! Flush the CPU Profiler's saved data into the passed ring buffer .
[[nodiscard]] virtual bool EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget) = 0;
virtual bool IsContinuousCaptureInProgress() const = 0;
//! Enable/Disable the CpuProfiler
virtual void SetProfilerEnabled(bool enabled) = 0;

@ -106,13 +106,18 @@ namespace AZ
void OnSystemTick() final override;
//! CpuProfiler overrides...
void BeginTimeRegion(TimeRegion& timeRegion) final;
void EndTimeRegion() final;
const TimeRegionMap& GetTimeRegionMap() const final;
void SetProfilerEnabled(bool enabled) final;
bool IsProfilerEnabled() const final;
void BeginTimeRegion(TimeRegion& timeRegion) final override;
void EndTimeRegion() final override;
const TimeRegionMap& GetTimeRegionMap() const final override;
bool BeginContinuousCapture() final override;
bool EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget) final override;
bool IsContinuousCaptureInProgress() const final override;
void SetProfilerEnabled(bool enabled) final override;
bool IsProfilerEnabled() const final override;
private:
static constexpr AZStd::size_t MaxFramesToSave = 2 * 60 * 120; // 2 minutes of 120fps
// Lazily create and register the local thread data
void RegisterThreadStorage();
@ -134,6 +139,14 @@ namespace AZ
AZStd::shared_mutex m_shutdownMutex;
bool m_initialized = false;
AZStd::mutex m_continuousCaptureEndingMutex;
AZStd::atomic_bool m_continuousCaptureInProgress;
// Stores multiple frames of profiling data, size is controlled by MaxFramesToSave. Flushed when EndContinuousCapture is called.
// Ring buffer so that we can have fast append of new data + removal of old profiling data with good cache locality.
AZStd::ring_buffer<TimeRegionMap> m_continuousCaptureData;
};
}; // namespace RPI

@ -81,6 +81,7 @@ namespace AZ
Interface<CpuProfiler>::Register(this);
m_initialized = true;
SystemTickBus::Handler::BusConnect();
m_continuousCaptureData.set_capacity(10);
}
void CpuProfilerImpl::Shutdown()
@ -101,6 +102,8 @@ namespace AZ
m_registeredThreads.clear();
m_timeRegionMap.clear();
m_initialized = false;
m_continuousCaptureInProgress.store(false);
m_continuousCaptureData.clear();
SystemTickBus::Handler::BusDisconnect();
}
@ -141,12 +144,54 @@ namespace AZ
return m_timeRegionMap;
}
bool CpuProfilerImpl::BeginContinuousCapture()
{
bool expected = false;
if (m_continuousCaptureInProgress.compare_exchange_strong(expected, true))
{
m_enabled = true;
AZ_TracePrintf("Profiler", "Continuous capture started\n");
return true;
}
AZ_TracePrintf("Profiler", "Attempting to start a continuous capture while one already in progress");
return false;
}
bool CpuProfilerImpl::EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget)
{
if (!m_continuousCaptureInProgress.load())
{
AZ_TracePrintf("Profiler", "Attempting to end a continuous capture while one not in progress");
return false;
}
if (m_continuousCaptureEndingMutex.try_lock())
{
m_enabled = false;
flushTarget = AZStd::move(m_continuousCaptureData);
m_continuousCaptureData.clear();
AZ_TracePrintf("Profiler", "Continuous capture ended\n");
m_continuousCaptureInProgress.store(false);
m_continuousCaptureEndingMutex.unlock();
return true;
}
return false;
}
bool CpuProfilerImpl::IsContinuousCaptureInProgress() const
{
return m_continuousCaptureInProgress.load();
}
void CpuProfilerImpl::SetProfilerEnabled(bool enabled)
{
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
// Early out if the state is already the same
if (m_enabled == enabled)
// Early out if the state is already the same or a continuous capture is in progress
if (m_enabled == enabled || m_continuousCaptureInProgress.load())
{
return;
}
@ -179,6 +224,20 @@ namespace AZ
{
return;
}
if (m_continuousCaptureInProgress.load() && m_continuousCaptureEndingMutex.try_lock())
{
if (m_continuousCaptureData.full() && m_continuousCaptureData.size() != MaxFramesToSave)
{
const AZStd::size_t size = m_continuousCaptureData.size();
m_continuousCaptureData.set_capacity(AZStd::min(MaxFramesToSave, size + size / 2));
}
m_continuousCaptureData.push_back(AZStd::move(m_timeRegionMap));
m_timeRegionMap.clear();
m_continuousCaptureEndingMutex.unlock();
}
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
// Iterate through all the threads, and collect the thread's cached time regions

@ -129,6 +129,34 @@ namespace AZ
m_captureToFile = true;
}
ImGui::SameLine();
bool isInProgress = RHI::CpuProfiler::Get()->IsContinuousCaptureInProgress();
if (ImGui::Button(isInProgress ? "End" : "Begin"))
{
if (isInProgress)
{
AZStd::sys_time_t timeNow = AZStd::GetTimeNowSecond();
AZStd::string timeString;
AZStd::to_string(timeString, timeNow);
u64 currentTick = AZ::RPI::RPISystemInterface::Get()->GetCurrentTick();
const AZStd::string frameDataFilePath = AZStd::string::format(
"@user@/CpuProfiler/%s_%llu.json",
timeString.c_str(),
currentTick);
char resolvedPath[AZ::IO::MaxPathLength];
AZ::IO::FileIOBase::GetInstance()->ResolvePath(frameDataFilePath.c_str(), resolvedPath, AZ::IO::MaxPathLength);
m_lastCapturedFilePath = resolvedPath;
AZ::Render::ProfilingCaptureRequestBus::Broadcast(
&AZ::Render::ProfilingCaptureRequestBus::Events::EndContinuousCpuProfilingCapture, frameDataFilePath);
}
else
{
AZ::Render::ProfilingCaptureRequestBus::Broadcast(
&AZ::Render::ProfilingCaptureRequestBus::Events::BeginContinuousCpuProfilingCapture);
}
}
if (!m_lastCapturedFilePath.empty())
{
ImGui::SameLine();

Loading…
Cancel
Save