diff --git a/Gems/Atom/Feature/Common/Code/Include/Atom/Feature/Utils/ProfilingCaptureBus.h b/Gems/Atom/Feature/Common/Code/Include/Atom/Feature/Utils/ProfilingCaptureBus.h index 03f522ba44..b22e82f456 100644 --- a/Gems/Atom/Feature/Common/Code/Include/Atom/Feature/Utils/ProfilingCaptureBus.h +++ b/Gems/Atom/Feature/Common/Code/Include/Atom/Feature/Utils/ProfilingCaptureBus.h @@ -28,9 +28,15 @@ namespace AZ //! Dump the PipelineStatistics from passes to a json file. virtual bool CapturePassPipelineStatistics(const AZStd::string& outputFilePath) = 0; - //! Dump the Cpu Profiling Statistics to a json file. + //! Dump a single frame of Cpu profiling data virtual bool CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath) = 0; + //! Start a multiframe capture of CPU profiling data. + virtual bool BeginContinuousCpuProfilingCapture() = 0; + + //! End and dump an in-progress continuous capture. + virtual bool EndContinuousCpuProfilingCapture(const AZStd::string& outputFilePath) = 0; + //! Dump the benchmark metadata to a json file. virtual bool CaptureBenchmarkMetadata(const AZStd::string& benchmarkName, const AZStd::string& outputFilePath) = 0; }; diff --git a/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp b/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp index 1e2d549e7a..add6d0e098 100644 --- a/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp +++ b/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace AZ { @@ -157,14 +158,15 @@ namespace AZ Name m_groupName; Name m_regionName; uint16_t m_stackDepth; - AZStd::sys_time_t m_elapsedInNanoseconds; + AZStd::sys_time_t m_startTick; + AZStd::sys_time_t m_endTick; }; AZ_TYPE_INFO(CpuProfilingStatisticsSerializer, "{D5B02946-0D27-474F-9A44-364C2706DD41}"); static void Reflect(AZ::ReflectContext* context); CpuProfilingStatisticsSerializer() = default; - CpuProfilingStatisticsSerializer(const RHI::CpuProfiler::TimeRegionMap& timeRegionMap); + CpuProfilingStatisticsSerializer(const AZStd::ring_buffer& continuousData); AZStd::vector m_cpuProfilingStatisticsSerializerEntries; }; @@ -327,17 +329,20 @@ namespace AZ // --- CpuProfilingStatisticsSerializer --- - CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const RHI::CpuProfiler::TimeRegionMap& timeRegionMap) + CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const AZStd::ring_buffer& continuousData) { // Create serializable entries - for (auto& threadEntry : timeRegionMap) + for (const auto& timeRegionMap : continuousData) { - for (auto& cachedRegionEntry : threadEntry.second) + for (const auto& threadEntry : timeRegionMap) { - m_cpuProfilingStatisticsSerializerEntries.insert( - m_cpuProfilingStatisticsSerializerEntries.end(), - cachedRegionEntry.second.begin(), - cachedRegionEntry.second.end()); + for (const auto& cachedRegionEntry : threadEntry.second) + { + m_cpuProfilingStatisticsSerializerEntries.insert( + m_cpuProfilingStatisticsSerializerEntries.end(), + cachedRegionEntry.second.begin(), + cachedRegionEntry.second.end()); + } } } } @@ -359,19 +364,11 @@ namespace AZ CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::CpuProfilingStatisticsSerializerEntry(const RHI::CachedTimeRegion& cachedTimeRegion) { - // Converts ticks to Nanoseconds - static const auto ticksToNanoSeconds = [](AZStd::sys_time_t elapsedInTicks) -> AZStd::sys_time_t - { - const AZStd::sys_time_t ticksPerSecond = AZStd::GetTimeTicksPerSecond(); - - const AZStd::sys_time_t timeInNanoseconds = (elapsedInTicks * 1000000) / (ticksPerSecond / 1000); - return timeInNanoseconds; - }; - m_groupName = cachedTimeRegion.m_groupRegionName->m_groupName; m_regionName = cachedTimeRegion.m_groupRegionName->m_regionName; m_stackDepth = cachedTimeRegion.m_stackDepth; - m_elapsedInNanoseconds = ticksToNanoSeconds(cachedTimeRegion.m_endTick - cachedTimeRegion.m_startTick); + m_startTick = cachedTimeRegion.m_startTick; + m_endTick = cachedTimeRegion.m_endTick; } void CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::Reflect(AZ::ReflectContext* context) @@ -383,7 +380,8 @@ namespace AZ ->Field("groupName", &CpuProfilingStatisticsSerializerEntry::m_groupName) ->Field("regionName", &CpuProfilingStatisticsSerializerEntry::m_regionName) ->Field("stackDepth", &CpuProfilingStatisticsSerializerEntry::m_stackDepth) - ->Field("elapsedInNanoseconds", &CpuProfilingStatisticsSerializerEntry::m_elapsedInNanoseconds) + ->Field("startTick", &CpuProfilingStatisticsSerializerEntry::m_startTick) + ->Field("endTick", &CpuProfilingStatisticsSerializerEntry::m_endTick) ; } } @@ -474,6 +472,12 @@ namespace AZ TickBus::Handler::BusDisconnect(); ProfilingCaptureRequestBus::Handler::BusDisconnect(); + + // Block deactivation until the IO thread has finished serializing the CPU data + if (m_cpuDataSerializationThread.joinable()) + { + m_cpuDataSerializationThread.join(); + } } bool ProfilingCaptureSystemComponent::CapturePassTimestamp(const AZStd::string& outputFilePath) @@ -641,6 +645,43 @@ namespace AZ return captureStarted; } + bool SerializeCpuProfilingData(const AZStd::ring_buffer& data, AZStd::string outputFilePath, bool wasEnabled) + { + AZ_TracePrintf("ProfilingCaptureSystemComponent", "Beginning serialization of %zu frames of profiling data\n", data.size()); + JsonSerializerSettings serializationSettings; + serializationSettings.m_keepDefaults = true; + + CpuProfilingStatisticsSerializer serializer(data); + + const auto saveResult = JsonSerializationUtils::SaveObjectToFile(&serializer, + outputFilePath, (CpuProfilingStatisticsSerializer*)nullptr, &serializationSettings); + + AZStd::string captureInfo = outputFilePath; + if (!saveResult.IsSuccess()) + { + captureInfo = AZStd::string::format("Failed to save Cpu Profiling Statistics data to file '%s'. Error: %s", + outputFilePath.c_str(), + saveResult.GetError().c_str()); + AZ_Warning("ProfilingCaptureSystemComponent", false, captureInfo.c_str()); + } + else + { + AZ_Printf("ProfilingCaptureSystemComponent", "Cpu profiling statistics was saved to file [%s]\n", outputFilePath.c_str()); + } + + // Disable the profiler again + if (!wasEnabled) + { + RHI::CpuProfiler::Get()->SetProfilerEnabled(false); + } + + // Notify listeners that the pass' PipelineStatistics queries capture has finished. + ProfilingCaptureNotificationBus::Broadcast(&ProfilingCaptureNotificationBus::Events::OnCaptureCpuProfilingStatisticsFinished, + saveResult.IsSuccess(), + captureInfo); + return saveResult.IsSuccess(); + } + bool ProfilingCaptureSystemComponent::CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath) { // Start the cpu profiling @@ -652,49 +693,66 @@ namespace AZ const bool captureStarted = m_cpuProfilingStatisticsCapture.StartCapture([this, outputFilePath, wasEnabled]() { - JsonSerializerSettings serializationSettings; - serializationSettings.m_keepDefaults = true; + // Blocking call for a single frame of data, avoid thread overhead + AZStd::ring_buffer singleFrameData; + singleFrameData.push_back(RHI::CpuProfiler::Get()->GetTimeRegionMap()); + SerializeCpuProfilingData(singleFrameData, outputFilePath, wasEnabled); + }); + + // Start the TickBus. + if (captureStarted) + { + TickBus::Handler::BusConnect(); + } - // Get time Cpu profiled time regions - const RHI::CpuProfiler::TimeRegionMap& timeRegionMap = RHI::CpuProfiler::Get()->GetTimeRegionMap(); + return captureStarted; + } - CpuProfilingStatisticsSerializer serializer(timeRegionMap); - const auto saveResult = JsonSerializationUtils::SaveObjectToFile(&serializer, - outputFilePath, (CpuProfilingStatisticsSerializer*)nullptr, &serializationSettings); + bool ProfilingCaptureSystemComponent::BeginContinuousCpuProfilingCapture() + { + return AZ::RHI::CpuProfiler::Get()->BeginContinuousCapture(); + } - AZStd::string captureInfo = outputFilePath; - if (!saveResult.IsSuccess()) - { - captureInfo = AZStd::string::format("Failed to save Cpu Profiling Statistics data to file '%s'. Error: %s", - outputFilePath.c_str(), - saveResult.GetError().c_str()); - AZ_Warning("ProfilingCaptureSystemComponent", false, captureInfo.c_str()); - } - else + bool ProfilingCaptureSystemComponent::EndContinuousCpuProfilingCapture(const AZStd::string& outputFilePath) + { + bool expected = false; + if (m_cpuDataSerializationInProgress.compare_exchange_strong(expected, true)) + { + AZStd::ring_buffer captureResult; + const bool captureEnded = AZ::RHI::CpuProfiler::Get()->EndContinuousCapture(captureResult); + if (!captureEnded) { - AZ_Printf("ProfilingCaptureSystemComponent", "Cpu profiling statistics was saved to file [%s]\n", outputFilePath.c_str()); + AZ_TracePrintf("ProfilingCaptureSystemComponent", "Could not end the continuous capture, is one in progress?\n"); + m_cpuDataSerializationInProgress.store(false); + return false; } - // Disable the profiler again - if (!wasEnabled) + // cpuProfilingData could be 1GB+ once saved, so use an IO thread to write it to disk. + auto threadIoFunction = + [data = AZStd::move(captureResult), filePath = AZStd::string(outputFilePath), &flag = m_cpuDataSerializationInProgress]() { - RHI::CpuProfiler::Get()->SetProfilerEnabled(false); + SerializeCpuProfilingData(data, filePath, true); + flag.store(false); + }; + + // If the thread object already exists (ex. we have already serialized data), join. This will not block since + // m_cpuDataSerializationInProgress was false, meaning the IO thread has already completed execution. + // TODO Use a reusable thread implementation over repeated creation + destruction of threads [ATOM-16214] + if (m_cpuDataSerializationThread.joinable()) + { + m_cpuDataSerializationThread.join(); } - // Notify listeners that the pass' PipelineStatistics queries capture has finished. - ProfilingCaptureNotificationBus::Broadcast(&ProfilingCaptureNotificationBus::Events::OnCaptureCpuProfilingStatisticsFinished, - saveResult.IsSuccess(), - captureInfo); + auto thread = AZStd::thread(threadIoFunction); + m_cpuDataSerializationThread = AZStd::move(thread); - }); - - // Start the TickBus. - if (captureStarted) - { - TickBus::Handler::BusConnect(); + return true; } - return captureStarted; + AZ_TracePrintf( + "ProfilingSystemCaptureComponent", + "Cannot end a continuous capture - another serialization is currently in progress\n"); + return false; } bool ProfilingCaptureSystemComponent::CaptureBenchmarkMetadata(const AZStd::string& benchmarkName, const AZStd::string& outputFilePath) diff --git a/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.h b/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.h index c401d27f30..1846767139 100644 --- a/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.h +++ b/Gems/Atom/Feature/Common/Code/Source/ProfilingCaptureSystemComponent.h @@ -12,6 +12,7 @@ #include #include +#include namespace AZ { @@ -71,6 +72,8 @@ namespace AZ bool CaptureCpuFrameTime(const AZStd::string& outputFilePath) override; bool CapturePassPipelineStatistics(const AZStd::string& outputFilePath) override; bool CaptureCpuProfilingStatistics(const AZStd::string& outputFilePath) override; + bool BeginContinuousCpuProfilingCapture() override; + bool EndContinuousCpuProfilingCapture(const AZStd::string& outputFilePath) override; bool CaptureBenchmarkMetadata(const AZStd::string& benchmarkName, const AZStd::string& outputFilePath) override; private: @@ -86,6 +89,11 @@ namespace AZ DelayedQueryCaptureHelper m_pipelineStatisticsCapture; DelayedQueryCaptureHelper m_cpuProfilingStatisticsCapture; DelayedQueryCaptureHelper m_benchmarkMetadataCapture; + + // Flag passed by reference to the CPU profiling data serialization job, blocks new continuous capture requests when set. + AZStd::atomic_bool m_cpuDataSerializationInProgress = false; + + AZStd::thread m_cpuDataSerializationThread; }; } } diff --git a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h index 1c10b8829f..2248474820 100644 --- a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h +++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfiler.h @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -82,6 +83,14 @@ namespace AZ //! Get the last frame's TimeRegionMap virtual const TimeRegionMap& GetTimeRegionMap() const = 0; + //! Begin a continuous capture. Blocks the profiler from being toggled off until EndContinuousCapture is called. + [[nodiscard]] virtual bool BeginContinuousCapture() = 0; + + //! Flush the CPU Profiler's saved data into the passed ring buffer . + [[nodiscard]] virtual bool EndContinuousCapture(AZStd::ring_buffer& flushTarget) = 0; + + virtual bool IsContinuousCaptureInProgress() const = 0; + //! Enable/Disable the CpuProfiler virtual void SetProfilerEnabled(bool enabled) = 0; diff --git a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h index 640c67858b..7d3b0c5b81 100644 --- a/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h +++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/CpuProfilerImpl.h @@ -106,13 +106,18 @@ namespace AZ void OnSystemTick() final override; //! CpuProfiler overrides... - void BeginTimeRegion(TimeRegion& timeRegion) final; - void EndTimeRegion() final; - const TimeRegionMap& GetTimeRegionMap() const final; - void SetProfilerEnabled(bool enabled) final; - bool IsProfilerEnabled() const final; + void BeginTimeRegion(TimeRegion& timeRegion) final override; + void EndTimeRegion() final override; + const TimeRegionMap& GetTimeRegionMap() const final override; + bool BeginContinuousCapture() final override; + bool EndContinuousCapture(AZStd::ring_buffer& flushTarget) final override; + bool IsContinuousCaptureInProgress() const final override; + void SetProfilerEnabled(bool enabled) final override; + bool IsProfilerEnabled() const final override; private: + static constexpr AZStd::size_t MaxFramesToSave = 2 * 60 * 120; // 2 minutes of 120fps + // Lazily create and register the local thread data void RegisterThreadStorage(); @@ -134,6 +139,14 @@ namespace AZ AZStd::shared_mutex m_shutdownMutex; bool m_initialized = false; + + AZStd::mutex m_continuousCaptureEndingMutex; + + AZStd::atomic_bool m_continuousCaptureInProgress; + + // Stores multiple frames of profiling data, size is controlled by MaxFramesToSave. Flushed when EndContinuousCapture is called. + // Ring buffer so that we can have fast append of new data + removal of old profiling data with good cache locality. + AZStd::ring_buffer m_continuousCaptureData; }; }; // namespace RPI diff --git a/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp b/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp index 73a1881ead..cdfd4ac469 100644 --- a/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp +++ b/Gems/Atom/RHI/Code/Source/RHI/CpuProfilerImpl.cpp @@ -81,6 +81,7 @@ namespace AZ Interface::Register(this); m_initialized = true; SystemTickBus::Handler::BusConnect(); + m_continuousCaptureData.set_capacity(10); } void CpuProfilerImpl::Shutdown() @@ -101,6 +102,8 @@ namespace AZ m_registeredThreads.clear(); m_timeRegionMap.clear(); m_initialized = false; + m_continuousCaptureInProgress.store(false); + m_continuousCaptureData.clear(); SystemTickBus::Handler::BusDisconnect(); } @@ -141,12 +144,54 @@ namespace AZ return m_timeRegionMap; } + bool CpuProfilerImpl::BeginContinuousCapture() + { + bool expected = false; + if (m_continuousCaptureInProgress.compare_exchange_strong(expected, true)) + { + m_enabled = true; + AZ_TracePrintf("Profiler", "Continuous capture started\n"); + return true; + } + + AZ_TracePrintf("Profiler", "Attempting to start a continuous capture while one already in progress"); + return false; + } + + bool CpuProfilerImpl::EndContinuousCapture(AZStd::ring_buffer& flushTarget) + { + if (!m_continuousCaptureInProgress.load()) + { + AZ_TracePrintf("Profiler", "Attempting to end a continuous capture while one not in progress"); + return false; + } + + if (m_continuousCaptureEndingMutex.try_lock()) + { + m_enabled = false; + flushTarget = AZStd::move(m_continuousCaptureData); + m_continuousCaptureData.clear(); + AZ_TracePrintf("Profiler", "Continuous capture ended\n"); + m_continuousCaptureInProgress.store(false); + + m_continuousCaptureEndingMutex.unlock(); + return true; + } + + return false; + } + + bool CpuProfilerImpl::IsContinuousCaptureInProgress() const + { + return m_continuousCaptureInProgress.load(); + } + void CpuProfilerImpl::SetProfilerEnabled(bool enabled) { AZStd::unique_lock lock(m_threadRegisterMutex); - // Early out if the state is already the same - if (m_enabled == enabled) + // Early out if the state is already the same or a continuous capture is in progress + if (m_enabled == enabled || m_continuousCaptureInProgress.load()) { return; } @@ -179,6 +224,20 @@ namespace AZ { return; } + + if (m_continuousCaptureInProgress.load() && m_continuousCaptureEndingMutex.try_lock()) + { + if (m_continuousCaptureData.full() && m_continuousCaptureData.size() != MaxFramesToSave) + { + const AZStd::size_t size = m_continuousCaptureData.size(); + m_continuousCaptureData.set_capacity(AZStd::min(MaxFramesToSave, size + size / 2)); + } + + m_continuousCaptureData.push_back(AZStd::move(m_timeRegionMap)); + m_timeRegionMap.clear(); + m_continuousCaptureEndingMutex.unlock(); + } + AZStd::unique_lock lock(m_threadRegisterMutex); // Iterate through all the threads, and collect the thread's cached time regions diff --git a/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl b/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl index ae707a10a7..ffc9a60c00 100644 --- a/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl +++ b/Gems/Atom/Utils/Code/Include/Atom/Utils/ImGuiCpuProfiler.inl @@ -129,6 +129,34 @@ namespace AZ m_captureToFile = true; } + ImGui::SameLine(); + bool isInProgress = RHI::CpuProfiler::Get()->IsContinuousCaptureInProgress(); + if (ImGui::Button(isInProgress ? "End" : "Begin")) + { + if (isInProgress) + { + AZStd::sys_time_t timeNow = AZStd::GetTimeNowSecond(); + AZStd::string timeString; + AZStd::to_string(timeString, timeNow); + u64 currentTick = AZ::RPI::RPISystemInterface::Get()->GetCurrentTick(); + const AZStd::string frameDataFilePath = AZStd::string::format( + "@user@/CpuProfiler/%s_%llu.json", + timeString.c_str(), + currentTick); + char resolvedPath[AZ::IO::MaxPathLength]; + AZ::IO::FileIOBase::GetInstance()->ResolvePath(frameDataFilePath.c_str(), resolvedPath, AZ::IO::MaxPathLength); + m_lastCapturedFilePath = resolvedPath; + AZ::Render::ProfilingCaptureRequestBus::Broadcast( + &AZ::Render::ProfilingCaptureRequestBus::Events::EndContinuousCpuProfilingCapture, frameDataFilePath); + } + + else + { + AZ::Render::ProfilingCaptureRequestBus::Broadcast( + &AZ::Render::ProfilingCaptureRequestBus::Events::BeginContinuousCpuProfilingCapture); + } + } + if (!m_lastCapturedFilePath.empty()) { ImGui::SameLine();