[atom_cpu_profiler_gem_promotion] updated namespaces for moved files

Signed-off-by: AMZN-ScottR <24445312+AMZN-ScottR@users.noreply.github.com>
monroegm-disable-blank-issue-2
AMZN-ScottR 4 years ago
parent 6946145b23
commit d193dc40a9

@ -15,73 +15,69 @@
#include <AzCore/std/containers/unordered_map.h>
#include <AzCore/std/string/string.h>
namespace AZ
namespace Profiler
{
namespace RHI
//! Structure that is used to cache a timed region into the thread's local storage.
struct CachedTimeRegion
{
//! Structure that is used to cache a timed region into the thread's local storage.
struct CachedTimeRegion
//! Structure used internally for caching assumed global string pointers (ideally literals) to the marker group/region
//! NOTE: When used in a separate shared library, the library mustn't be unloaded before the CpuProfiler is shutdown.
struct GroupRegionName
{
//! Structure used internally for caching assumed global string pointers (ideally literals) to the marker group/region
//! NOTE: When used in a separate shared library, the library mustn't be unloaded before the CpuProfiler is shutdown.
struct GroupRegionName
{
GroupRegionName() = delete;
GroupRegionName(const char* const group, const char* const region);
GroupRegionName() = delete;
GroupRegionName(const char* const group, const char* const region);
const char* m_groupName = nullptr;
const char* m_regionName = nullptr;
const char* m_groupName = nullptr;
const char* m_regionName = nullptr;
struct Hash
{
AZStd::size_t operator()(const GroupRegionName& name) const;
};
bool operator==(const GroupRegionName& other) const;
struct Hash
{
AZStd::size_t operator()(const GroupRegionName& name) const;
};
bool operator==(const GroupRegionName& other) const;
};
CachedTimeRegion() = default;
CachedTimeRegion(const GroupRegionName& groupRegionName);
CachedTimeRegion(const GroupRegionName& groupRegionName, uint16_t stackDepth, uint64_t startTick, uint64_t endTick);
GroupRegionName m_groupRegionName{nullptr, nullptr};
CachedTimeRegion() = default;
CachedTimeRegion(const GroupRegionName& groupRegionName);
CachedTimeRegion(const GroupRegionName& groupRegionName, uint16_t stackDepth, uint64_t startTick, uint64_t endTick);
uint16_t m_stackDepth = 0u;
AZStd::sys_time_t m_startTick = 0;
AZStd::sys_time_t m_endTick = 0;
};
GroupRegionName m_groupRegionName{nullptr, nullptr};
//! Interface class of the CpuProfiler
class CpuProfiler
{
public:
using ThreadTimeRegionMap = AZStd::unordered_map<AZStd::string, AZStd::vector<CachedTimeRegion>>;
using TimeRegionMap = AZStd::unordered_map<AZStd::thread_id, ThreadTimeRegionMap>;
uint16_t m_stackDepth = 0u;
AZStd::sys_time_t m_startTick = 0;
AZStd::sys_time_t m_endTick = 0;
};
AZ_RTTI(CpuProfiler, "{127C1D0B-BE05-4E18-A8F6-24F3EED2ECA6}");
//! Interface class of the CpuProfiler
class CpuProfiler
{
public:
using ThreadTimeRegionMap = AZStd::unordered_map<AZStd::string, AZStd::vector<CachedTimeRegion>>;
using TimeRegionMap = AZStd::unordered_map<AZStd::thread_id, ThreadTimeRegionMap>;
CpuProfiler() = default;
virtual ~CpuProfiler() = default;
AZ_RTTI(CpuProfiler, "{127C1D0B-BE05-4E18-A8F6-24F3EED2ECA6}");
AZ_DISABLE_COPY_MOVE(CpuProfiler);
CpuProfiler() = default;
virtual ~CpuProfiler() = default;
static CpuProfiler* Get();
AZ_DISABLE_COPY_MOVE(CpuProfiler);
//! Get the last frame's TimeRegionMap
virtual const TimeRegionMap& GetTimeRegionMap() const = 0;
static CpuProfiler* Get();
//! Begin a continuous capture. Blocks the profiler from being toggled off until EndContinuousCapture is called.
[[nodiscard]] virtual bool BeginContinuousCapture() = 0;
//! Get the last frame's TimeRegionMap
virtual const TimeRegionMap& GetTimeRegionMap() const = 0;
//! End the continuous capture and flush the CPU Profiler's saved data into the passed ring buffer.
[[nodiscard]] virtual bool EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget) = 0;
//! Begin a continuous capture. Blocks the profiler from being toggled off until EndContinuousCapture is called.
[[nodiscard]] virtual bool BeginContinuousCapture() = 0;
virtual bool IsContinuousCaptureInProgress() const = 0;
//! End the continuous capture and flush the CPU Profiler's saved data into the passed ring buffer.
[[nodiscard]] virtual bool EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget) = 0;
//! Enable/Disable the CpuProfiler
virtual void SetProfilerEnabled(bool enabled) = 0;
virtual bool IsContinuousCaptureInProgress() const = 0;
virtual bool IsProfilerEnabled() const = 0 ;
};
//! Enable/Disable the CpuProfiler
virtual void SetProfilerEnabled(bool enabled) = 0;
} // namespace RPI
} // namespace AZ
virtual bool IsProfilerEnabled() const = 0 ;
};
} // namespace Profiler

@ -15,434 +15,431 @@
#include <AzCore/Statistics/StatisticalProfilerProxy.h>
#include <Atom/RHI/RHIUtils.h>
namespace AZ
namespace Profiler
{
namespace RHI
{
thread_local CpuTimingLocalStorage* CpuProfilerImpl::ms_threadLocalStorage = nullptr;
thread_local CpuTimingLocalStorage* CpuProfilerImpl::ms_threadLocalStorage = nullptr;
// --- CpuProfiler ---
// --- CpuProfiler ---
// Returns the CpuProfiler instance obtained from Interface<CpuProfiler>.
// CpuProfilerImpl::Init() registers itself with that interface and Shutdown() unregisters it,
// so the result is presumably null outside of that window - callers should check (TODO confirm).
CpuProfiler* CpuProfiler::Get()
{
    CpuProfiler* const registeredProfiler = Interface<CpuProfiler>::Get();
    return registeredProfiler;
}
// Returns the CpuProfiler instance obtained from Interface<CpuProfiler>.
// CpuProfilerImpl::Init() registers itself with that interface and Shutdown() unregisters it,
// so the result is presumably null outside of that window - callers should check (TODO confirm).
CpuProfiler* CpuProfiler::Get()
{
    CpuProfiler* const registeredProfiler = Interface<CpuProfiler>::Get();
    return registeredProfiler;
}
// --- CachedTimeRegion ---
// --- CachedTimeRegion ---
// Constructs a cached region that only carries its group/region name.
// The stack depth and the start/end ticks keep their in-class default values.
// groupRegionName: the group and region name pointers identifying the marker.
CachedTimeRegion::CachedTimeRegion(const GroupRegionName& groupRegionName)
    : m_groupRegionName(groupRegionName) // initialize directly rather than default-initializing and then assigning
{
}
// Constructs a cached region that only carries its group/region name.
// The stack depth and the start/end ticks keep their in-class default values.
// groupRegionName: the group and region name pointers identifying the marker.
CachedTimeRegion::CachedTimeRegion(const GroupRegionName& groupRegionName)
    : m_groupRegionName(groupRegionName) // initialize directly rather than default-initializing and then assigning
{
}
// Constructs a fully populated cached region.
// groupRegionName: the group and region name pointers identifying the marker.
// stackDepth:      nesting depth of the region on the thread's region stack.
// startTick:       tick value recorded when the region began.
// endTick:         tick value recorded when the region ended.
// Members are initialized in the member initializer list (in declaration order) instead of
// being default-initialized and then assigned in the body.
CachedTimeRegion::CachedTimeRegion(const GroupRegionName& groupRegionName, uint16_t stackDepth, uint64_t startTick, uint64_t endTick)
    : m_groupRegionName(groupRegionName)
    , m_stackDepth(stackDepth)
    , m_startTick(startTick) // stored as AZStd::sys_time_t; parentheses keep the original implicit conversion
    , m_endTick(endTick)
{
}
// Constructs a fully populated cached region.
// groupRegionName: the group and region name pointers identifying the marker.
// stackDepth:      nesting depth of the region on the thread's region stack.
// startTick:       tick value recorded when the region began.
// endTick:         tick value recorded when the region ended.
// Members are initialized in the member initializer list (in declaration order) instead of
// being default-initialized and then assigned in the body.
CachedTimeRegion::CachedTimeRegion(const GroupRegionName& groupRegionName, uint16_t stackDepth, uint64_t startTick, uint64_t endTick)
    : m_groupRegionName(groupRegionName)
    , m_stackDepth(stackDepth)
    , m_startTick(startTick) // stored as AZStd::sys_time_t; parentheses keep the original implicit conversion
    , m_endTick(endTick)
{
}
// --- GroupRegionName ---
// --- GroupRegionName ---
CachedTimeRegion::GroupRegionName::GroupRegionName(const char* const group, const char* const region) :
m_groupName(group),
m_regionName(region)
{
}
CachedTimeRegion::GroupRegionName::GroupRegionName(const char* const group, const char* const region) :
m_groupName(group),
m_regionName(region)
{
}
// Hash functor for GroupRegionName: folds the group name and then the region name into one value.
// Both members are const char*, so the pointers themselves are handed to hash_combine
// (presumably hashing the addresses rather than the characters - matches operator==, which
// compares the pointers).
AZStd::size_t CachedTimeRegion::GroupRegionName::Hash::operator()(const CachedTimeRegion::GroupRegionName& name) const
{
    AZStd::size_t combinedHash = 0;

    // Order matters: group first, region second.
    AZStd::hash_combine(combinedHash, name.m_groupName);
    AZStd::hash_combine(combinedHash, name.m_regionName);

    return combinedHash;
}
// Hash functor for GroupRegionName: folds the group name and then the region name into one value.
// Both members are const char*, so the pointers themselves are handed to hash_combine
// (presumably hashing the addresses rather than the characters - matches operator==, which
// compares the pointers).
AZStd::size_t CachedTimeRegion::GroupRegionName::Hash::operator()(const CachedTimeRegion::GroupRegionName& name) const
{
    AZStd::size_t combinedHash = 0;

    // Order matters: group first, region second.
    AZStd::hash_combine(combinedHash, name.m_groupName);
    AZStd::hash_combine(combinedHash, name.m_regionName);

    return combinedHash;
}
// Two names are equal when both their group pointers and their region pointers are identical.
// This is a pointer comparison, not a string comparison.
bool CachedTimeRegion::GroupRegionName::operator==(const GroupRegionName& other) const
{
    if (m_groupName != other.m_groupName)
    {
        return false;
    }
    return m_regionName == other.m_regionName;
}
// Two names are equal when both their group pointers and their region pointers are identical.
// This is a pointer comparison, not a string comparison.
bool CachedTimeRegion::GroupRegionName::operator==(const GroupRegionName& other) const
{
    if (m_groupName != other.m_groupName)
    {
        return false;
    }
    return m_regionName == other.m_regionName;
}
// --- CpuProfilerImpl ---
// --- CpuProfilerImpl ---
void CpuProfilerImpl::Init()
{
Interface<AZ::Debug::Profiler>::Register(this);
Interface<CpuProfiler>::Register(this);
m_initialized = true;
SystemTickBus::Handler::BusConnect();
m_continuousCaptureData.set_capacity(10);
void CpuProfilerImpl::Init()
{
Interface<AZ::Debug::Profiler>::Register(this);
Interface<CpuProfiler>::Register(this);
m_initialized = true;
SystemTickBus::Handler::BusConnect();
m_continuousCaptureData.set_capacity(10);
if (auto statsProfiler = AZ::Interface<AZ::Statistics::StatisticalProfilerProxy>::Get(); statsProfiler)
{
statsProfiler->ActivateProfiler(AZ_CRC_CE("RHI"), true);
}
if (auto statsProfiler = AZ::Interface<AZ::Statistics::StatisticalProfilerProxy>::Get(); statsProfiler)
{
statsProfiler->ActivateProfiler(AZ_CRC_CE("RHI"), true);
}
}
void CpuProfilerImpl::Shutdown()
void CpuProfilerImpl::Shutdown()
{
if (!m_initialized)
{
if (!m_initialized)
{
return;
}
// When this call is made, no further thread profiling calls can be performed
Interface<CpuProfiler>::Unregister(this);
Interface<AZ::Debug::Profiler>::Unregister(this);
// Wait for the remaining threads that might still be processing their profiling calls
AZStd::unique_lock<AZStd::shared_mutex> shutdownLock(m_shutdownMutex);
m_enabled = false;
// Cleanup all TLS
m_registeredThreads.clear();
m_timeRegionMap.clear();
m_initialized = false;
m_continuousCaptureInProgress.store(false);
m_continuousCaptureData.clear();
SystemTickBus::Handler::BusDisconnect();
return;
}
// When this call is made, no further thread profiling calls can be performed
Interface<CpuProfiler>::Unregister(this);
Interface<AZ::Debug::Profiler>::Unregister(this);
void CpuProfilerImpl::BeginRegion(const AZ::Debug::Budget* budget, const char* eventName)
{
// Try to lock here, the shutdownMutex will only be contested when the CpuProfiler is shutting down.
if (m_shutdownMutex.try_lock_shared())
{
if (m_enabled)
{
// Lazy initialization, creates an instance of the Thread local data if it's not created, and registers it
RegisterThreadStorage();
// Wait for the remaining threads that might still be processing their profiling calls
AZStd::unique_lock<AZStd::shared_mutex> shutdownLock(m_shutdownMutex);
// Push it to the stack
CachedTimeRegion timeRegion({budget->Name(), eventName});
ms_threadLocalStorage->RegionStackPushBack(timeRegion);
}
m_enabled = false;
m_shutdownMutex.unlock_shared();
}
}
// Cleanup all TLS
m_registeredThreads.clear();
m_timeRegionMap.clear();
m_initialized = false;
m_continuousCaptureInProgress.store(false);
m_continuousCaptureData.clear();
SystemTickBus::Handler::BusDisconnect();
}
void CpuProfilerImpl::EndRegion([[maybe_unused]] const AZ::Debug::Budget* budget)
void CpuProfilerImpl::BeginRegion(const AZ::Debug::Budget* budget, const char* eventName)
{
// Try to lock here, the shutdownMutex will only be contested when the CpuProfiler is shutting down.
if (m_shutdownMutex.try_lock_shared())
{
// Try to lock here, the shutdownMutex will only be contested when the CpuProfiler is shutting down.
if (m_shutdownMutex.try_lock_shared())
if (m_enabled)
{
// guard against enabling mid-marker
if (m_enabled && ms_threadLocalStorage != nullptr)
{
ms_threadLocalStorage->RegionStackPopBack();
}
// Lazy initialization, creates an instance of the Thread local data if it's not created, and registers it
RegisterThreadStorage();
m_shutdownMutex.unlock_shared();
// Push it to the stack
CachedTimeRegion timeRegion({budget->Name(), eventName});
ms_threadLocalStorage->RegionStackPushBack(timeRegion);
}
}
const CpuProfiler::TimeRegionMap& CpuProfilerImpl::GetTimeRegionMap() const
{
return m_timeRegionMap;
m_shutdownMutex.unlock_shared();
}
}
bool CpuProfilerImpl::BeginContinuousCapture()
void CpuProfilerImpl::EndRegion([[maybe_unused]] const AZ::Debug::Budget* budget)
{
// Try to lock here, the shutdownMutex will only be contested when the CpuProfiler is shutting down.
if (m_shutdownMutex.try_lock_shared())
{
bool expected = false;
if (m_continuousCaptureInProgress.compare_exchange_strong(expected, true))
// guard against enabling mid-marker
if (m_enabled && ms_threadLocalStorage != nullptr)
{
m_enabled = true;
AZ_TracePrintf("Profiler", "Continuous capture started\n");
return true;
ms_threadLocalStorage->RegionStackPopBack();
}
AZ_TracePrintf("Profiler", "Attempting to start a continuous capture while one already in progress");
return false;
m_shutdownMutex.unlock_shared();
}
}
bool CpuProfilerImpl::EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget)
// Returns a reference to the profiler's saved time-region map (m_timeRegionMap), which
// OnSystemTick() replaces with the data collected from the registered threads.
// No lock is taken here; the reference aliases the member directly.
const CpuProfiler::TimeRegionMap& CpuProfilerImpl::GetTimeRegionMap() const
{
    const auto& savedRegions = m_timeRegionMap;
    return savedRegions;
}
bool CpuProfilerImpl::BeginContinuousCapture()
{
bool expected = false;
if (m_continuousCaptureInProgress.compare_exchange_strong(expected, true))
{
if (!m_continuousCaptureInProgress.load())
{
AZ_TracePrintf("Profiler", "Attempting to end a continuous capture while one not in progress");
return false;
}
m_enabled = true;
AZ_TracePrintf("Profiler", "Continuous capture started\n");
return true;
}
if (m_continuousCaptureEndingMutex.try_lock())
{
m_enabled = false;
flushTarget = AZStd::move(m_continuousCaptureData);
m_continuousCaptureData.clear();
AZ_TracePrintf("Profiler", "Continuous capture ended\n");
m_continuousCaptureInProgress.store(false);
m_continuousCaptureEndingMutex.unlock();
return true;
}
AZ_TracePrintf("Profiler", "Attempting to start a continuous capture while one already in progress");
return false;
}
bool CpuProfilerImpl::EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget)
{
if (!m_continuousCaptureInProgress.load())
{
AZ_TracePrintf("Profiler", "Attempting to end a continuous capture while one not in progress");
return false;
}
bool CpuProfilerImpl::IsContinuousCaptureInProgress() const
if (m_continuousCaptureEndingMutex.try_lock())
{
return m_continuousCaptureInProgress.load();
m_enabled = false;
flushTarget = AZStd::move(m_continuousCaptureData);
m_continuousCaptureData.clear();
AZ_TracePrintf("Profiler", "Continuous capture ended\n");
m_continuousCaptureInProgress.store(false);
m_continuousCaptureEndingMutex.unlock();
return true;
}
void CpuProfilerImpl::SetProfilerEnabled(bool enabled)
{
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
return false;
}
// Early out if the state is already the same or a continuous capture is in progress
if (m_enabled == enabled || m_continuousCaptureInProgress.load())
{
return;
}
// Set the dirty flag in all the TLS to clear the caches
if (enabled)
{
// Iterate through all the threads, and set the clearing flag
for (auto& threadLocal : m_registeredThreads)
{
threadLocal->m_clearContainers = true;
}
// Reports whether a continuous capture is currently flagged as in progress.
// The flag is set by BeginContinuousCapture() and cleared by EndContinuousCapture() / Shutdown().
bool CpuProfilerImpl::IsContinuousCaptureInProgress() const
{
    const bool captureInProgress = m_continuousCaptureInProgress.load();
    return captureInProgress;
}
m_enabled = true;
}
else
{
m_enabled = false;
}
}
void CpuProfilerImpl::SetProfilerEnabled(bool enabled)
{
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
bool CpuProfilerImpl::IsProfilerEnabled() const
// Early out if the state is already the same or a continuous capture is in progress
if (m_enabled == enabled || m_continuousCaptureInProgress.load())
{
return m_enabled;
return;
}
void CpuProfilerImpl::OnSystemTick()
// Set the dirty flag in all the TLS to clear the caches
if (enabled)
{
if (!m_enabled)
{
return;
}
if (m_continuousCaptureInProgress.load() && m_continuousCaptureEndingMutex.try_lock())
{
if (m_continuousCaptureData.full() && m_continuousCaptureData.size() != MaxFramesToSave)
{
const AZStd::size_t size = m_continuousCaptureData.size();
m_continuousCaptureData.set_capacity(AZStd::min(MaxFramesToSave, size + size / 2));
}
m_continuousCaptureData.push_back(AZStd::move(m_timeRegionMap));
m_timeRegionMap.clear();
m_continuousCaptureEndingMutex.unlock();
}
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
// Iterate through all the threads, and collect the thread's cached time regions
TimeRegionMap newMap;
// Iterate through all the threads, and set the clearing flag
for (auto& threadLocal : m_registeredThreads)
{
ThreadTimeRegionMap& threadMapEntry = newMap[threadLocal->m_executingThreadId];
threadLocal->TryFlushCachedMap(threadMapEntry);
threadLocal->m_clearContainers = true;
}
// Clear all TLS that flagged themselves to be deleted, meaning that the thread is already terminated
AZStd::remove_if(m_registeredThreads.begin(), m_registeredThreads.end(), [](const RHI::Ptr<CpuTimingLocalStorage>& thread)
{
return thread->m_deleteFlag.load();
});
m_enabled = true;
}
else
{
m_enabled = false;
}
}
// Update our saved time regions to the last frame's collected data
m_timeRegionMap = AZStd::move(newMap);
// Reports the current value of the profiler's enabled flag (m_enabled).
bool CpuProfilerImpl::IsProfilerEnabled() const
{
    const bool profilerEnabled = m_enabled;
    return profilerEnabled;
}
void CpuProfilerImpl::OnSystemTick()
{
if (!m_enabled)
{
return;
}
void CpuProfilerImpl::RegisterThreadStorage()
if (m_continuousCaptureInProgress.load() && m_continuousCaptureEndingMutex.try_lock())
{
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
if (!ms_threadLocalStorage)
if (m_continuousCaptureData.full() && m_continuousCaptureData.size() != MaxFramesToSave)
{
ms_threadLocalStorage = aznew CpuTimingLocalStorage();
m_registeredThreads.emplace_back(ms_threadLocalStorage);
const AZStd::size_t size = m_continuousCaptureData.size();
m_continuousCaptureData.set_capacity(AZStd::min(MaxFramesToSave, size + size / 2));
}
m_continuousCaptureData.push_back(AZStd::move(m_timeRegionMap));
m_timeRegionMap.clear();
m_continuousCaptureEndingMutex.unlock();
}
// --- CpuTimingLocalStorage ---
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
CpuTimingLocalStorage::CpuTimingLocalStorage()
// Iterate through all the threads, and collect the thread's cached time regions
TimeRegionMap newMap;
for (auto& threadLocal : m_registeredThreads)
{
m_executingThreadId = AZStd::this_thread::get_id();
ThreadTimeRegionMap& threadMapEntry = newMap[threadLocal->m_executingThreadId];
threadLocal->TryFlushCachedMap(threadMapEntry);
}
CpuTimingLocalStorage::~CpuTimingLocalStorage()
// Clear all TLS that flagged themselves to be deleted, meaning that the thread is already terminated
AZStd::remove_if(m_registeredThreads.begin(), m_registeredThreads.end(), [](const RHI::Ptr<CpuTimingLocalStorage>& thread)
{
m_deleteFlag = true;
}
return thread->m_deleteFlag.load();
});
void CpuTimingLocalStorage::RegionStackPushBack(CachedTimeRegion& timeRegion)
// Update our saved time regions to the last frame's collected data
m_timeRegionMap = AZStd::move(newMap);
}
void CpuProfilerImpl::RegisterThreadStorage()
{
AZStd::unique_lock<AZStd::mutex> lock(m_threadRegisterMutex);
if (!ms_threadLocalStorage)
{
// If it was (re)enabled, clear the lists first
if (m_clearContainers)
{
m_clearContainers = false;
ms_threadLocalStorage = aznew CpuTimingLocalStorage();
m_registeredThreads.emplace_back(ms_threadLocalStorage);
}
}
m_stackLevel = 0;
m_cachedTimeRegionMap.clear();
m_timeRegionStack.clear();
m_cachedTimeRegions.clear();
}
// --- CpuTimingLocalStorage ---
timeRegion.m_stackDepth = static_cast<uint16_t>(m_stackLevel);
// Records the id of the thread that constructs this storage, so the profiler can later
// key this thread's regions by m_executingThreadId.
// The id is set in the member initializer list instead of being default-constructed
// and then assigned in the body.
CpuTimingLocalStorage::CpuTimingLocalStorage()
    : m_executingThreadId(AZStd::this_thread::get_id())
{
}
AZ_Assert(m_timeRegionStack.size() < TimeRegionStackSize, "Adding too many time regions to the stack. Increase the size of TimeRegionStackSize.");
m_timeRegionStack.push_back(timeRegion);
// Marks this storage as deleted by raising m_deleteFlag.
// NOTE(review): CpuProfilerImpl::OnSystemTick() reads this flag through the ref-counted
// pointers held in m_registeredThreads; since this is the destructor, it presumably only
// runs once those references are gone - confirm the flag can ever be observed as true.
CpuTimingLocalStorage::~CpuTimingLocalStorage()
{
    m_deleteFlag.store(true);
}
// Increment the stack
m_stackLevel++;
void CpuTimingLocalStorage::RegionStackPushBack(CachedTimeRegion& timeRegion)
{
// If it was (re)enabled, clear the lists first
if (m_clearContainers)
{
m_clearContainers = false;
// Set the starting time at the end, to avoid recording the minor overhead
m_timeRegionStack.back().m_startTick = AZStd::GetTimeNowTicks();
m_stackLevel = 0;
m_cachedTimeRegionMap.clear();
m_timeRegionStack.clear();
m_cachedTimeRegions.clear();
}
void CpuTimingLocalStorage::RegionStackPopBack()
timeRegion.m_stackDepth = static_cast<uint16_t>(m_stackLevel);
AZ_Assert(m_timeRegionStack.size() < TimeRegionStackSize, "Adding too many time regions to the stack. Increase the size of TimeRegionStackSize.");
m_timeRegionStack.push_back(timeRegion);
// Increment the stack
m_stackLevel++;
// Set the starting time at the end, to avoid recording the minor overhead
m_timeRegionStack.back().m_startTick = AZStd::GetTimeNowTicks();
}
void CpuTimingLocalStorage::RegionStackPopBack()
{
// Early out when the stack is empty, this might happen when the profiler was enabled while the thread encountered profiling markers
if (m_timeRegionStack.empty())
{
// Early out when the stack is empty, this might happen when the profiler was enabled while the thread encountered profiling markers
if (m_timeRegionStack.empty())
{
return;
}
return;
}
// Get the end timestamp here, to avoid the minor overhead
const AZStd::sys_time_t endRegionTime = AZStd::GetTimeNowTicks();
// Get the end timestamp here, to avoid the minor overhead
const AZStd::sys_time_t endRegionTime = AZStd::GetTimeNowTicks();
AZ_Assert(!m_timeRegionStack.empty(), "Trying to pop an element in the stack, but it's empty.");
CachedTimeRegion back = m_timeRegionStack.back();
m_timeRegionStack.pop_back();
AZ_Assert(!m_timeRegionStack.empty(), "Trying to pop an element in the stack, but it's empty.");
CachedTimeRegion back = m_timeRegionStack.back();
m_timeRegionStack.pop_back();
// Set the ending time
back.m_endTick = endRegionTime;
// Set the ending time
back.m_endTick = endRegionTime;
// Decrement the stack
m_stackLevel--;
// Decrement the stack
m_stackLevel--;
// Add an entry to the cached region
AddCachedRegion(back);
}
// Add an entry to the cached region
AddCachedRegion(back);
}
// Gets called when region ends and all data is set
void CpuTimingLocalStorage::AddCachedRegion(const CachedTimeRegion& timeRegionCached)
// Gets called when region ends and all data is set
void CpuTimingLocalStorage::AddCachedRegion(const CachedTimeRegion& timeRegionCached)
{
if (m_hitSizeLimitMap[timeRegionCached.m_groupRegionName.m_regionName])
{
if (m_hitSizeLimitMap[timeRegionCached.m_groupRegionName.m_regionName])
{
return;
}
// Add an entry to the cached region
m_cachedTimeRegions.push_back(timeRegionCached);
// If the stack is empty, add it to the local cache map. Only gets called when the stack is empty
// NOTE: this is where the largest overhead will be, but due to it only being called when the stack is empty
// (i.e when the root region ended), this overhead won't affect any time regions.
// The exception being for functions that are being profiled and create/spawn threads that are also profiled. Unfortunately, in this
// case, the overhead of the profiled threads will be added to the main thread.
if (m_timeRegionStack.empty())
{
AZStd::unique_lock<AZStd::mutex> lock(m_cachedTimeRegionMutex);
return;
}
// Add an entry to the cached region
m_cachedTimeRegions.push_back(timeRegionCached);
// If the stack is empty, add it to the local cache map. Only gets called when the stack is empty
// NOTE: this is where the largest overhead will be, but due to it only being called when the stack is empty
// (i.e when the root region ended), this overhead won't affect any time regions.
// The exception being for functions that are being profiled and create/spawn threads that are also profiled. Unfortunately, in this
// case, the overhead of the profiled threads will be added to the main thread.
if (m_timeRegionStack.empty())
{
AZStd::unique_lock<AZStd::mutex> lock(m_cachedTimeRegionMutex);
// Add the cached regions to the map
for (auto& cachedTimeRegion : m_cachedTimeRegions)
// Add the cached regions to the map
for (auto& cachedTimeRegion : m_cachedTimeRegions)
{
const AZStd::string regionName = cachedTimeRegion.m_groupRegionName.m_regionName;
AZStd::vector<CachedTimeRegion>& regionVec = m_cachedTimeRegionMap[regionName];
regionVec.push_back(cachedTimeRegion);
if (regionVec.size() >= TimeRegionStackSize)
{
const AZStd::string regionName = cachedTimeRegion.m_groupRegionName.m_regionName;
AZStd::vector<CachedTimeRegion>& regionVec = m_cachedTimeRegionMap[regionName];
regionVec.push_back(cachedTimeRegion);
if (regionVec.size() >= TimeRegionStackSize)
{
m_hitSizeLimitMap.insert_or_assign(AZStd::move(regionName), true);
}
m_hitSizeLimitMap.insert_or_assign(AZStd::move(regionName), true);
}
// Clear the cached regions
m_cachedTimeRegions.clear();
}
// Clear the cached regions
m_cachedTimeRegions.clear();
}
}
void CpuTimingLocalStorage::TryFlushCachedMap(CpuProfiler::ThreadTimeRegionMap& cachedTimeRegionMap)
void CpuTimingLocalStorage::TryFlushCachedMap(CpuProfiler::ThreadTimeRegionMap& cachedTimeRegionMap)
{
// Try to lock, if it's already in use (the cached regions in the array are being copied to the map)
// it'll show up in the next iteration when the user requests it.
if (m_cachedTimeRegionMutex.try_lock())
{
// Try to lock, if it's already in use (the cached regions in the array are being copied to the map)
// it'll show up in the next iteration when the user requests it.
if (m_cachedTimeRegionMutex.try_lock())
// Only flush cached time regions if there are entries available
if (!m_cachedTimeRegionMap.empty())
{
// Only flush cached time regions if there are entries available
if (!m_cachedTimeRegionMap.empty())
{
cachedTimeRegionMap = AZStd::move(m_cachedTimeRegionMap);
m_cachedTimeRegionMap.clear();
m_hitSizeLimitMap.clear();
}
m_cachedTimeRegionMutex.unlock();
cachedTimeRegionMap = AZStd::move(m_cachedTimeRegionMap);
m_cachedTimeRegionMap.clear();
m_hitSizeLimitMap.clear();
}
m_cachedTimeRegionMutex.unlock();
}
}
// --- CpuProfilingStatisticsSerializer ---
// --- CpuProfilingStatisticsSerializer ---
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& continuousData)
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& continuousData)
{
// Create serializable entries
for (const auto& timeRegionMap : continuousData)
{
// Create serializable entries
for (const auto& timeRegionMap : continuousData)
for (const auto& [threadId, regionMap] : timeRegionMap)
{
for (const auto& [threadId, regionMap] : timeRegionMap)
for (const auto& [regionName, regionVec] : regionMap)
{
for (const auto& [regionName, regionVec] : regionMap)
for (const auto& region : regionVec)
{
for (const auto& region : regionVec)
{
m_cpuProfilingStatisticsSerializerEntries.emplace_back(region, threadId);
}
m_cpuProfilingStatisticsSerializerEntries.emplace_back(region, threadId);
}
}
}
}
}
void CpuProfilingStatisticsSerializer::Reflect(AZ::ReflectContext* context)
void CpuProfilingStatisticsSerializer::Reflect(AZ::ReflectContext* context)
{
if (auto* serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
{
if (auto* serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
{
serializeContext->Class<CpuProfilingStatisticsSerializer>()
->Version(1)
->Field("cpuProfilingStatisticsSerializerEntries", &CpuProfilingStatisticsSerializer::m_cpuProfilingStatisticsSerializerEntries)
;
}
CpuProfilingStatisticsSerializerEntry::Reflect(context);
serializeContext->Class<CpuProfilingStatisticsSerializer>()
->Version(1)
->Field("cpuProfilingStatisticsSerializerEntries", &CpuProfilingStatisticsSerializer::m_cpuProfilingStatisticsSerializerEntries)
;
}
// --- CpuProfilingStatisticsSerializerEntry ---
CpuProfilingStatisticsSerializerEntry::Reflect(context);
}
// Builds a serializable entry from one cached time region and the thread it was recorded on.
// cachedTimeRegion: source region; its names, stack depth and start/end ticks are copied over.
// threadId:         owning thread; stored as the value produced by AZStd::hash<AZStd::thread_id>.
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::CpuProfilingStatisticsSerializerEntry(
    const RHI::CachedTimeRegion& cachedTimeRegion, AZStd::thread_id threadId)
{
    // Names
    const auto& names = cachedTimeRegion.m_groupRegionName;
    m_groupName = names.m_groupName;
    m_regionName = names.m_regionName;

    // Timing data
    m_stackDepth = cachedTimeRegion.m_stackDepth;
    m_startTick = cachedTimeRegion.m_startTick;
    m_endTick = cachedTimeRegion.m_endTick;

    // The thread id itself is not stored; only its hash is.
    AZStd::hash<AZStd::thread_id> threadIdHasher;
    m_threadId = threadIdHasher(threadId);
}
// --- CpuProfilingStatisticsSerializerEntry ---
void CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::Reflect(AZ::ReflectContext* context)
// Builds a serializable entry from one cached time region and the thread it was recorded on.
// cachedTimeRegion: source region; its names, stack depth and start/end ticks are copied over.
// threadId:         owning thread; stored as the value produced by AZStd::hash<AZStd::thread_id>.
CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::CpuProfilingStatisticsSerializerEntry(
    const RHI::CachedTimeRegion& cachedTimeRegion, AZStd::thread_id threadId)
{
    // Names
    const auto& names = cachedTimeRegion.m_groupRegionName;
    m_groupName = names.m_groupName;
    m_regionName = names.m_regionName;

    // Timing data
    m_stackDepth = cachedTimeRegion.m_stackDepth;
    m_startTick = cachedTimeRegion.m_startTick;
    m_endTick = cachedTimeRegion.m_endTick;

    // The thread id itself is not stored; only its hash is.
    AZStd::hash<AZStd::thread_id> threadIdHasher;
    m_threadId = threadIdHasher(threadId);
}
void CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry::Reflect(AZ::ReflectContext* context)
{
if (auto* serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
{
if (auto* serializeContext = azrtti_cast<AZ::SerializeContext*>(context))
{
serializeContext->Class<CpuProfilingStatisticsSerializerEntry>()
->Version(1)
->Field("groupName", &CpuProfilingStatisticsSerializerEntry::m_groupName)
->Field("regionName", &CpuProfilingStatisticsSerializerEntry::m_regionName)
->Field("stackDepth", &CpuProfilingStatisticsSerializerEntry::m_stackDepth)
->Field("startTick", &CpuProfilingStatisticsSerializerEntry::m_startTick)
->Field("endTick", &CpuProfilingStatisticsSerializerEntry::m_endTick)
->Field("threadId", &CpuProfilingStatisticsSerializerEntry::m_threadId)
;
}
serializeContext->Class<CpuProfilingStatisticsSerializerEntry>()
->Version(1)
->Field("groupName", &CpuProfilingStatisticsSerializerEntry::m_groupName)
->Field("regionName", &CpuProfilingStatisticsSerializerEntry::m_regionName)
->Field("stackDepth", &CpuProfilingStatisticsSerializerEntry::m_stackDepth)
->Field("startTick", &CpuProfilingStatisticsSerializerEntry::m_startTick)
->Field("endTick", &CpuProfilingStatisticsSerializerEntry::m_endTick)
->Field("threadId", &CpuProfilingStatisticsSerializerEntry::m_threadId)
;
}
} // namespace RHI
} // namespace AZ
}
} // namespace Profiler

@ -20,169 +20,166 @@
#include <AzCore/std/smart_ptr/intrusive_refcount.h>
namespace AZ
namespace Profiler
{
namespace RHI
//! Thread local class to keep track of the thread's cached time regions.
//! Each thread keeps track of its own time regions, which is communicated from the CpuProfilerImpl.
//! The CpuProfilerImpl is able to request the cached time regions from the CpuTimingLocalStorage.
class CpuTimingLocalStorage :
public AZStd::intrusive_refcount<AZStd::atomic_uint>
{
//! Thread local class to keep track of the thread's cached time regions.
//! Each thread keeps track of its own time regions, which is communicated from the CpuProfilerImpl.
//! The CpuProfilerImpl is able to request the cached time regions from the CpuTimingLocalStorage.
class CpuTimingLocalStorage :
public AZStd::intrusive_refcount<AZStd::atomic_uint>
{
friend class CpuProfilerImpl;
friend class CpuProfilerImpl;
public:
AZ_CLASS_ALLOCATOR(CpuTimingLocalStorage, AZ::OSAllocator, 0);
public:
AZ_CLASS_ALLOCATOR(CpuTimingLocalStorage, AZ::OSAllocator, 0);
CpuTimingLocalStorage();
~CpuTimingLocalStorage();
CpuTimingLocalStorage();
~CpuTimingLocalStorage();
private:
// Maximum stack size
static constexpr uint32_t TimeRegionStackSize = 2048u;
private:
// Maximum stack size
static constexpr uint32_t TimeRegionStackSize = 2048u;
// Adds a region to the stack, gets called each time a region begins
void RegionStackPushBack(CachedTimeRegion& timeRegion);
// Adds a region to the stack, gets called each time a region begins
void RegionStackPushBack(CachedTimeRegion& timeRegion);
// Pops a region from the stack, gets called each time a region ends
void RegionStackPopBack();
// Pops a region from the stack, gets called each time a region ends
void RegionStackPopBack();
// Add a new cached time region. If the stack is empty, flush all entries to the cached map
void AddCachedRegion(const CachedTimeRegion& timeRegionCached);
// Add a new cached time region. If the stack is empty, flush all entries to the cached map
void AddCachedRegion(const CachedTimeRegion& timeRegionCached);
// Tries to flush the map to the passed parameter, only if the thread's mutex is unlocked
void TryFlushCachedMap(CpuProfiler::ThreadTimeRegionMap& cachedRegionMap);
// Tries to flush the map to the passed parameter, only if the thread's mutex is unlocked
void TryFlushCachedMap(CpuProfiler::ThreadTimeRegionMap& cachedRegionMap);
AZStd::thread_id m_executingThreadId;
// Keeps track of the current thread's stack depth
uint32_t m_stackLevel = 0u;
AZStd::thread_id m_executingThreadId;
// Keeps track of the current thread's stack depth
uint32_t m_stackLevel = 0u;
// Cached region map, will be flushed to the system's map when the system requests it
CpuProfiler::ThreadTimeRegionMap m_cachedTimeRegionMap;
// Cached region map, will be flushed to the system's map when the system requests it
CpuProfiler::ThreadTimeRegionMap m_cachedTimeRegionMap;
// Use fixed vectors to avoid re-allocating new elements
// Keeps track of the regions that are added and removed using the macro
AZStd::fixed_vector<CachedTimeRegion, TimeRegionStackSize> m_timeRegionStack;
// Use fixed vectors to avoid re-allocating new elements
// Keeps track of the regions that are added and removed using the macro
AZStd::fixed_vector<CachedTimeRegion, TimeRegionStackSize> m_timeRegionStack;
// Keeps track of regions that completed (i.e. regions that were pushed and popped from the stack)
// Intermediate storage point for the CachedTimeRegions, when the stack is empty, all entries will be
// copied to the map.
AZStd::fixed_vector<CachedTimeRegion, TimeRegionStackSize> m_cachedTimeRegions;
AZStd::mutex m_cachedTimeRegionMutex;
// Keeps track of regions that completed (i.e. regions that were pushed and popped from the stack)
// Intermediate storage point for the CachedTimeRegions, when the stack is empty, all entries will be
// copied to the map.
AZStd::fixed_vector<CachedTimeRegion, TimeRegionStackSize> m_cachedTimeRegions;
AZStd::mutex m_cachedTimeRegionMutex;
// Dirty flag which is set when the CpuProfiler's enabled state is set from false to true
AZStd::atomic_bool m_clearContainers = false;
// Dirty flag which is set when the CpuProfiler's enabled state is set from false to true
AZStd::atomic_bool m_clearContainers = false;
// When the thread is terminated, it will flag itself for deletion
AZStd::atomic_bool m_deleteFlag = false;
// When the thread is terminated, it will flag itself for deletion
AZStd::atomic_bool m_deleteFlag = false;
// Keep track of the regions that have hit the size limit so we don't have to lock to check
AZStd::map<AZStd::string, bool> m_hitSizeLimitMap;
};
// Keep track of the regions that have hit the size limit so we don't have to lock to check
AZStd::map<AZStd::string, bool> m_hitSizeLimitMap;
};
//! CpuProfilerImpl keeps track of the registered threads and forwards each request to
//! profile a region to the appropriate thread. The user is able to request all cached
//! regions, which are stored per thread (m_timeRegionMap: ThreadId -> ThreadTimeRegionMap).
class CpuProfilerImpl final
: public AZ::Debug::Profiler
, public CpuProfiler
, public SystemTickBus::Handler
{
friend class CpuTimingLocalStorage;
//! CpuProfilerImpl keeps track of the registered threads and forwards each request to
//! profile a region to the appropriate thread. The user is able to request all cached
//! regions, which are stored per thread (m_timeRegionMap: ThreadId -> ThreadTimeRegionMap).
class CpuProfilerImpl final
: public AZ::Debug::Profiler
, public CpuProfiler
, public SystemTickBus::Handler
{
friend class CpuTimingLocalStorage;
public:
AZ_TYPE_INFO(CpuProfilerImpl, "{10E9D394-FC83-4B45-B2B8-807C6BF07BF0}");
AZ_CLASS_ALLOCATOR(CpuProfilerImpl, AZ::OSAllocator, 0);
public:
AZ_TYPE_INFO(CpuProfilerImpl, "{10E9D394-FC83-4B45-B2B8-807C6BF07BF0}");
AZ_CLASS_ALLOCATOR(CpuProfilerImpl, AZ::OSAllocator, 0);
// Construction and destruction are defaulted, so every data member relies on its
// in-class initializer (or its own default constructor) for its starting value.
CpuProfilerImpl() = default;
~CpuProfilerImpl() = default;
CpuProfilerImpl() = default;
~CpuProfilerImpl() = default;
//! Registers the CpuProfilerImpl instance to the interface
void Init();
//! Unregisters the CpuProfilerImpl instance from the interface
void Shutdown();
//! Registers the CpuProfilerImpl instance to the interface
void Init();
//! Unregisters the CpuProfilerImpl instance from the interface
void Shutdown();
// SystemTickBus::Handler overrides
// When fired, the profiler collects all profiling data from registered threads and updates
// m_timeRegionMap so that the next frame has up-to-date profiling data.
void OnSystemTick() final override;
// SystemTickBus::Handler overrides
// When fired, the profiler collects all profiling data from registered threads and updates
// m_timeRegionMap so that the next frame has up-to-date profiling data.
void OnSystemTick() final override;
//! AZ::Debug::Profiler overrides...
void BeginRegion(const AZ::Debug::Budget* budget, const char* eventName) final override;
void EndRegion(const AZ::Debug::Budget* budget) final override;
//! AZ::Debug::Profiler overrides...
void BeginRegion(const AZ::Debug::Budget* budget, const char* eventName) final override;
void EndRegion(const AZ::Debug::Budget* budget) final override;
//! CpuProfiler overrides...
const TimeRegionMap& GetTimeRegionMap() const final override;
bool BeginContinuousCapture() final override;
bool EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget) final override;
bool IsContinuousCaptureInProgress() const final override;
void SetProfilerEnabled(bool enabled) final override;
bool IsProfilerEnabled() const final override;
//! CpuProfiler overrides...
const TimeRegionMap& GetTimeRegionMap() const final override;
bool BeginContinuousCapture() final override;
bool EndContinuousCapture(AZStd::ring_buffer<TimeRegionMap>& flushTarget) final override;
bool IsContinuousCaptureInProgress() const final override;
void SetProfilerEnabled(bool enabled) final override;
bool IsProfilerEnabled() const final override;
private:
static constexpr AZStd::size_t MaxFramesToSave = 2 * 60 * 120; // 2 minutes of 120fps
static constexpr AZStd::size_t MaxRegionStringPoolSize = 16384; // Max amount of unique strings to save in the pool before throwing warnings.
private:
static constexpr AZStd::size_t MaxFramesToSave = 2 * 60 * 120; // 2 minutes of 120fps
static constexpr AZStd::size_t MaxRegionStringPoolSize = 16384; // Max amount of unique strings to save in the pool before throwing warnings.
// Lazily create and register the local thread data
void RegisterThreadStorage();
// Lazily create and register the local thread data
void RegisterThreadStorage();
// ThreadId -> ThreadTimeRegionMap
// On the start of each frame, this map will be updated with the last frame's profiling data.
TimeRegionMap m_timeRegionMap;
// ThreadId -> ThreadTimeRegionMap
// On the start of each frame, this map will be updated with the last frame's profiling data.
TimeRegionMap m_timeRegionMap;
// Set of registered threads when created
AZStd::vector<RHI::Ptr<CpuTimingLocalStorage>, AZ::OSStdAllocator> m_registeredThreads;
AZStd::mutex m_threadRegisterMutex;
// Set of registered threads when created
AZStd::vector<RHI::Ptr<CpuTimingLocalStorage>, AZ::OSStdAllocator> m_registeredThreads;
AZStd::mutex m_threadRegisterMutex;
// Thread local storage, gets lazily allocated when a thread is created
static thread_local CpuTimingLocalStorage* ms_threadLocalStorage;
// Thread local storage, gets lazily allocated when a thread is created
static thread_local CpuTimingLocalStorage* ms_threadLocalStorage;
// Enable/Disables the threads from profiling
AZStd::atomic_bool m_enabled = false;
// Enable/Disables the threads from profiling
AZStd::atomic_bool m_enabled = false;
// This lock will only be contested when the CpuProfiler's Shutdown() method has been called
AZStd::shared_mutex m_shutdownMutex;
// This lock will only be contested when the CpuProfiler's Shutdown() method has been called
AZStd::shared_mutex m_shutdownMutex;
// NOTE(review): presumably set by Init() and cleared by Shutdown() - confirm in the .cpp.
bool m_initialized = false;
bool m_initialized = false;
// NOTE(review): presumably guards the hand-over of m_continuousCaptureData when a
// continuous capture ends - confirm in the .cpp.
AZStd::mutex m_continuousCaptureEndingMutex;
AZStd::mutex m_continuousCaptureEndingMutex;
// Continuous-capture state flag (presumably what IsContinuousCaptureInProgress() reports - confirm).
// Explicitly initialized, like m_enabled: the constructor is defaulted, and a default-constructed
// atomic holds an indeterminate value before C++20 (assuming AZStd::atomic_bool follows std::atomic).
AZStd::atomic_bool m_continuousCaptureInProgress = false;
AZStd::atomic_bool m_continuousCaptureInProgress = false;
// Stores multiple frames of profiling data, size is controlled by MaxFramesToSave. Flushed when EndContinuousCapture is called.
// Ring buffer so that we can have fast append of new data + removal of old profiling data with good cache locality.
AZStd::ring_buffer<TimeRegionMap> m_continuousCaptureData;
};
// Stores multiple frames of profiling data, size is controlled by MaxFramesToSave. Flushed when EndContinuousCapture is called.
// Ring buffer so that we can have fast append of new data + removal of old profiling data with good cache locality.
AZStd::ring_buffer<TimeRegionMap> m_continuousCaptureData;
};
// Intermediate class to serialize Cpu TimedRegion data.
class CpuProfilingStatisticsSerializer
// Intermediate class to serialize Cpu TimedRegion data.
class CpuProfilingStatisticsSerializer
{
public:
class CpuProfilingStatisticsSerializerEntry
{
public:
class CpuProfilingStatisticsSerializerEntry
{
public:
AZ_TYPE_INFO(CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry, "{26B78F65-EB96-46E2-BE7E-A1233880B225}");
static void Reflect(AZ::ReflectContext* context);
CpuProfilingStatisticsSerializerEntry() = default;
CpuProfilingStatisticsSerializerEntry(const RHI::CachedTimeRegion& cachedTimeRegion, AZStd::thread_id threadId);
Name m_groupName;
Name m_regionName;
uint16_t m_stackDepth;
AZStd::sys_time_t m_startTick;
AZStd::sys_time_t m_endTick;
size_t m_threadId;
};
AZ_TYPE_INFO(CpuProfilingStatisticsSerializer, "{D5B02946-0D27-474F-9A44-364C2706DD41}");
AZ_TYPE_INFO(CpuProfilingStatisticsSerializer::CpuProfilingStatisticsSerializerEntry, "{26B78F65-EB96-46E2-BE7E-A1233880B225}");
static void Reflect(AZ::ReflectContext* context);
CpuProfilingStatisticsSerializer() = default;
CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& continuousData);
CpuProfilingStatisticsSerializerEntry() = default;
CpuProfilingStatisticsSerializerEntry(const RHI::CachedTimeRegion& cachedTimeRegion, AZStd::thread_id threadId);
AZStd::vector<CpuProfilingStatisticsSerializerEntry> m_cpuProfilingStatisticsSerializerEntries;
Name m_groupName;
Name m_regionName;
uint16_t m_stackDepth;
AZStd::sys_time_t m_startTick;
AZStd::sys_time_t m_endTick;
size_t m_threadId;
};
}; // namespace RHI
}; // namespace AZ
AZ_TYPE_INFO(CpuProfilingStatisticsSerializer, "{D5B02946-0D27-474F-9A44-364C2706DD41}");
static void Reflect(AZ::ReflectContext* context);
CpuProfilingStatisticsSerializer() = default;
CpuProfilingStatisticsSerializer(const AZStd::ring_buffer<RHI::CpuProfiler::TimeRegionMap>& continuousData);
AZStd::vector<CpuProfilingStatisticsSerializerEntry> m_cpuProfilingStatisticsSerializerEntries;
};
}; // namespace Profiler

File diff suppressed because it is too large Load Diff

@ -15,217 +15,214 @@
#include <Atom/RHI/CpuProfiler.h>
namespace AZ
namespace Profiler
{
namespace Render
//! Stores all the data associated with a row in the table.
struct TableRow
{
//! Stores all the data associated with a row in the table.
struct TableRow
template <typename T>
struct TableRowCompareFunctor
{
template <typename T>
struct TableRowCompareFunctor
TableRowCompareFunctor(T memberPointer, bool isAscending) : m_memberPointer(memberPointer), m_ascending(isAscending){};
bool operator()(const TableRow* lhs, const TableRow* rhs)
{
TableRowCompareFunctor(T memberPointer, bool isAscending) : m_memberPointer(memberPointer), m_ascending(isAscending){};
return m_ascending ? lhs->*m_memberPointer < rhs->*m_memberPointer : lhs->*m_memberPointer > rhs->*m_memberPointer;
}
bool operator()(const TableRow* lhs, const TableRow* rhs)
{
return m_ascending ? lhs->*m_memberPointer < rhs->*m_memberPointer : lhs->*m_memberPointer > rhs->*m_memberPointer;
}
T m_memberPointer;
bool m_ascending;
};
T m_memberPointer;
bool m_ascending;
};
// Update running statistics with new region data
void RecordRegion(const AZ::RHI::CachedTimeRegion& region, size_t threadId);
// Update running statistics with new region data
void RecordRegion(const AZ::RHI::CachedTimeRegion& region, size_t threadId);
void ResetPerFrameStatistics();
void ResetPerFrameStatistics();
// Get a string of all threads that this region executed in during the last frame
AZStd::string GetExecutingThreadsLabel() const;
// Get a string of all threads that this region executed in during the last frame
AZStd::string GetExecutingThreadsLabel() const;
AZStd::string m_groupName;
AZStd::string m_regionName;
AZStd::string m_groupName;
AZStd::string m_regionName;
// --- Per frame statistics ---
// --- Per frame statistics ---
u64 m_invocationsLastFrame = 0;
u64 m_invocationsLastFrame = 0;
// NOTE: set over unordered_set so the threads can be shown in increasing order in tooltip.
AZStd::set<size_t> m_executingThreads;
// NOTE: set over unordered_set so the threads can be shown in increasing order in tooltip.
AZStd::set<size_t> m_executingThreads;
AZStd::sys_time_t m_lastFrameTotalTicks = 0;
AZStd::sys_time_t m_lastFrameTotalTicks = 0;
// Maximum execution time of a region in the last frame.
AZStd::sys_time_t m_maxTicks = 0;
// Maximum execution time of a region in the last frame.
AZStd::sys_time_t m_maxTicks = 0;
// --- Aggregate statistics ---
// --- Aggregate statistics ---
u64 m_invocationsTotal = 0;
u64 m_invocationsTotal = 0;
// Running average of Mean Time Per Call
AZStd::sys_time_t m_runningAverageTicks = 0;
};
// Running average of Mean Time Per Call
AZStd::sys_time_t m_runningAverageTicks = 0;
};
//! ImGui widget for examining Atom CPU Profiling instrumentation.
//! Offers both a statistical view (with sorting and searching capability) and a visualizer
//! similar to RAD and other profiling tools.
class ImGuiCpuProfiler
: SystemTickBus::Handler
{
// Region Name -> statistical view row data
using RegionRowMap = AZStd::map<AZStd::string, TableRow>;
// Group Name -> RegionRowMap
using GroupRegionMap = AZStd::map<AZStd::string, RegionRowMap>;
//! ImGui widget for examining Atom CPU Profiling instrumentation.
//! Offers both a statistical view (with sorting and searching capability) and a visualizer
//! similar to RAD and other profiling tools.
class ImGuiCpuProfiler
: SystemTickBus::Handler
{
// Region Name -> statistical view row data
using RegionRowMap = AZStd::map<AZStd::string, TableRow>;
// Group Name -> RegionRowMap
using GroupRegionMap = AZStd::map<AZStd::string, RegionRowMap>;
using TimeRegion = AZ::RHI::CachedTimeRegion;
using GroupRegionName = AZ::RHI::CachedTimeRegion::GroupRegionName;
using TimeRegion = AZ::RHI::CachedTimeRegion;
using GroupRegionName = AZ::RHI::CachedTimeRegion::GroupRegionName;
public:
// Pairs a name with an execution duration; elements of m_cpuTimingStatisticsWhenPause.
// NOTE(review): m_name is a reference member, so an entry does not own its string and the
// referenced AZStd::string must outlive the entry - confirm against the code that fills the vector.
struct CpuTimingEntry
{
const AZStd::string& m_name;
// NOTE(review): the unit is not visible in this header (presumably milliseconds) - confirm.
double m_executeDuration;
};
public:
// Pairs a name with an execution duration; elements of m_cpuTimingStatisticsWhenPause.
// NOTE(review): m_name is a reference member, so an entry does not own its string and the
// referenced AZStd::string must outlive the entry - confirm against the code that fills the vector.
struct CpuTimingEntry
{
const AZStd::string& m_name;
// NOTE(review): the unit is not visible in this header (presumably milliseconds) - confirm.
double m_executeDuration;
};
ImGuiCpuProfiler() = default;
~ImGuiCpuProfiler() = default;
ImGuiCpuProfiler() = default;
~ImGuiCpuProfiler() = default;
//! Draws the overall CPU profiling window, defaults to the statistical view
void Draw(bool& keepDrawing);
//! Draws the overall CPU profiling window, defaults to the statistical view
void Draw(bool& keepDrawing);
private:
static constexpr float RowHeight = 35.0;
static constexpr int DefaultFramesToCollect = 50;
static constexpr float MediumFrameTimeLimit = 16.6; // 60 fps
static constexpr float HighFrameTimeLimit = 33.3; // 30 fps
private:
static constexpr float RowHeight = 35.0;
static constexpr int DefaultFramesToCollect = 50;
static constexpr float MediumFrameTimeLimit = 16.6; // 60 fps
static constexpr float HighFrameTimeLimit = 33.3; // 30 fps
//! Draws the statistical view of the CPU profiling data.
void DrawStatisticsView();
//! Draws the statistical view of the CPU profiling data.
void DrawStatisticsView();
//! Callback invoked when the "Load File" button is pressed in the file picker.
void LoadFile();
//! Callback invoked when the "Load File" button is pressed in the file picker.
void LoadFile();
//! Draws the file picker window.
void DrawFilePicker();
//! Draws the file picker window.
void DrawFilePicker();
//! Draws the CPU profiling visualizer.
void DrawVisualizer();
//! Draws the CPU profiling visualizer.
void DrawVisualizer();
// Draw the shared header between the two windows.
void DrawCommonHeader();
// Draw the shared header between the two windows.
void DrawCommonHeader();
// Draw the region statistics table in the order specified by the pointers in m_tableData.
void DrawTable();
// Draw the region statistics table in the order specified by the pointers in m_tableData.
void DrawTable();
// Sort the table by a given column, rearranges the pointers in m_tableData.
void SortTable(ImGuiTableSortSpecs* sortSpecs);
// Sort the table by a given column, rearranges the pointers in m_tableData.
void SortTable(ImGuiTableSortSpecs* sortSpecs);
// gather the latest timing statistics
void CacheCpuTimingStatistics();
// gather the latest timing statistics
void CacheCpuTimingStatistics();
// Get the profiling data from the last frame, only called when the profiler is not paused.
void CollectFrameData();
// Get the profiling data from the last frame, only called when the profiler is not paused.
void CollectFrameData();
// Cull old data from internal storage, only called when profiler is not paused.
void CullFrameData();
// Cull old data from internal storage, only called when profiler is not paused.
void CullFrameData();
// Draws a single block onto the timeline into the specified row
void DrawBlock(const TimeRegion& block, u64 targetRow);
// Draws a single block onto the timeline into the specified row
void DrawBlock(const TimeRegion& block, u64 targetRow);
// Draw horizontal lines between threads in the timeline
void DrawThreadSeparator(u64 threadBoundary, u64 maxDepth);
// Draw horizontal lines between threads in the timeline
void DrawThreadSeparator(u64 threadBoundary, u64 maxDepth);
// Draw the "Thread XXXXX" label onto the viewport
void DrawThreadLabel(u64 baseRow, size_t threadId);
// Draw the "Thread XXXXX" label onto the viewport
void DrawThreadLabel(u64 baseRow, size_t threadId);
// Draw the vertical lines separating frames in the timeline
void DrawFrameBoundaries();
// Draw the vertical lines separating frames in the timeline
void DrawFrameBoundaries();
// Draw the ruler with frame time labels
void DrawRuler();
// Draw the ruler with frame time labels
void DrawRuler();
// Draw the frame time histogram
void DrawFrameTimeHistogram();
// Draw the frame time histogram
void DrawFrameTimeHistogram();
// Converts raw ticks to a pixel value suitable to give to ImDrawList, handles window scrolling
float ConvertTickToPixelSpace(AZStd::sys_time_t tick, AZStd::sys_time_t leftBound, AZStd::sys_time_t rightBound) const;
// Converts raw ticks to a pixel value suitable to give to ImDrawList, handles window scrolling
float ConvertTickToPixelSpace(AZStd::sys_time_t tick, AZStd::sys_time_t leftBound, AZStd::sys_time_t rightBound) const;
AZStd::sys_time_t GetViewportTickWidth() const;
AZStd::sys_time_t GetViewportTickWidth() const;
// Gets the color for a block using the GroupRegionName as a key into the cache.
// Generates a random ImU32 if the block does not yet have a color.
ImU32 GetBlockColor(const TimeRegion& block);
// Gets the color for a block using the GroupRegionName as a key into the cache.
// Generates a random ImU32 if the block does not yet have a color.
ImU32 GetBlockColor(const TimeRegion& block);
// System tick bus overrides
virtual void OnSystemTick() override;
// System tick bus overrides
virtual void OnSystemTick() override;
// --- Visualizer Members ---
// --- Visualizer Members ---
int m_framesToCollect = DefaultFramesToCollect;
int m_framesToCollect = DefaultFramesToCollect;
// Tally of the number of saved profiling events so far
u64 m_savedRegionCount = 0;
// Tally of the number of saved profiling events so far
u64 m_savedRegionCount = 0;
// Viewport tick bounds, these are used to convert tick space -> screen space and cull so we only draw onscreen objects
AZStd::sys_time_t m_viewportStartTick;
AZStd::sys_time_t m_viewportEndTick;
// Viewport tick bounds, these are used to convert tick space -> screen space and cull so we only draw onscreen objects
AZStd::sys_time_t m_viewportStartTick;
AZStd::sys_time_t m_viewportEndTick;
// Map to store each thread's TimeRegions, individual vectors are sorted by start tick
// note: we use size_t as a proxy for thread_id because native_thread_id_type differs from
// platform to platform, which causes problems when deserializing saved captures.
AZStd::unordered_map<size_t, AZStd::vector<TimeRegion>> m_savedData;
// Map to store each thread's TimeRegions, individual vectors are sorted by start tick
// note: we use size_t as a proxy for thread_id because native_thread_id_type differs from
// platform to platform, which causes problems when deserializing saved captures.
AZStd::unordered_map<size_t, AZStd::vector<TimeRegion>> m_savedData;
// Region color cache
AZStd::unordered_map<GroupRegionName, ImVec4, RHI::CachedTimeRegion::GroupRegionName::Hash> m_regionColorMap;
// Region color cache
AZStd::unordered_map<GroupRegionName, ImVec4, RHI::CachedTimeRegion::GroupRegionName::Hash> m_regionColorMap;
// Tracks the frame boundaries
AZStd::vector<AZStd::sys_time_t> m_frameEndTicks = { INT64_MIN };
// Tracks the frame boundaries
AZStd::vector<AZStd::sys_time_t> m_frameEndTicks = { INT64_MIN };
// Filter for highlighting regions on the visualizer
ImGuiTextFilter m_visualizerHighlightFilter;
// Filter for highlighting regions on the visualizer
ImGuiTextFilter m_visualizerHighlightFilter;
// --- Tabular view members ---
// --- Tabular view members ---
// ImGui filter used to filter TimedRegions.
ImGuiTextFilter m_timedRegionFilter;
// ImGui filter used to filter TimedRegions.
ImGuiTextFilter m_timedRegionFilter;
// Saves statistical view data organized by group name -> region name -> row data
GroupRegionMap m_groupRegionMap;
// Saves statistical view data organized by group name -> region name -> row data
GroupRegionMap m_groupRegionMap;
// Saves pointers to objects in m_groupRegionMap, order reflects table ordering.
// Non-owning, will be cleared when m_groupRegionMap is cleared.
AZStd::vector<TableRow*> m_tableData;
// Saves pointers to objects in m_groupRegionMap, order reflects table ordering.
// Non-owning, will be cleared when m_groupRegionMap is cleared.
AZStd::vector<TableRow*> m_tableData;
// Pause cpu profiling. The profiler will show the statistics of the last frame before pause.
bool m_paused = false;
// Pause cpu profiling. The profiler will show the statistics of the last frame before pause.
bool m_paused = false;
// Export the profiling data from a single frame to a local file.
bool m_captureToFile = false;
// Export the profiling data from a single frame to a local file.
bool m_captureToFile = false;
// Toggle between the normal statistical view and the visual profiling view.
bool m_enableVisualizer = false;
// Toggle between the normal statistical view and the visual profiling view.
bool m_enableVisualizer = false;
// Last captured CPU timing statistics
AZStd::vector<CpuTimingEntry> m_cpuTimingStatisticsWhenPause;
AZStd::sys_time_t m_frameToFrameTime{};
// Last captured CPU timing statistics
AZStd::vector<CpuTimingEntry> m_cpuTimingStatisticsWhenPause;
AZStd::sys_time_t m_frameToFrameTime{};
AZStd::string m_lastCapturedFilePath;
AZStd::string m_lastCapturedFilePath;
bool m_showFilePicker = false;
bool m_showFilePicker = false;
// Cached file paths to previous traces on disk, sorted with the most recent trace at the front.
AZStd::vector<IO::Path> m_cachedCapturePaths;
// Cached file paths to previous traces on disk, sorted with the most recent trace at the front.
AZStd::vector<IO::Path> m_cachedCapturePaths;
// Index into the file picker, used to determine which file to load when "Load File" is pressed.
int m_currentFileIndex = 0;
// Index into the file picker, used to determine which file to load when "Load File" is pressed.
int m_currentFileIndex = 0;
// --- Loading capture state ---
AZStd::unordered_set<AZStd::string> m_deserializedStringPool;
AZStd::unordered_set<RHI::CachedTimeRegion::GroupRegionName, RHI::CachedTimeRegion::GroupRegionName::Hash> m_deserializedGroupRegionNamePool;
};
} // namespace Render
} // namespace AZ
// --- Loading capture state ---
AZStd::unordered_set<AZStd::string> m_deserializedStringPool;
AZStd::unordered_set<RHI::CachedTimeRegion::GroupRegionName, RHI::CachedTimeRegion::GroupRegionName::Hash> m_deserializedGroupRegionNamePool;
};
} // namespace Profiler
#include "ImGuiCpuProfiler.inl"

Loading…
Cancel
Save