From 2f57d725611b559fe968d4f730ff4d7bd46edfe7 Mon Sep 17 00:00:00 2001 From: Jeremy Ong Date: Mon, 2 Aug 2021 23:50:44 -0600 Subject: [PATCH] Add initial JobGraph prototype Signed-off-by: Jeremy Ong --- .../AzCore/Jobs/Internal/JobTypeEraser.cpp | 76 ++ .../AzCore/Jobs/Internal/JobTypeEraser.h | 229 +++++ .../AzCore/AzCore/Jobs/JobDescriptor.h | 49 + .../AzCore/AzCore/Jobs/JobExecutor.cpp | 361 ++++++++ .../AzCore/AzCore/Jobs/JobExecutor.h | 86 ++ .../Framework/AzCore/AzCore/Jobs/JobGraph.cpp | 60 ++ Code/Framework/AzCore/AzCore/Jobs/JobGraph.h | 139 +++ .../Framework/AzCore/AzCore/Jobs/JobGraph.inl | 62 ++ .../AzCore/AzCore/azcore_files.cmake | 8 + .../AzCore/AzCore/std/parallel/thread.h | 3 +- Code/Framework/AzCore/Tests/JobGraphTests.cpp | 848 ++++++++++++++++++ .../AzCore/Tests/azcoretests_files.cmake | 1 + 12 files changed, 1920 insertions(+), 2 deletions(-) create mode 100644 Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.cpp create mode 100644 Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.h create mode 100644 Code/Framework/AzCore/AzCore/Jobs/JobDescriptor.h create mode 100644 Code/Framework/AzCore/AzCore/Jobs/JobExecutor.cpp create mode 100644 Code/Framework/AzCore/AzCore/Jobs/JobExecutor.h create mode 100644 Code/Framework/AzCore/AzCore/Jobs/JobGraph.cpp create mode 100644 Code/Framework/AzCore/AzCore/Jobs/JobGraph.h create mode 100644 Code/Framework/AzCore/AzCore/Jobs/JobGraph.inl create mode 100644 Code/Framework/AzCore/Tests/JobGraphTests.cpp diff --git a/Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.cpp b/Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.cpp new file mode 100644 index 0000000000..043f232e32 --- /dev/null +++ b/Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. 
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <AzCore/Jobs/Internal/JobTypeEraser.h>
+
+namespace AZ::Internal
+{
+    // Move-constructs a job by taking over the type-erased lambda held by `other`.
+    //
+    // Two strategies are used depending on where the lambda lives and how it may be relocated:
+    //  - heap allocated, or inline and trivially relocatable: the whole job is copied bitwise
+    //  - inline and not trivially relocatable: the lambda is rebuilt in our buffer through m_relocator
+    // In both cases `other` is left in a state in which its destructor neither destroys nor frees the lambda.
+    TypeErasedJob::TypeErasedJob(TypeErasedJob&& other) noexcept
+    {
+        if (!other.m_relocator || other.m_lambda != other.m_buffer)
+        {
+            // The type-erased lambda is trivially relocatable OR, the lambda is heap allocated.
+            // The bitwise copy also carries the descriptor and the dependency bookkeeping.
+            memcpy(this, &other, sizeof(TypeErasedJob));
+
+            if (other.m_lambda == other.m_buffer)
+            {
+                // The copied pointer still refers to other's inline buffer, retarget it to ours
+                m_lambda = m_buffer;
+            }
+
+            // Prevent deletion in the event the lambda had spilled to the heap
+            other.m_lambda = nullptr;
+            return;
+        }
+
+        // At this point, we know the lambda was inlined
+        m_lambda = m_buffer;
+
+        m_invoker = other.m_invoker;
+        m_relocator = other.m_relocator;
+        m_destroyer = other.m_destroyer;
+
+        // Unlike the memcpy path above, nothing else is transferred implicitly here. Copy the descriptor and
+        // the graph bookkeeping explicitly, otherwise a moved job silently reverts to the default priority
+        // and forgets its inbound/outbound links.
+        m_descriptor = other.m_descriptor;
+        m_graph = other.m_graph;
+        m_successorOffset = other.m_successorOffset;
+        m_inboundLinkCount = other.m_inboundLinkCount;
+        m_outboundLinkCount = other.m_outboundLinkCount;
+
+        // We now own the lambda, so clear the moved-from job's destroyer
+        other.m_destroyer = nullptr;
+        other.m_invoker = nullptr;
+
+        m_relocator(m_buffer, other.m_buffer);
+    }
+
+    // Move-assigns by destroying the current contents and move-constructing in place from `other`.
+    // Self-assignment is a no-op.
+    TypeErasedJob& TypeErasedJob::operator=(TypeErasedJob&& other) noexcept
+    {
+        if (this == &other)
+        {
+            return *this;
+        }
+
+        this->~TypeErasedJob();
+
+        new (this) TypeErasedJob{ AZStd::move(other) };
+
+        return *this;
+    }
+
+    // Destroys the stored lambda (when it has a non-trivial destructor) and releases its heap storage
+    // (when it did not fit in the inline buffer). A null m_lambda marks a job that owns nothing.
+    TypeErasedJob::~TypeErasedJob()
+    {
+        if (m_lambda)
+        {
+            if (m_destroyer)
+            {
+                // The presence of m_destroyer indicates that the lambda is not trivially destructible
+                m_destroyer(m_lambda);
+            }
+
+            if (m_lambda != m_buffer)
+            {
+                // We've spilled the lambda into the heap, free its memory
+                azfree(m_lambda);
+            }
+        }
+    }
+
+} // namespace AZ::Internal
diff --git a/Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.h b/Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.h
new file mode 100644
index 0000000000..1455f1311a
--- /dev/null
+++ b/Code/Framework/AzCore/AzCore/Jobs/Internal/JobTypeEraser.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace AZ::Internal +{ + using JobInvoke_t = void (*)(void* lambda); + using JobRelocate_t = void (*)(void* dst, void* src); + using JobDestroy_t = void (*)(void* obj); + + class CompiledJobGraph; + + // Lambdas are opaque types and we cannot extract any member function pointers. In order to store lambdas in a + // type erased fashion, we instead use a single function call indirection, invoking the lambda function in a + // static class function which has a stable address in memory. The Erased* methods return addresses to the + // indirect callers of the lambda copy/move assignment operators, call operator, and destructor. + // + // For lambdas that are trivially relocatable, both the returned move and copy assignment function pointers + // will be nullptr. + // + // Lambdas that are trivially destructible will result in a nullptr returned JobDestroy_t pointer. + // + // The class will check that the lambda is copy assignable or movable. + template + class JobTypeEraser final + { + public: + constexpr JobInvoke_t ErasedInvoker() + { + return reinterpret_cast(Invoker); + } + + constexpr JobRelocate_t ErasedRelocator() + { + if constexpr (AZStd::is_trivially_move_constructible_v) + { + return nullptr; + } + else if constexpr (AZStd::is_move_constructible_v) + { + return reinterpret_cast(Mover); + } + else if constexpr (AZStd::is_copy_constructible_v) + { + return reinterpret_cast(Copyer); + } + else + { + static_assert( + false, + "Job lambdas must be either move or copy constructible. 
Please verify that all captured data is move or copy " + "constructible."); + } + } + + constexpr JobDestroy_t ErasedDestroyer() + { + if constexpr (AZStd::is_trivially_destructible_v) + { + return nullptr; + } + else + { + return reinterpret_cast(Destroyer); + } + } + + private: + constexpr static void Invoker(Lambda* lambda) + { + lambda->operator()(); + } + + constexpr static void Mover(Lambda* dst, Lambda* src) + { + new (dst) Lambda{ AZStd::move(*src) }; + } + + constexpr static void Copyer(Lambda* dst, Lambda* src) + { + new (dst) Lambda{ *src }; + } + + constexpr static void Destroyer(Lambda* lambda) + { + lambda->~Lambda(); + } + }; + + // The TypeErasedJob encapsulates member function pointers to store in a homogeneously-typed container + // The function signature of all lambdas encoded in a TypeErasedJob is void(*)(). The lambdas can capture + // data, in which case the data is inlined in this structure if the payload is less than or equal to the + // buffer size. Otherwise, the data is heap allocated. + class alignas(alignof(max_align_t)) TypeErasedJob final + { + public: + // The inline buffer allows the TypeErasedJob to span two cache lines. Lambdas can capture 56 + // bytes of data (7 pointers/references on a 64-bit machine) before spilling to the heap. + constexpr static size_t BufferSize = 128 - sizeof(size_t) * 6 - sizeof(uint32_t) - sizeof(JobDescriptor); + + TypeErasedJob() = default; + + template + TypeErasedJob(JobDescriptor const& desc, Lambda&& lambda) noexcept + : m_descriptor{desc} + { + JobTypeEraser eraser; + m_invoker = eraser.ErasedInvoker(); + m_relocator = eraser.ErasedRelocator(); + m_destroyer = eraser.ErasedDestroyer(); + + // NOTE: This code is conservative in that extended alignment requirements result in a heap + // spill, even if the lambda could have occupied a portion of the inline buffer with a base + // pointer adjustment. 
+ if constexpr (sizeof(Lambda) <= BufferSize && alignof(Lambda) <= alignof(max_align_t)) + { + TypedRelocate(AZStd::forward(lambda), m_buffer); + m_lambda = m_buffer; + } + else + { + // Lambda has spilled to the heap (or requires extended alignment) + m_lambda = reinterpret_cast(azmalloc(sizeof(Lambda), alignof(Lambda))); + TypedRelocate(AZStd::forward(lambda), m_lambda); + } + } + + TypeErasedJob(TypeErasedJob&& other) noexcept; + + TypeErasedJob& operator=(TypeErasedJob&& other) noexcept; + + ~TypeErasedJob(); + + void Link(TypeErasedJob& other); + + // Indicates if this job is a root of the graph (with no dependencies) + bool IsRoot(); + + void AttachToJobGraph(CompiledJobGraph& graph) noexcept + { + m_graph = &graph; + } + + void Invoke() + { + m_invoker(m_lambda); + } + + uint8_t GetPriorityNumber() const + { + return static_cast(m_descriptor.priority); + } + + private: + friend class CompiledJobGraph; + friend class JobWorker; + + // This relocation avoids branches needed if the lambda type is unknown + template + void TypedRelocate(Lambda&& lambda, char* destination) + { + if constexpr (AZStd::is_trivially_move_constructible_v) + { + memcpy(destination, reinterpret_cast(&lambda), sizeof(Lambda)); + } + else if constexpr (AZStd::is_move_constructible_v) + { + new (destination) Lambda{ AZStd::move(lambda) }; + } + else if constexpr (AZStd::is_copy_constructible_v) + { + new (destination) Lambda{ lambda }; + } + else + { + static_assert( + false, + "Job lambdas must be either move or copy constructible. Please verify that all captured data is move or copy " + "constructible."); + } + } + + // Small buffer optimization for lambdas. We cover our bases here by enforcing alignment on the + // class to equal the alignment of the largest scalar type available on the system (generally + // 16 bytes). + char m_buffer[BufferSize]; + + // This value is an offset in a buffer that stores dependency tracking information. 
+ uint32_t m_successorOffset = 0; + uint32_t m_inboundLinkCount = 0; + uint32_t m_outboundLinkCount = 0; + + // May point to the inlined payload buffer, or heap + char* m_lambda = nullptr; + + CompiledJobGraph* m_graph = nullptr; + + JobInvoke_t m_invoker; + + // If nullptr, the lambda is trivially relocatable (via memcpy). Otherwise, it must be invoked + // when instances of this class are moved. + JobRelocate_t m_relocator; + JobDestroy_t m_destroyer; + + JobDescriptor m_descriptor; + }; + + inline void TypeErasedJob::Link(TypeErasedJob& other) + { + ++m_outboundLinkCount; + ++other.m_inboundLinkCount; + } + + inline bool TypeErasedJob::IsRoot() + { + return m_inboundLinkCount == 0; + } +} // namespace AZ::Internal diff --git a/Code/Framework/AzCore/AzCore/Jobs/JobDescriptor.h b/Code/Framework/AzCore/AzCore/Jobs/JobDescriptor.h new file mode 100644 index 0000000000..82a9603bd5 --- /dev/null +++ b/Code/Framework/AzCore/AzCore/Jobs/JobDescriptor.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#pragma once + +#include +#include + +namespace AZ +{ + // Job priorities MAY be used judiciously to fine tune runtime execution, with the understanding + // that profiling is needed to understand what the critical path per frame is. Modifying + // job priorities is an EXPERT setting that should succeed a healthy dose of measurement. + enum class JobPriority : uint8_t + { + CRITICAL = 0, + HIGH = 1, + MEDIUM = 2, // Default + LOW = 3, + PRIORITY_COUNT = 4, + }; + + // All submitted jobs are associated with a JobDescriptor which defines the priority, affinitization, + // and tracking of the job resource utilization. + // + // TODO: Define various job kinds and provide a mechanism for cpuMask computation on different systems. 
+ struct JobDescriptor + { + // Unique job kind label (e.g. "frustum culling") + // Job names *must* be provided + const char* jobName = nullptr; + + // Associates a set of job kinds together for budget tracking (e.g. "graphics") + const char* jobGroup = nullptr; + + // EXPERTS ONLY. Jobs of higher priority are executed ahead of any lower priority jobs + // that were queued before it provided they had not yet started + JobPriority priority = JobPriority::MEDIUM; + + // EXPERTS ONLY. A bitmask that restricts jobs of this kind to run only on cores + // corresponding to a set bit. 0 is synonymous with all bits set + uint32_t cpuMask = 0; + }; +} diff --git a/Code/Framework/AzCore/AzCore/Jobs/JobExecutor.cpp b/Code/Framework/AzCore/AzCore/Jobs/JobExecutor.cpp new file mode 100644 index 0000000000..16e4983752 --- /dev/null +++ b/Code/Framework/AzCore/AzCore/Jobs/JobExecutor.cpp @@ -0,0 +1,361 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace AZ +{ + constexpr static size_t PRIORITY_COUNT = static_cast(JobPriority::PRIORITY_COUNT); + + namespace Internal + { + CompiledJobGraph::CompiledJobGraph( + AZStd::vector&& jobs, + AZStd::unordered_map>& links, + size_t linkCount, + bool retained) + : m_remaining{ jobs.size() } + , m_retained{ retained } + { + m_jobs = AZStd::move(jobs); + m_dependencyCounts = reinterpret_cast*>(azcalloc(sizeof(AZStd::atomic) * m_jobs.size())); + m_successors.resize(linkCount); + + uint32_t* cursor = m_successors.data(); + + for (size_t i = 0; i != m_jobs.size(); ++i) + { + TypeErasedJob& job = m_jobs[i]; + job.m_successorOffset = cursor - m_successors.data(); + cursor += job.m_outboundLinkCount; + + AZ_Assert(job.m_outboundLinkCount == links[i].size(), "Job outbound link information mismatch"); + + for (uint32_t j = 0; j != job.m_outboundLinkCount; ++j) + { + m_successors[static_cast(job.m_successorOffset) + j] = links[i][j]; + } + + if (job.m_inboundLinkCount > 0) + { + m_dependencyCounts[i].store(job.m_inboundLinkCount, AZStd::memory_order_release); + } + } + + // TODO: Check for dependency cycles + } + + CompiledJobGraph::~CompiledJobGraph() + { + if (m_dependencyCounts) + { + azfree(m_dependencyCounts); + } + } + + void CompiledJobGraph::Release() + { + if (--m_remaining == 0) + { + if (m_retained) + { + m_remaining = m_jobs.size(); + for (size_t i = 0; i != m_jobs.size(); ++i) + { + TypeErasedJob& job = m_jobs[i]; + if (job.m_inboundLinkCount > 0) + { + m_dependencyCounts[i].store(job.m_inboundLinkCount, AZStd::memory_order_release); + } + } + } + + if (m_waitEvent) + { + m_waitEvent->m_submitted = false; + m_waitEvent->Signal(); + } + + if (!m_retained) + { + azdestroy(this); + } + } + } + + struct QueueStatus + { + AZStd::atomic head; + AZStd::atomic tail; + AZStd::atomic reserve; + }; + + // The 
Job Queue is a lock free 4-priority queue. Its basic operation is as follows:
+        // Each priority level is associated with a different queue, corresponding to the maximum size of a uint16_t.
+        // Each queue is implemented as a ring buffer, and a QueueStatus maintains the following state per queue:
+        // - offset to the "head" of the ring, from where we acquire elements
+        // - offset to the "tail" of the ring, which tracks where new elements should be enqueued
+        // - offset to a tail reservation index, which is used to reserve a slot to enqueue elements
+        class JobQueue final
+        {
+        public:
+            // Preallocating upfront allows us to reserve slots to insert jobs without locks.
+            // Each thread allocated by the job manager consumes ~2 MB.
+            constexpr static uint16_t MaxQueueSize = 0xffff;
+            constexpr static uint8_t PriorityLevelCount = static_cast<uint8_t>(JobPriority::PRIORITY_COUNT);
+
+            JobQueue() = default;
+            JobQueue(const JobQueue&) = delete;
+            JobQueue& operator=(const JobQueue&) = delete;
+
+            // Adds a job to the ring of its priority, retrying (with a short sleep) while that ring is full.
+            // Returns true when the job was written to the slot the consumer will read next, in which case
+            // the consumer may be asleep and has to be woken by the caller.
+            bool Enqueue(TypeErasedJob* job);
+
+            // Returns the oldest job of the first non-empty ring, scanning from priority 0 upwards,
+            // or nullptr when every ring is empty.
+            TypeErasedJob* TryDequeue();
+
+        private:
+            // The ring offsets are uint16_t values that wrap around at 2^16, so every value in [0, 0xffff]
+            // has to be a valid slot index. One slot always stays unused to tell a full ring apart from an
+            // empty one, which leaves MaxQueueSize usable entries.
+            constexpr static size_t SlotCount = static_cast<size_t>(MaxQueueSize) + 1;
+
+            QueueStatus m_status[PriorityLevelCount] = {};
+            TypeErasedJob* m_queues[PriorityLevelCount][SlotCount] = {};
+        };
+
+        bool JobQueue::Enqueue(TypeErasedJob* job)
+        {
+            uint8_t priority = job->GetPriorityNumber();
+            QueueStatus& status = m_status[priority];
+
+            while (true)
+            {
+                uint16_t reserve = status.reserve.load();
+                uint16_t head = status.head.load();
+
+                // Enqueuing is done in two phases because we cannot atomically write the job to the slot we reserve
+                // and simultaneously publish the fact that the slot is now available.
+                //
+                // The ring is full when the slot following `reserve` is `head`. The difference is narrowed back to
+                // uint16_t: after integer promotion `head - 1` would be -1 for head == 0 and could never compare
+                // equal to `reserve`, so a full ring would go undetected across the wrap-around.
+                if (reserve != static_cast<uint16_t>(head - 1))
+                {
+                    const uint16_t next = static_cast<uint16_t>(reserve + 1);
+
+                    // Try to reserve a slot
+                    if (status.reserve.compare_exchange_weak(reserve, next))
+                    {
+                        m_queues[priority][reserve] = job;
+
+                        uint16_t expectedReserve = reserve;
+
+                        // Increment the tail to advertise the new job. Producers publish in reservation order, so
+                        // this spins until every earlier reservation has been published.
+                        while (!status.tail.compare_exchange_weak(expectedReserve, next))
+                        {
+                            expectedReserve = reserve;
+                        }
+
+                        // The queue was empty before this job exactly when our slot is the head slot. Comparing
+                        // against our own slot (rather than against the current tail) keeps the answer correct
+                        // when another producer has already published behind us; otherwise neither producer
+                        // would report an empty queue and the consumer would never be woken.
+                        return status.head.load() == reserve;
+                    }
+
+                    // We failed to reserve a slot, try again
+                }
+                else
+                {
+                    // TODO need exponential backoff here
+                    AZStd::this_thread::sleep_for(AZStd::chrono::microseconds{ 100 });
+                }
+            }
+        }
+
+        TypeErasedJob* JobQueue::TryDequeue()
+        {
+            for (size_t priority = 0; priority != PriorityLevelCount; ++priority)
+            {
+                QueueStatus& status = m_status[priority];
+                while (true)
+                {
+                    uint16_t head = status.head.load();
+                    uint16_t tail = status.tail.load();
+                    if (head == tail)
+                    {
+                        // Queue empty
+                        break;
+                    }
+
+                    // Read the slot named by the snapshot that the exchange below validates, so the job returned
+                    // always belongs to the offset that was actually claimed.
+                    TypeErasedJob* job = m_queues[priority][head];
+                    if (status.head.compare_exchange_weak(head, static_cast<uint16_t>(head + 1)))
+                    {
+                        return job;
+                    }
+                }
+            }
+
+            return nullptr;
+        }
+
+        // A JobWorker owns one thread and one JobQueue. The thread sleeps on a semaphore until a job is
+        // enqueued, then drains the queue, submitting successors whose dependencies have all completed.
+        class JobWorker
+        {
+        public:
+            // Starts the worker thread. initSemaphore is released once by the new thread before it enters
+            // its run loop. When affinitize is set, the thread is pinned to the core matching id.
+            void Spawn(::AZ::JobExecutor& executor, size_t id, AZStd::semaphore& initSemaphore, bool affinitize)
+            {
+                m_executor = &executor;
+
+                AZStd::string threadName = AZStd::string::format("JobWorker %zu", id);
+                AZStd::thread_desc desc = {};
+                desc.m_name = threadName.c_str();
+
+                // m_cpuId is a bitfield stored in an int, so only ids below its sign bit can be expressed.
+                // Shifting any further is undefined behavior; such workers keep the default affinity instead.
+                constexpr size_t affinityBitCount = sizeof(desc.m_cpuId) * 8 - 1;
+                if (affinitize && id < affinityBitCount)
+                {
+                    desc.m_cpuId = 1 << id;
+                }
+                m_active.store(true, AZStd::memory_order_release);
+
+                m_thread = AZStd::thread{ [this, &initSemaphore]
+                                          {
+                                              initSemaphore.release();
+                                              Run();
+                                          },
+                                          &desc };
+            }
+
+            // Requests the run loop to stop, wakes the thread in case it is sleeping, and joins it.
+            void Join()
+            {
+                m_active.store(false, AZStd::memory_order_release);
+                m_semaphore.release();
+                m_thread.join();
+            }
+
+            void Enqueue(TypeErasedJob* job)
+            {
+                if (m_queue.Enqueue(job))
+                {
+                    // The queue was empty prior to enqueueing the job, release the semaphore
+                    m_semaphore.release();
+                }
+            }
+
+        private:
+            // Thread entry point: sleep until signaled, then run jobs until the queue is empty.
+            void Run()
+            {
+                while (m_active)
+                {
+                    m_semaphore.acquire();
+                    // m_semaphore.try_acquire_for(AZStd::chrono::microseconds{ 10 });
+
+                    if (!m_active)
+                    {
+                        return;
+                    }
+
+                    TypeErasedJob* job = m_queue.TryDequeue();
+                    while (job)
+                    {
+                        job->Invoke();
+                        // Decrement counts for all job successors
+                        for (size_t j = 0; j != job->m_outboundLinkCount; ++j)
+                        {
+                            uint32_t successorIndex = job->m_graph->m_successors[job->m_successorOffset + j];
+                            if (--job->m_graph->m_dependencyCounts[successorIndex] == 0)
+                            {
+                                m_executor->Submit(job->m_graph->m_jobs[successorIndex]);
+                            }
+                        }
+
+                        // Release may destroy the graph (and with it `job`), so `job` is not touched afterwards
+                        job->m_graph->Release();
+                        --m_executor->m_remaining;
+
+                        job = m_queue.TryDequeue();
+                    }
+                }
+            }
+
+            AZStd::thread m_thread;
+            AZStd::atomic<bool> m_active{ false };
+            AZStd::binary_semaphore m_semaphore;
+
+            ::AZ::JobExecutor* m_executor = nullptr;
+            JobQueue m_queue;
+        };
+    } // namespace Internal
+
+    JobExecutor& JobExecutor::Instance()
+    {
+        // TODO: Create the default executor as part of a component (as in JobManagerComponent)
+        static JobExecutor executor;
+        return executor;
+    }
+
+    // Creates the worker threads and blocks until each of them has started running.
+    JobExecutor::JobExecutor(uint32_t threadCount)
+    {
+        // Both counters are declared without an initializer. Give them a defined starting value before any
+        // worker, Submit or Drain call reads them; otherwise Drain may spin on an indeterminate count.
+        m_lastSubmission.store(0);
+        m_remaining.store(0);
+
+        // TODO: Configure thread count + affinity based on configuration
+        m_threadCount = threadCount == 0 ? AZStd::thread::hardware_concurrency() : threadCount;
+
+        m_workers = reinterpret_cast<Internal::JobWorker*>(azmalloc(m_threadCount * sizeof(Internal::JobWorker)));
+
+        bool affinitize = m_threadCount == AZStd::thread::hardware_concurrency();
+
+        AZStd::semaphore initSemaphore;
+
+        for (size_t i = 0; i != m_threadCount; ++i)
+        {
+            new (m_workers + i) Internal::JobWorker{};
+            m_workers[i].Spawn(*this, i, initSemaphore, affinitize);
+        }
+
+        // Each worker releases initSemaphore once on startup. Waiting here also keeps initSemaphore alive
+        // for as long as the worker lambdas reference it.
+        for (size_t i = 0; i != m_threadCount; ++i)
+        {
+            initSemaphore.acquire();
+        }
+    }
+
+    // Stops and joins every worker, then releases the worker storage.
+    JobExecutor::~JobExecutor()
+    {
+        for (size_t i = 0; i != m_threadCount; ++i)
+        {
+            m_workers[i].Join();
+            m_workers[i].~JobWorker();
+        }
+
+        azfree(m_workers);
+    }
+
+    // Attaches every job to the graph, then queues the jobs that have no dependencies.
+    void JobExecutor::Submit(Internal::CompiledJobGraph& graph)
+    {
+        for (Internal::TypeErasedJob& job : graph.Jobs())
+        {
+            job.AttachToJobGraph(graph);
+        }
+
+        // Submit all jobs that have no inbound edges
+        for (Internal::TypeErasedJob& job : graph.Jobs())
+        {
+            if (job.IsRoot())
+            {
+                Submit(job);
+            }
+        }
+    }
+
+    // Queues a single job on the next worker in round-robin order.
+    void JobExecutor::Submit(Internal::TypeErasedJob& job)
+    {
+        // TODO: Something more sophisticated is likely needed here.
+        // First, we are completely ignoring affinity.
+        // Second, some heuristics on core availability will help distribute work more effectively
+        ++m_remaining;
+        m_workers[++m_lastSubmission % m_threadCount].Enqueue(&job);
+    }
+
+    // Polls (sleeping between checks) until every submitted job has finished.
+    void JobExecutor::Drain()
+    {
+        while (m_remaining > 0)
+        {
+            AZStd::this_thread::sleep_for(AZStd::chrono::milliseconds{ 100 });
+        }
+    }
+} // namespace AZ
diff --git a/Code/Framework/AzCore/AzCore/Jobs/JobExecutor.h b/Code/Framework/AzCore/AzCore/Jobs/JobExecutor.h
new file mode 100644
index 0000000000..9418126678
--- /dev/null
+++ b/Code/Framework/AzCore/AzCore/Jobs/JobExecutor.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace AZ +{ + class JobGraphEvent; + + namespace Internal + { + class CompiledJobGraph final + { + public: + AZ_CLASS_ALLOCATOR(CompiledJobGraph, SystemAllocator, 0) + + CompiledJobGraph( + AZStd::vector&& jobs, + AZStd::unordered_map>& links, + size_t linkCount, + bool retained); + + ~CompiledJobGraph(); + + AZStd::vector& Jobs() noexcept + { + return m_jobs; + } + + // Indicate that a constituent job has finished and decrement a counter to determine if the + // graph should be freed + void Release(); + + private: + friend class JobGraph; + friend class JobWorker; + + AZStd::vector m_jobs; + AZStd::vector m_successors; + AZStd::atomic* m_dependencyCounts = nullptr; + JobGraphEvent* m_waitEvent = nullptr; + AZStd::atomic m_remaining; + bool m_retained; + }; + + class JobWorker; + } // namespace Internal + + class JobExecutor + { + public: + AZ_CLASS_ALLOCATOR(JobExecutor, SystemAllocator, 0); + + static JobExecutor& Instance(); + + // Passing 0 for the threadCount requests for the thread count to match the hardware concurrency + JobExecutor(uint32_t threadCount = 0); + ~JobExecutor(); + + void Submit(Internal::CompiledJobGraph& graph); + + void Submit(Internal::TypeErasedJob& job); + + // Busy wait until jobs are cleared from the executor (note, does not prevent future jobs from being submitted) + void Drain(); + private: + friend class Internal::JobWorker; + + Internal::JobWorker* m_workers; + uint32_t m_threadCount = 0; + AZStd::atomic m_lastSubmission; + AZStd::atomic m_remaining; + }; +} // namespace AZ diff --git a/Code/Framework/AzCore/AzCore/Jobs/JobGraph.cpp b/Code/Framework/AzCore/AzCore/Jobs/JobGraph.cpp new file mode 100644 index 0000000000..b715e0ada7 --- /dev/null +++ b/Code/Framework/AzCore/AzCore/Jobs/JobGraph.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. 
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#include <AzCore/Jobs/JobGraph.h>
+
+#include <AzCore/Jobs/JobExecutor.h>
+
+namespace AZ
+{
+    using Internal::CompiledJobGraph;
+
+    // Records that the job behind this token must finish before the job behind `comesAfter` may start.
+    void JobToken::PrecedesInternal(JobToken& comesAfter)
+    {
+        AZ_Assert(!m_parent.m_submitted, "Cannot mutate a JobGraph that was previously submitted.");
+
+        // Token indices are only meaningful within the graph that issued them; an index taken from another
+        // graph could address a different job or run past the end of m_jobs.
+        AZ_Assert(&m_parent == &comesAfter.m_parent, "Cannot add a dependency between jobs of different JobGraphs.");
+
+        // Increment inbound/outbound edge counts
+        m_parent.m_jobs[m_index].Link(m_parent.m_jobs[comesAfter.m_index]);
+
+        m_parent.m_links[m_index].emplace_back(comesAfter.m_index);
+
+        ++m_parent.m_linkCount;
+    }
+
+    // Frees the compiled graph of a retained JobGraph. A detached graph's compiled form frees itself
+    // once its last job has finished, so it must not be destroyed here.
+    JobGraph::~JobGraph()
+    {
+        if (m_retained && m_compiledJobGraph)
+        {
+            azdestroy(m_compiledJobGraph);
+        }
+    }
+
+    // Submits the graph on the default executor.
+    void JobGraph::Submit(JobGraphEvent* waitEvent)
+    {
+        SubmitOnExecutor(JobExecutor::Instance(), waitEvent);
+    }
+
+    // Compiles the graph on first use and hands it to `executor`. waitEvent, when provided, is signaled
+    // once every job of the graph has finished.
+    void JobGraph::SubmitOnExecutor(JobExecutor& executor, JobGraphEvent* waitEvent)
+    {
+        // The compiled form of a detached graph destroys itself when its last job completes, which leaves
+        // the pointer cached below dangling. Only retained graphs can be submitted again.
+        AZ_Assert(m_retained || !m_compiledJobGraph, "A detached JobGraph cannot be submitted more than once.");
+
+        m_submitted = true;
+
+        if (!m_compiledJobGraph)
+        {
+            // Compilation takes ownership of the jobs; m_jobs is left moved-from
+            m_compiledJobGraph = aznew CompiledJobGraph(AZStd::move(m_jobs), m_links, m_linkCount, m_retained);
+        }
+
+        m_compiledJobGraph->m_waitEvent = waitEvent;
+
+        executor.Submit(*m_compiledJobGraph);
+
+        // Do not touch m_compiledJobGraph past this point: for a detached graph it may already be gone
+        if (waitEvent)
+        {
+            waitEvent->m_submitted = true;
+        }
+    }
+}
diff --git a/Code/Framework/AzCore/AzCore/Jobs/JobGraph.h b/Code/Framework/AzCore/AzCore/Jobs/JobGraph.h
new file mode 100644
index 0000000000..62565ea78c
--- /dev/null
+++ b/Code/Framework/AzCore/AzCore/Jobs/JobGraph.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) Contributors to the Open 3D Engine Project.
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR MIT
+ *
+ */
+
+#pragma once
+
+// NOTE: If adding additional header/symbol dependencies, consider if such additions are better
+// suited in the private CompiledJobGraph implementation instead to keep this header lean.
+#include +#include +#include +#include +#include +#include + +namespace AZ +{ + namespace Internal + { + class CompiledJobGraph; + } + class JobExecutor; + + // A JobToken is returned each time a Job is added to the JobGraph. JobTokens are used to + // express dependencies between jobs within the graph. + class JobToken final + { + public: + // Indicate that this job must finish before the job passed as the argument + template + void Precedes(JT&... tokens); + + private: + friend class JobGraph; + + void PrecedesInternal(JobToken& comesAfter); + + // Only the JobGraph should be creating JobToken + JobToken(JobGraph& parent, size_t index); + + JobGraph& m_parent; + size_t m_index; + }; + + // A JobGraphEvent may be used to block until a job graph has finished executing. Usage + // is NOT recommended for the majority of tasks (prefer to simply containing expanding/contracting + // the graph without synchronization over the course of the frame). However, the event + // is useful for the edges of the computation graph. + // + // You are responsible for ensuring the event object lifetime exceeds the job graph lifetime. + // + // After the JobGraphEvent is signaled, you are allowed to reuse the same JobGraphEvent + // for a future submission. + class JobGraphEvent + { + public: + bool IsSignaled(); + void Wait(); + + private: + friend class ::AZ::Internal::CompiledJobGraph; + friend class JobGraph; + void Signal(); + + AZStd::binary_semaphore m_semaphore; + bool m_submitted = false; + }; + + // The JobGraph encapsulates a set of jobs and their interdependencies. After adding + // jobs, and marking dependencies as necessary, the entire graph is submitted via + // the JobGraph::Submit method. + // + // The JobGraph MAY be retained across multiple frames and resubmitted, provided the + // user provides some guarantees (see comments associated with JobGraph::Retain). 
+ class JobGraph final + { + public: + ~JobGraph(); + + // Add a job to the graph, retrieiving a token that can be used to express dependencies + // between jobs. The first argument specifies the JobKind, used for tracking the job. + template + JobToken AddJob(JobDescriptor const& descriptor, Lambda&& lambda); + + template + AZStd::fixed_vector AddJobs(JobDescriptor const& descriptor, Lambdas&&... lambdas); + + // By default, you are responsible for retaining the JobGraph, indicating you promise that + // this JobGraph will live as long as it takes for all constituent jobs to complete. + // Once retained, this job graph can be resubmitted after completion without any + // modifications. JobTokens that were created as a result of adding jobs used to + // mark dependencies DO NOT need to outlive the job graph. + // + // Invoking Detach PRIOR to submission indicates you wish the jobs associated with this + // JobGraph to deallocate upon completion. After invoking Detach, you may let this JobGraph + // go out of scope or deallocate after submission. + // + // NOTE: The JobGraph has no concept of resources used by design. Resubmission + // of the job graph is expected to rely on either indirection, or safe overwriting + // of previously used memory to supply new data (this can even be done as the first + // job in the graph). + void Detach(); + + // Invoke the job graph, asserting if there are dependency violations. Note that + // submitting the same graph multiple times to process simultaneously is VALID + // behavior. This is, for example, a mechanism that allows a job graph to loop + // in perpetuity (in fact, the entire frame could be modeled as a single job graph, + // where the final job resubmits the job graph again). + // + // This API is not designed to protect against memory safety violations (nothing + // can prevent a user from incorrectly aliasing memory unsafely even without repeated + // submission). 
To catch memory safety violations, it is ENCOURAGED that you access + // data through JobResource handles. + void Submit(JobGraphEvent* waitEvent = nullptr); + + // Same as submit but run on a different executor than the default system executor + void SubmitOnExecutor(JobExecutor& executor, JobGraphEvent* waitEvent = nullptr); + + private: + friend class JobToken; + + Internal::CompiledJobGraph* m_compiledJobGraph = nullptr; + + AZStd::vector m_jobs; + + // Job index |-> Dependent job indices + AZStd::unordered_map> m_links; + + uint32_t m_linkCount = 0; + bool m_retained = true; + bool m_submitted = false; + }; +} // namespace AZ + +#include diff --git a/Code/Framework/AzCore/AzCore/Jobs/JobGraph.inl b/Code/Framework/AzCore/AzCore/Jobs/JobGraph.inl new file mode 100644 index 0000000000..f541d9923f --- /dev/null +++ b/Code/Framework/AzCore/AzCore/Jobs/JobGraph.inl @@ -0,0 +1,62 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#pragma once + +namespace AZ +{ + inline JobToken::JobToken(JobGraph& parent, size_t index) + : m_parent{ parent } + , m_index{ index } + { + } + + template + inline void JobToken::Precedes(JT&... 
tokens)
+    {
+        // Fold over the pack: one dependency edge per token, in argument order
+        (PrecedesInternal(tokens), ...);
+    }
+
+    // Non-blocking poll of the event. A successful poll acquires the semaphore, so the signal is consumed.
+    inline bool JobGraphEvent::IsSignaled()
+    {
+        AZ_Assert(m_submitted, "Querying the status of a job graph event that was never submitted along with the jobgraph");
+        return m_semaphore.try_acquire_for(AZStd::chrono::milliseconds{ 0 });
+    }
+
+    // Blocks until the job graph this event was submitted with has finished.
+    // NOTE(review): CompiledJobGraph::Release resets m_submitted to false right before signaling, so this
+    // assert (and the one in IsSignaled) looks like it can fire when the graph finishes before the call
+    // is made - confirm the intended lifetime of m_submitted.
+    inline void JobGraphEvent::Wait()
+    {
+        AZ_Assert(m_submitted, "Waiting on a job graph event that was never submitted along with the jobgraph");
+        m_semaphore.acquire();
+    }
+
+    // Wakes the waiter; called by the compiled graph once its last job has completed.
+    inline void JobGraphEvent::Signal()
+    {
+        m_semaphore.release();
+    }
+
+    // Appends a job to the graph and returns the token identifying it (its index in m_jobs).
+    template <typename Lambda>
+    inline JobToken JobGraph::AddJob(JobDescriptor const& desc, Lambda&& lambda)
+    {
+        AZ_Assert(!m_submitted, "Cannot mutate a JobGraph that was previously submitted.");
+
+        m_jobs.emplace_back(desc, AZStd::forward<Lambda>(lambda));
+
+        return { *this, m_jobs.size() - 1 };
+    }
+
+    // Appends one job per lambda, all sharing `descriptor`, and returns their tokens in argument order.
+    // This has to be defined as a member of JobGraph: it implements the AddJobs declared in the class and
+    // needs access to AddJob. Each lambda is forwarded so that rvalues are moved instead of copied.
+    template <typename... Lambdas>
+    inline AZStd::fixed_vector<JobToken, sizeof...(Lambdas)> JobGraph::AddJobs(JobDescriptor const& descriptor, Lambdas&&... lambdas)
+    {
+        return { AddJob(descriptor, AZStd::forward<Lambdas>(lambdas))... };
+    }
+
+    // Marks the graph as detached (not retained); see the declaration for the lifetime implications.
+    inline void JobGraph::Detach()
+    {
+        m_retained = false;
+    }
+} // namespace AZ
diff --git a/Code/Framework/AzCore/AzCore/azcore_files.cmake b/Code/Framework/AzCore/AzCore/azcore_files.cmake
index e3d2987a3c..a23ec9ab82 100644
--- a/Code/Framework/AzCore/AzCore/azcore_files.cmake
+++ b/Code/Framework/AzCore/AzCore/azcore_files.cmake
@@ -221,6 +221,8 @@ set(FILES
     Jobs/Internal/JobManagerWorkStealing.cpp
     Jobs/Internal/JobManagerWorkStealing.h
     Jobs/Internal/JobNotify.h
+    Jobs/Internal/JobTypeEraser.cpp
+    Jobs/Internal/JobTypeEraser.h
     Jobs/Job.cpp
     Jobs/Job.h
     Jobs/JobCancelGroup.h
@@ -228,8 +230,14 @@ set(FILES
     Jobs/JobCompletionSpin.h
     Jobs/JobContext.cpp
     Jobs/JobContext.h
+    Jobs/JobDescriptor.h
     Jobs/JobEmpty.h
+    Jobs/JobExecutor.cpp
+    Jobs/JobExecutor.h
     Jobs/JobFunction.h
+    Jobs/JobGraph.cpp
+    Jobs/JobGraph.h
+    Jobs/JobGraph.inl
     Jobs/JobManager.cpp
     Jobs/JobManager.h
     Jobs/JobManagerBus.h
diff --git a/Code/Framework/AzCore/AzCore/std/parallel/thread.h
b/Code/Framework/AzCore/AzCore/std/parallel/thread.h index a46671a4cd..9f7830c91b 100644 --- a/Code/Framework/AzCore/AzCore/std/parallel/thread.h +++ b/Code/Framework/AzCore/AzCore/std/parallel/thread.h @@ -69,8 +69,7 @@ namespace AZStd int m_priority{ -100000 }; //! The CPU ids (as a bitfield) that this thread will be running on, see \ref AZStd::thread_desc::m_cpuId. - //! Windows: This parameter is ignored. - //! On other platforms, each bit maps directly to the core numbers [0-n], default is 0 + //! Each bit maps directly to the core numbers [0-n], default is 0 int m_cpuId{ AFFINITY_MASK_ALL }; //! If we can join the thread. diff --git a/Code/Framework/AzCore/Tests/JobGraphTests.cpp b/Code/Framework/AzCore/Tests/JobGraphTests.cpp new file mode 100644 index 0000000000..175881b89a --- /dev/null +++ b/Code/Framework/AzCore/Tests/JobGraphTests.cpp @@ -0,0 +1,848 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#include +#include +#include + +#include + +#include + +using AZ::JobDescriptor; +using AZ::JobGraph; +using AZ::JobGraphEvent; +using AZ::JobExecutor; +using AZ::Internal::TypeErasedJob; +using AZ::JobPriority; + +static JobDescriptor defaultJD{ "JobGraphTestJob", "JobGraphTests" }; + +namespace UnitTest +{ + class JobGraphTestFixture : public AllocatorsTestFixture + { + public: + void SetUp() override + { + AllocatorsTestFixture::SetUp(); + AZ::AllocatorInstance::Create(); + AZ::AllocatorInstance::Create(); + + m_executor = aznew JobExecutor(4); + } + + void TearDown() override + { + azdestroy(m_executor); + AZ::AllocatorInstance::Destroy(); + AZ::AllocatorInstance::Destroy(); + AllocatorsTestFixture::TearDown(); + } + + protected: + JobExecutor* m_executor; + }; + + TEST(JobGraphTests, TrivialJobLambda) + { + int x = 0; + + TypeErasedJob job( + defaultJD, + [&x]() + { + ++x; + }); + job.Invoke(); + + EXPECT_EQ(1, x); + } + + TEST(JobGraphTests, TrivialJobLambdaMove) + { + int x = 0; + + TypeErasedJob job( + defaultJD, + [&x]() + { + ++x; + }); + + TypeErasedJob job2 = AZStd::move(job); + + job2.Invoke(); + + EXPECT_EQ(1, x); + } + + struct TrackMoves + { + TrackMoves() = default; + + TrackMoves(const TrackMoves&) = delete; + + TrackMoves(TrackMoves&& other) + : moveCount{other.moveCount + 1} + { + } + + int moveCount = 0; + }; + + struct TrackCopies + { + TrackCopies() = default; + + TrackCopies(TrackCopies&&) = delete; + + TrackCopies(const TrackCopies& other) + : copyCount{other.copyCount + 1} + { + } + + int copyCount = 0; + }; + + TEST(JobGraphTests, MoveOnlyJobLambda) + { + TrackMoves tm; + int moveCount = 0; + + TypeErasedJob job( + defaultJD, + [tm = AZStd::move(tm), &moveCount] + { + moveCount = tm.moveCount; + }); + job.Invoke(); + + // Two moves are expected. 
Once into the capture body of the lambda, once to construct + // the type erased job + EXPECT_EQ(2, moveCount); + } + + TEST(JobGraphTests, MoveOnlyJobLambdaMove) + { + TrackMoves tm; + int moveCount = 0; + + TypeErasedJob job( + defaultJD, + [tm = AZStd::move(tm), &moveCount] + { + moveCount = tm.moveCount; + }); + + TypeErasedJob job2 = AZStd::move(job); + job2.Invoke(); + + EXPECT_EQ(3, moveCount); + } + + TEST(JobGraphTests, CopyOnlyJobLambda) + { + TrackCopies tc; + int copyCount = 0; + + TypeErasedJob job( + defaultJD, + [tc, &copyCount] + { + copyCount = tc.copyCount; + }); + job.Invoke(); + + // Two copies are expected. Once into the capture body of the lambda, once to construct + // the type erased job + EXPECT_EQ(2, copyCount); + } + + TEST(JobGraphTests, CopyOnlyJobLambdaMove) + { + TrackCopies tc; + int copyCount = 0; + + TypeErasedJob job( + defaultJD, + [tc, &copyCount] + { + copyCount = tc.copyCount; + }); + TypeErasedJob job2 = AZStd::move(job); + job2.Invoke(); + + EXPECT_EQ(3, copyCount); + } + + TEST(JobGraphTests, DestroyLambda) + { + // This test ensures that for a lambda with a destructor, the destructor is invoked + // exactly once on a non-moved-from object. 
+ int x = 0; + struct TrackDestroy + { + TrackDestroy(int* px) + : count{ px } + { + } + TrackDestroy(TrackDestroy&& other) + : count{ other.count } + { + other.count = nullptr; + } + ~TrackDestroy() + { + if (count) + { + ++*count; + } + } + int* count = nullptr; + }; + + { + TrackDestroy td{ &x }; + TypeErasedJob job( + defaultJD, + [td = AZStd::move(td)] + { + }); + job.Invoke(); + // Destructor should not have run yet (except on moved-from instances) + EXPECT_EQ(x, 0); + } + + // Destructor should have run now + EXPECT_EQ(x, 1); + } + + TEST_F(JobGraphTestFixture, SerialGraph) + { + int x = 0; + + JobGraph graph; + auto a = graph.AddJob( + defaultJD, + [&] + { + x += 3; + }); + auto b = graph.AddJob( + defaultJD, + [&] + { + x = 4 * x; + }); + auto c = graph.AddJob( + defaultJD, + [&] + { + x -= 1; + }); + + a.Precedes(b); + b.Precedes(c); + + JobGraphEvent ev; + graph.SubmitOnExecutor(*m_executor, &ev); + ev.Wait(); + + EXPECT_EQ(11, x); + } + + TEST_F(JobGraphTestFixture, DetachedGraph) + { + int x = 0; + + JobGraphEvent ev; + + { + JobGraph graph; + auto a = graph.AddJob( + defaultJD, + [&] + { + x += 3; + }); + auto b = graph.AddJob( + defaultJD, + [&] + { + x = 4 * x; + }); + auto c = graph.AddJob( + defaultJD, + [&] + { + x -= 1; + }); + + a.Precedes(b); + b.Precedes(c); + graph.Detach(); + graph.SubmitOnExecutor(*m_executor, &ev); + } + + ev.Wait(); + + EXPECT_EQ(11, x); + } + + TEST_F(JobGraphTestFixture, ForkJoin) + { + AZStd::atomic x = 0; + + // Job a initializes x to 7 (0b111) + // Jobs b and c each toggle one of the lowest two bits atomically + // Job d decrements x + + JobGraph graph; + auto a = graph.AddJob( + defaultJD, + [&] + { + x = 0b111; + }); + auto b = graph.AddJob( + defaultJD, + [&] + { + x ^= 1; + }); + auto c = graph.AddJob( + defaultJD, + [&] + { + x ^= 2; + }); + auto d = graph.AddJob( + defaultJD, + [&] + { + x -= 1; + }); + + // a <-- Root + // / \ + // b c + // \ / + // d + + a.Precedes(b, c); + b.Precedes(d); + c.Precedes(d); + + JobGraphEvent ev; 
+ graph.SubmitOnExecutor(*m_executor, &ev); + ev.Wait(); + + EXPECT_EQ(3, x); + } + + TEST_F(JobGraphTestFixture, SpawnSubgraph) + { + AZStd::atomic x = 0; + + JobGraph graph; + auto a = graph.AddJob( + defaultJD, + [&] + { + x = 0b111; + }); + auto b = graph.AddJob( + defaultJD, + [&] + { + x ^= 1; + }); + auto c = graph.AddJob( + defaultJD, + [&] + { + x ^= 2; + + JobGraph subgraph; + auto e = subgraph.AddJob( + defaultJD, + [&] + { + x ^= 0b1000; + }); + auto f = subgraph.AddJob( + defaultJD, + [&] + { + x ^= 0b10000; + }); + auto g = subgraph.AddJob( + defaultJD, + [&] + { + x += 0b1000; + }); + e.Precedes(g); + f.Precedes(g); + JobGraphEvent ev; + subgraph.SubmitOnExecutor(*m_executor, &ev); + ev.Wait(); + }); + auto d = graph.AddJob( + defaultJD, + [&] + { + x -= 1; + }); + + // NOTE: The ideal way to express this topology is without the wait on the subgraph + // at task g, but this is more an illustrative test. Better is to express the entire + // graph in a single larger graph. + // a <-- Root + // / \ + // b c - f + // \ \ \ + // \ e - g + // \ / + // \ / + // \ / + // d + + a.Precedes(b); + a.Precedes(c); + b.Precedes(d); + c.Precedes(d); + + JobGraphEvent ev; + graph.SubmitOnExecutor(*m_executor, &ev); + ev.Wait(); + + EXPECT_EQ(3 | 0b100000, x); + } + + TEST_F(JobGraphTestFixture, RetainedGraph) + { + AZStd::atomic x = 0; + + JobGraph graph; + auto a = graph.AddJob( + defaultJD, + [&] + { + x = 0b111; + }); + auto b = graph.AddJob( + defaultJD, + [&] + { + x ^= 1; + }); + auto c = graph.AddJob( + defaultJD, + [&] + { + x ^= 2; + }); + auto d = graph.AddJob( + defaultJD, + [&] + { + x -= 1; + }); + auto e = graph.AddJob( + defaultJD, + [&] + { + x ^= 0b1000; + }); + auto f = graph.AddJob( + defaultJD, + [&] + { + x ^= 0b10000; + }); + auto g = graph.AddJob( + defaultJD, + [&] + { + x += 0b1000; + }); + + // a <-- Root + // / \ + // b c - f + // \ \ \ + // \ e - g + // \ / + // \ / + // \ / + // d + + a.Precedes(b, c); + b.Precedes(d); + c.Precedes(e, f); 
+ e.Precedes(g); + f.Precedes(g); + g.Precedes(d); + + JobGraphEvent ev; + graph.SubmitOnExecutor(*m_executor, &ev); + ev.Wait(); + + EXPECT_EQ(3 | 0b100000, x); + x = 0; + + graph.SubmitOnExecutor(*m_executor, &ev); + ev.Wait(); + + EXPECT_EQ(3 | 0b100000, x); + } +} // namespace UnitTest + +#if defined(HAVE_BENCHMARK) +namespace Benchmark +{ + class JobGraphBenchmarkFixture : public ::benchmark::Fixture + { + public: + static const int32_t LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH = 1; + static const int32_t MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH = 1024; + static const int32_t HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH = 1048576; + + static const int32_t SMALL_NUMBER_OF_JOBS = 10; + static const int32_t MEDIUM_NUMBER_OF_JOBS = 1024; + static const int32_t LARGE_NUMBER_OF_JOBS = 16384; + static AZStd::atomic s_numIncompleteJobs; + + int m_depth = 1; + JobGraph* graphs; + + void SetUp(benchmark::State&) override + { + s_numIncompleteJobs = 0; + + m_executor = aznew JobExecutor(0); + graphs = new JobGraph[4]; + + // Generate some random priorities + m_randomPriorities.resize(LARGE_NUMBER_OF_JOBS); + std::mt19937_64 randomPriorityGenerator(1); // Always use the same seed + std::uniform_int_distribution<> randomPriorityDistribution(0, static_cast<int>(AZ::JobPriority::PRIORITY_COUNT) - 1); // both bounds are inclusive; the values are later used to index graphs[] + std::generate( + m_randomPriorities.begin(), m_randomPriorities.end(), + [&randomPriorityDistribution, &randomPriorityGenerator]() + { + return randomPriorityDistribution(randomPriorityGenerator); + }); + + // Generate some random depths + m_randomDepths.resize(LARGE_NUMBER_OF_JOBS); + std::mt19937_64 randomDepthGenerator(1); // Always use the same seed + std::uniform_int_distribution<> randomDepthDistribution( + LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH, HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + std::generate( + m_randomDepths.begin(), m_randomDepths.end(), + [&randomDepthDistribution, &randomDepthGenerator]() + { + return randomDepthDistribution(randomDepthGenerator); + }); + + for (size_t i = 0; i != 4; ++i) 
+ { + graphs[i].AddJob( + descriptors[i], + [this] + { + benchmark::DoNotOptimize(CalculatePi(m_depth)); + --s_numIncompleteJobs; + }); + } + } + + void TearDown(benchmark::State&) override + { + delete[] graphs; + azdestroy(m_executor); + m_randomDepths = {}; + m_randomPriorities = {}; + } + + JobDescriptor descriptors[4] = { { "critical", "benchmark", JobPriority::CRITICAL }, + { "high", "benchmark", JobPriority::HIGH }, + { "medium", "benchmark", JobPriority::MEDIUM }, + { "low", "benchmark", JobPriority::LOW } }; + + static inline double CalculatePi(AZ::u32 depth) + { + double pi = 0.0; + for (AZ::u32 i = 0; i < depth; ++i) + { + const double numerator = static_cast<double>((i % 2) * 2) - 1.0; // convert first: i is unsigned, so an integer "- 1" would wrap for even i + const double denominator = static_cast<double>(2 * i) - 1.0; // same reason: must be -1 (not a wrapped value) for i == 0 + pi += numerator / denominator; + } + return (pi - 1.0) * 4; + } + + void RunCalculatePiJob(int32_t depth, int8_t priority) + { + m_depth = depth; + ++s_numIncompleteJobs; + + graphs[priority].SubmitOnExecutor(*m_executor); + } + + void RunMultipleCalculatePiJobsWithDefaultPriority(uint32_t numberOfJobs, int32_t depth) + { + for (size_t i = 0; i != numberOfJobs; ++i) + { + RunCalculatePiJob(depth, 2); + } + + while (s_numIncompleteJobs > 0) + { + } + } + + void RunMultipleCalculatePiJobsWithRandomPriority(uint32_t numberOfJobs, int32_t depth) + { + for (size_t i = 0; i != numberOfJobs; ++i) + { + RunCalculatePiJob(depth, m_randomPriorities[i]); + } + + while (s_numIncompleteJobs > 0) + { + } + } + + void RunMultipleCalculatePiJobsWithRandomDepthAndDefaultPriority(uint32_t numberOfJobs) + { + for (size_t i = 0; i != numberOfJobs; ++i) + { + RunCalculatePiJob(m_randomDepths[i], 2); // same priority index as RunMultipleCalculatePiJobsWithDefaultPriority + } + + while (s_numIncompleteJobs > 0) + { + } + } + + void RunMultipleCalculatePiJobsWithRandomDepthAndRandomPriority(uint32_t numberOfJobs) + { + for (size_t i = 0; i != numberOfJobs; ++i) + { + RunCalculatePiJob(m_randomDepths[i], m_randomPriorities[i]); + } + + while (s_numIncompleteJobs > 0) + { + } + } + + JobExecutor* m_executor; + 
AZStd::vector m_randomDepths; + AZStd::vector m_randomPriorities; + }; + + AZStd::atomic JobGraphBenchmarkFixture::s_numIncompleteJobs = 0; + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfLightWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(SMALL_NUMBER_OF_JOBS, LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfLightWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(MEDIUM_NUMBER_OF_JOBS, LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfLightWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(LARGE_NUMBER_OF_JOBS, LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfMediumWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(SMALL_NUMBER_OF_JOBS, MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfMediumWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(MEDIUM_NUMBER_OF_JOBS, MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfMediumWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(LARGE_NUMBER_OF_JOBS, MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfHeavyWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(SMALL_NUMBER_OF_JOBS, 
HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfHeavyWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(MEDIUM_NUMBER_OF_JOBS, HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfHeavyWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithDefaultPriority(LARGE_NUMBER_OF_JOBS, HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfRandomWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomDepthAndDefaultPriority(SMALL_NUMBER_OF_JOBS); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfRandomWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomDepthAndDefaultPriority(MEDIUM_NUMBER_OF_JOBS); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfRandomWeightJobsWithDefaultPriority)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomDepthAndDefaultPriority(LARGE_NUMBER_OF_JOBS); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfLightWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(SMALL_NUMBER_OF_JOBS, LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfLightWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(MEDIUM_NUMBER_OF_JOBS, LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfLightWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { 
+ RunMultipleCalculatePiJobsWithRandomPriority(LARGE_NUMBER_OF_JOBS, LIGHT_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfMediumWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(SMALL_NUMBER_OF_JOBS, MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfMediumWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(MEDIUM_NUMBER_OF_JOBS, MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfMediumWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(LARGE_NUMBER_OF_JOBS, MEDIUM_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfHeavyWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(SMALL_NUMBER_OF_JOBS, HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfHeavyWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(MEDIUM_NUMBER_OF_JOBS, HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfHeavyWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomPriority(LARGE_NUMBER_OF_JOBS, HEAVY_WEIGHT_JOB_CALCULATE_PI_DEPTH); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunSmallNumberOfRandomWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomDepthAndRandomPriority(SMALL_NUMBER_OF_JOBS); + } + } + + 
BENCHMARK_F(JobGraphBenchmarkFixture, RunMediumNumberOfRandomWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomDepthAndRandomPriority(MEDIUM_NUMBER_OF_JOBS); + } + } + + BENCHMARK_F(JobGraphBenchmarkFixture, RunLargeNumberOfRandomWeightJobsWithRandomPriorities)(benchmark::State& state) + { + for (auto _ : state) + { + RunMultipleCalculatePiJobsWithRandomDepthAndRandomPriority(LARGE_NUMBER_OF_JOBS); + } + } +} // namespace Benchmark +#endif diff --git a/Code/Framework/AzCore/Tests/azcoretests_files.cmake b/Code/Framework/AzCore/Tests/azcoretests_files.cmake index d340173c87..480baabe7a 100644 --- a/Code/Framework/AzCore/Tests/azcoretests_files.cmake +++ b/Code/Framework/AzCore/Tests/azcoretests_files.cmake @@ -40,6 +40,7 @@ set(FILES Interface.cpp IO/Path/PathTests.cpp IPC.cpp + JobGraphTests.cpp Jobs.cpp JSON.cpp FixedWidthIntegers.cpp