Metal pso caching support via MTLBinaryArchive (#7212)

* PipelineLibrary (PSO Caching) support for Metal

- API changes to handle Metal drivers implicitly doing save/load of PipelineLibrary data
- Fixed up code related to Metal device selection
- PipelineLibrary support for Mac and iOS

Signed-off-by: moudgils <moudgils@amazon.com>

* Fix compile errors for Dx12, Vulkan backend + Unit tests

Signed-off-by: moudgils <47460854+moudgils@users.noreply.github.com>

* Fixed errors related to M1 GPU

Signed-off-by: moudgils <moudgils@amazon.com>

* Fix a minor 'tab' validation issue

Signed-off-by: moudgils <moudgils@amazon.com>

* Addressed feedback

Signed-off-by: moudgils <moudgils@amazon.com>

* Minor feedback

Signed-off-by: moudgils <moudgils@amazon.com>

* Added a few asserts

Signed-off-by: moudgils <47460854+moudgils@users.noreply.github.com>

* Fix a typo

Signed-off-by: moudgils <moudgils@amazon.com>
monroegm-disable-blank-issue-2
moudgils 4 years ago committed by GitHub
parent 9a30c5366c
commit 608411ab99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -10,7 +10,6 @@
"type" : "Compute"
}
]
},
"DisabledRHIBackends": ["metal"]
}
}

@ -75,6 +75,9 @@ namespace AZ
//! Whether Unbounded Array support is available.
bool m_unboundedArrays = false;
//! Whether PipelineLibrary related serialized data needs to be loaded/saved explicitly as drivers (like dx12/vk) do not support it internally
bool m_isPsoCacheFileOperationsNeeded = true;
/// Additional features here.
};
}

@ -22,14 +22,15 @@ namespace AZ
{
//! A list of popular vendor Ids.
AZ_ENUM_CLASS_WITH_UNDERLYING_TYPE(VendorId, uint32_t,
(Unknown, 0),
(Intel, 0x8086),
(nVidia, 0x10de),
(AMD, 0x1002),
(Qualcomm, 0x5143),
(Samsung, 0x1099),
(ARM, 0x13B5),
(Warp, 0x1414)
(Unknown, 0),
(Intel, 0x8086),
(nVidia, 0x10de),
(AMD, 0x1002),
(Qualcomm, 0x5143),
(Samsung, 0x1099),
(ARM, 0x13B5),
(Warp, 0x1414),
(Apple, 0x106B)
);
void ReflectVendorIdEnums(ReflectContext* context);

@ -19,7 +19,15 @@ namespace AZ
/// A handle typed to the pipeline library. Used by the PipelineStateCache to abstract access.
using PipelineLibraryHandle = Handle<uint32_t, class PipelineLibrary>;
//! Describes how to initialize a PipelineLibrary: from previously serialized
//! data held in memory, and/or from a file path that the RHI backend drivers
//! can use to load/save the library data themselves.
struct PipelineLibraryDescriptor
{
    //! Serialized data with which to init the PipelineLibrary. May be null.
    ConstPtr<PipelineLibraryData> m_serializedData = nullptr;
    //! The file path name associated with serialized data. It can be passed
    //! to the RHI backend to do load/save operations via the drivers
    //! (on platforms whose drivers manage the PSO cache file internally).
    AZStd::string m_filePath;
};
//! PipelineState initialization is an expensive operation on certain platforms. If multiple pipeline states
//! are created with little variation between them, the contents are still duplicated. This class is an allocation
//! context for pipeline states, provided at PipelineState::Init, which will perform de-duplication of
@ -50,8 +58,8 @@ namespace AZ
//! serialized and the contents saved to disk. Subsequent loads will experience much faster pipeline
//! state creation times (on supported platforms). On success, the library is transitioned to the
//! initialized state. On failure, the library remains uninitialized.
//! @param serializedData The initial serialized data used to initialize the library. It can be null.
ResultCode Init(Device& device, const PipelineLibraryData* serializedData);
//! @param descriptor The descriptor needed to init the PipelineLibrary.
ResultCode Init(Device& device, const PipelineLibraryDescriptor& descriptor);
//! Merges the contents of other libraries into this library. This method must be called
//! on an initialized library. A common use case for this method is to construct thread-local
@ -65,6 +73,9 @@ namespace AZ
//! this method to extract serialized data prior to application shutdown, save it to disk, and
//! use it when initializing on subsequent runs.
ConstPtr<PipelineLibraryData> GetSerializedData() const;
//! Saves the platform-specific data to disk using the filePath provided. This is done through RHI backend drivers.
bool SaveSerializedData(const AZStd::string& filePath) const;
//! Returns whether the current library needs to be merged
virtual bool IsMergeRequired() const;
@ -79,7 +90,7 @@ namespace AZ
// Platform API
/// Called when the library is being created.
virtual ResultCode InitInternal(Device& device, const PipelineLibraryData* serializedData) = 0;
virtual ResultCode InitInternal(Device& device, const PipelineLibraryDescriptor& descriptor) = 0;
/// Called when the library is being shutdown.
virtual void ShutdownInternal() = 0;
@ -89,6 +100,9 @@ namespace AZ
/// Called when the library is serializing out platform-specific data.
virtual ConstPtr<PipelineLibraryData> GetSerializedDataInternal() const = 0;
/// Called when we want the RHI backend to save out the Pipeline Library via the drivers
virtual bool SaveSerializedDataInternal(const AZStd::string& filePath) const = 0;
//////////////////////////////////////////////////////////////////////////
};

@ -22,131 +22,125 @@ namespace AZ
{
namespace RHI
{
/**
* Problem: High-level rendering code works in 'materials', 'shaders', and 'models', but the RHI works in
* 'pipeline states'. Therefore, a translation process must exist to resolve a shader variation (plus runtime
* state) into a pipeline state suitable for consumption by the RHI. These resolve operations can number in the
* thousands per frame, and (ideally) are heavily jobified.
*
* Another problem is that pipeline state creation is not fast, as on some platforms it will involve synchronous
* byte-code compilation. This could take anywhere from <1ms to >150ms. If compilation is done synchronously and
* immediately, the cache will effectively stall the entire process if multiple threads request the same pending
* pipeline state.
*
* Therefore, PipelineStateCache adheres to the following requirements:
* 1. A cache miss does not serialize all threads on a pipeline state compilation event.
* 2. A cache hit results in zero contention.
*
* Justification: Most pipeline state compilation will occur in the first few frames, but can also occur when new
* 'permutations' are hit while exploring. In the 90% case, the cache is warm and each frame results in a 100%
* cache hit rate. With zero locks, this scales extremely well across threads and removes a bottleneck from the
* render code. In the event that compilations are required, multiple threads are now able to participate in the
* compilation process without serializing each other.
*
* To accomplish this, the pipeline state cache uses three 'phases' of caching.
* 1. A global, read-only cache - designed as the 'fast path' for when the cache is warm.
* 2. A thread-local cache - reduces contention on the global pending cache for successive requests on the same thread.
* 3. A global, locked pending cache - de-duplicates pipeline state allocations.
*
* Each library has global and thread-local caches. Initially, the global cache is checked, if that fails, the
* thread-local cache is checked (no locks taken). Finally, the pending cache is checked under a lock and if
* the entry still doesn't exist, it is allocated and added to the pending cache. A thread-local PipelineLibrary
* is used to compile the pipeline state, which eliminates all locking for compilation.
*
* Pipeline states can be acquired at any time and from any thread. The cache will take a reader lock. During
* AcquirePipelineState, the global read-only cache is not updated, but the thread-local cache and pending
* global cache may be. Furthermore, compilations are performed on the calling thread, which means that separate
* thread may return a pipeline state that is still compiling. It is required that all pending AcquirePipelineState
* calls complete prior to using the returned pipeline state pointers during command list recording.
*
* Example Scenarios:
*
* 1. Threads request the same un-cached pipeline state:
*
* Both the global read-only cache and thread-local caches miss, one thread wins the race to take a lock
* on the global pending cache. It allocates but does not compile the pipeline state. All other threads wait on the
* lock (which should be quick) and then find and return the uninitialized pipeline state. The compiling
* thread uses the thread-local PipelineLibrary instance to compile the pipeline state. Non-compiling threads
* will enter the uninitialized pipeline state into their thread-local cache (as does the compiling thread once it
* completes). Note that the compiling thread is now busy, but all remaining threads are now unblocked to compile other
* pipeline states.
*
* 2. A thread requests a pipeline state being compiled on another thread:
*
* In this case, the global read-only cache won't have the pipeline state (since it's being compiled during
* the current cycle, and the pending cache is only merged at the end of the cycle). It also won't have the
* entry in the thread-local cache. It then hits the global pending cache, which will return the live instance
* (being compiled). It then caches the result in its thread-local cache, so that successive requests will no
* longer require a lock on the pending cache.
*
* 3. The cache is warm and all pipeline states are compiled:
*
* Each thread hits the same read-only cache (which succeeds) and returns the pipeline state immediately.
* This is the fast-path case where multiple threads are now able to resolve pipeline states with very
* little performance overhead.
*
* Example Usage:
* @code{.cpp}
* // Create library instance.
* RHI::PipelineLibraryHandle libraryHandle = pipelineStateCache->CreateLibrary(serializedData); // Initial data loaded from disk.
*
* // In jobs. Lots and lots of requests.
* const RHI::PipelineState* pipelineState = pipelineStateCache->AcquirePipelineState(libraryHandle, descriptor);
*
* // Reset contents of library. Releases all pipeline state references. Library remains valid.
* pipelineStateCache->ResetLibrary(libraryHandle);
*
* // Release library and all held references.
* pipelineStateCache->ReleaseLibrary(libraryHandle);
* @endcode
*/
//! Problem: High-level rendering code works in 'materials', 'shaders', and 'models', but the RHI works in
//! 'pipeline states'. Therefore, a translation process must exist to resolve a shader variation (plus runtime
//! state) into a pipeline state suitable for consumption by the RHI. These resolve operations can number in the
//! thousands per frame, and (ideally) are heavily jobified.
//!
//! Another problem is that pipeline state creation is not fast, as on some platforms it will involve synchronous
//! byte-code compilation. This could take anywhere from <1ms to >150ms. If compilation is done synchronously and
//! immediately, the cache will effectively stall the entire process if multiple threads request the same pending
//! pipeline state.
//!
//! Therefore, PipelineStateCache adheres to the following requirements:
//! 1. A cache miss does not serialize all threads on a pipeline state compilation event.
//! 2. A cache hit results in zero contention.
//!
//! Justification: Most pipeline state compilation will occur in the first few frames, but can also occur when new
//! 'permutations' are hit while exploring. In the 90% case, the cache is warm and each frame results in a 100%
//! cache hit rate. With zero locks, this scales extremely well across threads and removes a bottleneck from the
//! render code. In the event that compilations are required, multiple threads are now able to participate in the
//! compilation process without serializing each other.
//!
//! To accomplish this, the pipeline state cache uses three 'phases' of caching.
//! 1. A global, read-only cache - designed as the 'fast path' for when the cache is warm.
//! 2. A thread-local cache - reduces contention on the global pending cache for successive requests on the same thread.
//! 3. A global, locked pending cache - de-duplicates pipeline state allocations.
//!
//! Each library has global and thread-local caches. Initially, the global cache is checked, if that fails, the
//! thread-local cache is checked (no locks taken). Finally, the pending cache is checked under a lock and if
//! the entry still doesn't exist, it is allocated and added to the pending cache. A thread-local PipelineLibrary
//! is used to compile the pipeline state, which eliminates all locking for compilation.
//!
//! Pipeline states can be acquired at any time and from any thread. The cache will take a reader lock. During
//! AcquirePipelineState, the global read-only cache is not updated, but the thread-local cache and pending
//! global cache may be. Furthermore, compilations are performed on the calling thread, which means that separate
//! thread may return a pipeline state that is still compiling. It is required that all pending AcquirePipelineState
//! calls complete prior to using the returned pipeline state pointers during command list recording.
//!
//! Example Scenarios:
//!
//! 1. Threads request the same un-cached pipeline state:
//!
//! Both the global read-only cache and thread-local caches miss, one thread wins the race to take a lock
//! on the global pending cache. It allocates but does not compile the pipeline state. All other threads wait on the
//! lock (which should be quick) and then find and return the uninitialized pipeline state. The compiling
//! thread uses the thread-local PipelineLibrary instance to compile the pipeline state. Non-compiling threads
//! will enter the uninitialized pipeline state into their thread-local cache (as does the compiling thread once it
//! completes). Note that the compiling thread is now busy, but all remaining threads are now unblocked to compile other
//! pipeline states.
//!
//! 2. A thread requests a pipeline state being compiled on another thread:
//!
//! In this case, the global read-only cache won't have the pipeline state (since it's being compiled during
//! the current cycle, and the pending cache is only merged at the end of the cycle). It also won't have the
//! entry in the thread-local cache. It then hits the global pending cache, which will return the live instance
//! (being compiled). It then caches the result in its thread-local cache, so that successive requests will no
//! longer require a lock on the pending cache.
//!
//! 3. The cache is warm and all pipeline states are compiled:
//!
//! Each thread hits the same read-only cache (which succeeds) and returns the pipeline state immediately.
//! This is the fast-path case where multiple threads are now able to resolve pipeline states with very
//! little performance overhead.
//!
//! Example Usage:
//! @code{.cpp}
//! // Create library instance.
//! RHI::PipelineLibraryHandle libraryHandle = pipelineStateCache->CreateLibrary(serializedData); // Initial data loaded from disk.
//!
//! // In jobs. Lots and lots of requests.
//! const RHI::PipelineState* pipelineState = pipelineStateCache->AcquirePipelineState(libraryHandle, descriptor);
//!
//! // Reset contents of library. Releases all pipeline state references. Library remains valid.
//! pipelineStateCache->ResetLibrary(libraryHandle);
//!
//! // Release library and all held references.
//! pipelineStateCache->ReleaseLibrary(libraryHandle);
//! @endcode
//!
class PipelineStateCache final
: public AZStd::intrusive_base
{
public:
AZ_CLASS_ALLOCATOR(PipelineStateCache, SystemAllocator, 0);
/**
* The maximum number of libraries is configurable at compile time. A fixed number is used
* to avoid having to lazily resize thread-local arrays when traversing them, and also to
* avoid a pointer indirection on access.
*/
//! The maximum number of libraries is configurable at compile time. A fixed number is used
//! to avoid having to lazily resize thread-local arrays when traversing them, and also to
//! avoid a pointer indirection on access.
static const size_t LibraryCountMax = 256;
static Ptr<PipelineStateCache> Create(Device& device);
/// Resets the caches of all pipeline libraries back to empty. All internal references to pipeline states are released.
//! Resets the caches of all pipeline libraries back to empty. All internal references to pipeline states are released.
void Reset();
/// Creates an internal pipeline library instance and returns its handle.
PipelineLibraryHandle CreateLibrary(const PipelineLibraryData* serializedData);
//! Creates an internal pipeline library instance and returns its handle.
PipelineLibraryHandle CreateLibrary(const PipelineLibraryData* serializedData, const AZStd::string& filePath = "");
/// Releases the pipeline library and purges it from the cache. Releases all held references to pipeline states for the library.
//! Releases the pipeline library and purges it from the cache. Releases all held references to pipeline states for the library.
void ReleaseLibrary(PipelineLibraryHandle handle);
/// Resets cache contents in the library. Releases all held references to pipeline states for the library.
//! Resets cache contents in the library. Releases all held references to pipeline states for the library.
void ResetLibrary(PipelineLibraryHandle handle);
/// Returns the serialized data for the library, which can be used to re-initialize it.
ConstPtr<PipelineLibraryData> GetLibrarySerializedData(PipelineLibraryHandle handle) const;
/**
* Acquires a pipeline state (either draw or dispatch variants) from the cache. Pipeline states are associated
* to a specific library handle. Successive calls with the same pipeline state descriptor hash will return the same
* pipeline state, even across threads. If the library handle is invalid or the acquire operation fails, a null pointer
* is returned. Otherwise, a valid pipeline state pointer is returned (regardless of whether pipeline state compilation succeeds).
*
* It is permitted to take a strong reference to the returned pointer, but is not necessary as long as the reference
* is discarded on a library reset / release event. The cache will store a reference internally. If a strong reference
* is held externally, the instance will remain valid even after the cache is reset / destroyed.
*/
//! Returns the resulting merged library from all the threadLibraries related to the passed in handle.
//! The merged library can be used to write out the serialized data.
Ptr<PipelineLibrary> GetMergedLibrary(PipelineLibraryHandle handle) const;
//! Acquires a pipeline state (either draw or dispatch variants) from the cache. Pipeline states are associated
//! to a specific library handle. Successive calls with the same pipeline state descriptor hash will return the same
//! pipeline state, even across threads. If the library handle is invalid or the acquire operation fails, a null pointer
//! is returned. Otherwise, a valid pipeline state pointer is returned (regardless of whether pipeline state compilation succeeds).
//!
//! It is permitted to take a strong reference to the returned pointer, but is not necessary as long as the reference
//! is discarded on a library reset / release event. The cache will store a reference internally. If a strong reference
//! is held externally, the instance will remain valid even after the cache is reset / destroyed.
const PipelineState* AcquirePipelineState(PipelineLibraryHandle library, const PipelineStateDescriptor& descriptor);
/**
* This method merges the global pending cache into the global read-only cache and clears all thread-local caches.
* This reduces the total memory footprint of the caches and optimizes subsequent fetches. This method should be called
* once per frame.
*/
//! This method merges the global pending cache into the global read-only cache and clears all thread-local caches.
//! This reduces the total memory footprint of the caches and optimizes subsequent fetches. This method should be called
//! once per frame.
void Compact();
private:
@ -198,8 +192,9 @@ namespace AZ
// Tracks the number of pipeline states actively being compiled across all threads.
AZStd::atomic_uint32_t m_pendingCompileCount = {0};
// Used to prime the thread libraries.
ConstPtr<PipelineLibraryData> m_serializedData;
// Contains the initial serialized data (Used to prime the thread libraries)
// or the file name that contains the serialized data
PipelineLibraryDescriptor m_pipelineLibraryDescriptor;
};
using GlobalLibrarySet = AZStd::fixed_vector<GlobalLibraryEntry, LibraryCountMax>;
@ -209,36 +204,32 @@ namespace AZ
// A thread-local cache used to reduce contention on the global pending cache.
PipelineStateSet m_threadLocalCache;
/**
* Each thread has its own pipeline library. This allows threads to cache disjoint
* pipeline states without locking. The libraries are coalesced into a single library
* during GetLibrarySerializedData. The library is lazily initialized on the thread
* and uses the initial serialized data passed in at creation time.
*/
//! Each thread has its own pipeline library. This allows threads to cache disjoint
//! pipeline states without locking. The libraries are coalesced into a single library
//! during GetMergedLibrary. The library is lazily initialized on the thread
//! and uses the initial serialized data passed in at creation time.
Ptr<PipelineLibrary> m_library;
};
/**
* Each thread has its own list of pipeline library entries. The index maps 1-to-1 with GlobalLibrarySet.
* GlobalLibrarySet contains the total size of the array; whereas the ThreadLibrarySet is just an array.
* The size of the global set should be used when traversing the thread library entries.
*/
//! Each thread has its own list of pipeline library entries. The index maps 1-to-1 with GlobalLibrarySet.
//! GlobalLibrarySet contains the total size of the array; whereas the ThreadLibrarySet is just an array.
//! The size of the global set should be used when traversing the thread library entries.
using ThreadLibrarySet = AZStd::array<ThreadLibraryEntry, LibraryCountMax>;
/// Helper function which binary searches a pipeline state set looking for an entry which matches the requested descriptor.
//! Helper function which binary searches a pipeline state set looking for an entry which matches the requested descriptor.
static const PipelineState* FindPipelineState(const PipelineStateSet& pipelineStateSet, const PipelineStateDescriptor& descriptor);
//! Helper function which inserts an entry into the set. Returns true if the entry was inserted, or false if a duplicate entry existed.
//! Helper function which inserts an entry into the set. Returns true if the entry was inserted, or false is a duplicate entry existed.
static bool InsertPipelineState(PipelineStateSet& pipelineStateSet, PipelineStateEntry pipelineStateEntry);
/// Performs a pipeline state compilation on the global cache using the thread-local pipeline library.
//! Performs a pipeline state compilation on the global cache using the thread-local pipeline library.
ConstPtr<PipelineState> CompilePipelineState(
GlobalLibraryEntry& globalLibraryEntry,
ThreadLibraryEntry& threadLibraryEntry,
const PipelineStateDescriptor& pipelineStateDescriptor,
PipelineStateHash pipelineStateHash);
/// Resets the library without validating the handle or taking a lock.
//! Resets the library without validating the handle or taking a lock.
void ResetLibraryImpl(PipelineLibraryHandle handle);
Ptr<Device> m_device;

@ -25,7 +25,7 @@ namespace AZ
return true;
}
ResultCode PipelineLibrary::Init(Device& device, const PipelineLibraryData* serializedData)
ResultCode PipelineLibrary::Init(Device& device, const PipelineLibraryDescriptor& descriptor)
{
if (Validation::IsEnabled())
{
@ -36,7 +36,7 @@ namespace AZ
}
}
ResultCode resultCode = InitInternal(device, serializedData);
ResultCode resultCode = InitInternal(device, descriptor);
if (resultCode == ResultCode::Success)
{
DeviceObject::Init(device);
@ -72,6 +72,16 @@ namespace AZ
return GetSerializedDataInternal();
}
//! Saves the platform-specific pipeline-library data to disk at the given
//! path by delegating to the RHI backend (SaveSerializedDataInternal).
//! Returns false without attempting the save if the library has not been
//! initialized.
bool PipelineLibrary::SaveSerializedData(const AZStd::string& filePath) const
{
    // The library must have passed Init() before a driver-side save makes sense.
    const bool isInitialized = ValidateIsInitialized();
    return isInitialized ? SaveSerializedDataInternal(filePath) : false;
}
bool PipelineLibrary::IsMergeRequired() const
{

@ -81,7 +81,7 @@ namespace AZ
}
}
PipelineLibraryHandle PipelineStateCache::CreateLibrary(const PipelineLibraryData* serializedData)
PipelineLibraryHandle PipelineStateCache::CreateLibrary(const PipelineLibraryData* serializedData, const AZStd::string& filePath)
{
AZStd::unique_lock<AZStd::shared_mutex> lock(m_mutex);
@ -110,8 +110,8 @@ namespace AZ
m_globalLibraryActiveBits[handle.GetIndex()] = true;
GlobalLibraryEntry& libraryEntry = m_globalLibrarySet[handle.GetIndex()];
libraryEntry.m_serializedData = serializedData;
libraryEntry.m_pipelineLibraryDescriptor.m_serializedData = serializedData;
libraryEntry.m_pipelineLibraryDescriptor.m_filePath = filePath;
AZ_Assert(libraryEntry.m_readOnlyCache.empty() && libraryEntry.m_pendingCache.empty(), "Library entry has entries in its caches!");
return handle;
@ -128,8 +128,9 @@ namespace AZ
GlobalLibraryEntry& libraryEntry = m_globalLibrarySet[handle.GetIndex()];
libraryEntry.m_readOnlyCache.clear();
libraryEntry.m_serializedData = nullptr;
libraryEntry.m_pipelineLibraryDescriptor.m_serializedData = nullptr;
libraryEntry.m_pipelineLibraryDescriptor.m_filePath = "";
m_globalLibraryActiveBits[handle.GetIndex()] = false;
m_libraryFreeList.push_back(handle);
}
@ -162,7 +163,7 @@ namespace AZ
libraryEntry.m_pendingCacheMutex.unlock();
}
ConstPtr<PipelineLibraryData> PipelineStateCache::GetLibrarySerializedData(PipelineLibraryHandle handle) const
Ptr<PipelineLibrary> PipelineStateCache::GetMergedLibrary(PipelineLibraryHandle handle) const
{
if (handle.IsNull())
{
@ -188,7 +189,7 @@ namespace AZ
}
});
bool doesPSODataExist = entry.m_serializedData.get();
bool doesPSODataExist = entry.m_pipelineLibraryDescriptor.m_serializedData.get();
for (const RHI::PipelineLibrary* libraryBase : threadLibraries)
{
const PipelineLibrary* library = static_cast<const PipelineLibrary*>(libraryBase);
@ -198,7 +199,7 @@ namespace AZ
if (doesPSODataExist)
{
Ptr<PipelineLibrary> pipelineLibrary = Factory::Get().CreatePipelineLibrary();
ResultCode resultCode = pipelineLibrary->Init(*m_device, entry.m_serializedData.get());
ResultCode resultCode = pipelineLibrary->Init(*m_device, entry.m_pipelineLibraryDescriptor);
if (resultCode == ResultCode::Success)
{
@ -206,7 +207,7 @@ namespace AZ
if (resultCode == ResultCode::Success)
{
return pipelineLibrary->GetSerializedData();
return pipelineLibrary;
}
}
}
@ -316,7 +317,7 @@ namespace AZ
if (!threadLibraryEntry.m_library)
{
Ptr<PipelineLibrary> pipelineLibrary = Factory::Get().CreatePipelineLibrary();
RHI::ResultCode resultCode = pipelineLibrary->Init(*m_device, globalLibraryEntry.m_serializedData.get());
RHI::ResultCode resultCode = pipelineLibrary->Init(*m_device, globalLibraryEntry.m_pipelineLibraryDescriptor);
if (resultCode != RHI::ResultCode::Success)
{
AZ_Warning("PipelineStateCache", false, "Failed to initialize pipeline library. PipelineLibrary usage is disabled.");

@ -23,10 +23,11 @@ namespace UnitTest
AZStd::unordered_map<uint64_t, const AZ::RHI::PipelineState*> m_pipelineStates;
private:
AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, const AZ::RHI::PipelineLibraryData*) override { return AZ::RHI::ResultCode::Success; }
AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, const RHI::PipelineLibraryDescriptor&) override { return AZ::RHI::ResultCode::Success; }
void ShutdownInternal() override;
AZ::RHI::ResultCode MergeIntoInternal(AZStd::span<const AZ::RHI::PipelineLibrary* const>) override;
AZ::RHI::ConstPtr<AZ::RHI::PipelineLibraryData> GetSerializedDataInternal() const override { return nullptr; }
bool SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const override { return false; }
};
class PipelineState

@ -189,12 +189,12 @@ namespace UnitTest
RHI::Ptr<RHI::Device> device = MakeTestDevice();
RHI::Ptr<RHI::PipelineLibrary> pipelineLibrary = RHI::Factory::Get().CreatePipelineLibrary();
RHI::ResultCode resultCode = pipelineLibrary->Init(*device, nullptr);
RHI::ResultCode resultCode = pipelineLibrary->Init(*device, RHI::PipelineLibraryDescriptor{});
EXPECT_EQ(resultCode, RHI::ResultCode::Success);
// Second init should fail and throw validation error.
AZ_TEST_START_ASSERTTEST;
resultCode = pipelineLibrary->Init(*device, nullptr);
resultCode = pipelineLibrary->Init(*device, RHI::PipelineLibraryDescriptor{});
AZ_TEST_STOP_ASSERTTEST(1);
EXPECT_EQ(resultCode, RHI::ResultCode::InvalidOperation);
@ -249,7 +249,7 @@ namespace UnitTest
// Calling library methods with a null handle should early out.
pipelineStateCache->ResetLibrary({});
pipelineStateCache->ReleaseLibrary({});
EXPECT_EQ(pipelineStateCache->GetLibrarySerializedData({}), nullptr);
EXPECT_EQ(pipelineStateCache->GetMergedLibrary({}), nullptr);
EXPECT_EQ(pipelineStateCache->AcquirePipelineState({}, CreatePipelineStateDescriptor(0)), nullptr);
pipelineStateCache->Compact();
ValidateCacheIntegrity(pipelineStateCache);

@ -40,7 +40,7 @@ namespace AZ
return aznew PipelineLibrary;
}
RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, [[maybe_unused]] const RHI::PipelineLibraryData* serializedData)
RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, [[maybe_unused]] const RHI::PipelineLibraryDescriptor& descriptor)
{
Device& device = static_cast<Device&>(deviceBase);
ID3D12DeviceX* dx12Device = device.GetDevice();
@ -57,9 +57,9 @@ namespace AZ
}
if (serializedData && shouldCreateLibFromSerializedData)
if (descriptor.m_serializedData && shouldCreateLibFromSerializedData)
{
bytes = serializedData->GetData();
bytes = descriptor.m_serializedData->GetData();
}
Microsoft::WRL::ComPtr<ID3D12PipelineLibraryX> libraryComPtr;
@ -70,7 +70,7 @@ namespace AZ
if (SUCCEEDED(hr))
{
m_serializedData = serializedData;
m_serializedData = descriptor.m_serializedData;
}
else
{
@ -270,5 +270,13 @@ namespace AZ
return false;
#endif
}
//! DX12 drivers do not manage the PSO cache file themselves, so a driver-side
//! save is never performed here and this always returns false. On DX12 the
//! engine must save the data returned by GetSerializedData() explicitly.
//! @param filePath Unused on DX12; kept for the cross-platform interface.
//! @return Always false (driver-side save unsupported).
bool PipelineLibrary::SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const
{
    // DX12 drivers cannot save serialized data
    [[maybe_unused]] Device& device = static_cast<Device&>(GetDevice());
    // Explicit (engine-side) PSO cache file operations are required on DX12,
    // so m_isPsoCacheFileOperationsNeeded must be true here. The previous
    // condition asserted the negation, which fired in the documented DX12
    // configuration and contradicted the assert message.
    AZ_Assert(device.GetFeatures().m_isPsoCacheFileOperationsNeeded, "Explicit PSO cache operations should not be disabled for DX12");
    return false;
}
}
}

@ -31,11 +31,12 @@ namespace AZ
//////////////////////////////////////////////////////////////////////////
// RHI::PipelineLibrary
RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryData* serializedData) override;
RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryDescriptor& descriptor) override;
void ShutdownInternal() override;
RHI::ResultCode MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> libraries) override;
RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override;
bool IsMergeRequired() const;
bool SaveSerializedDataInternal(const AZStd::string& filePath) const override;
//////////////////////////////////////////////////////////////////////////
ID3D12DeviceX* m_dx12Device = nullptr;

@ -24,7 +24,7 @@ namespace Platform
{
AZ::RHI::PhysicalDeviceList physicalDeviceList;
AZ::Metal::PhysicalDevice* physicalDevice = aznew AZ::Metal::PhysicalDevice;
physicalDevice->Init(nil);
physicalDevice->Init(MTLCreateSystemDefaultDevice());
physicalDeviceList.emplace_back(physicalDevice);
return physicalDeviceList;
}

@ -73,11 +73,11 @@ namespace AZ
const auto* sourceBuffer = static_cast<const Buffer*>(descriptor.m_sourceBuffer);
const auto* destinationBuffer = static_cast<const Buffer*>(descriptor.m_destinationBuffer);
[blitEncoder copyFromBuffer:sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
sourceOffset:descriptor.m_sourceOffset
toBuffer:destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
destinationOffset:descriptor.m_destinationOffset
size:descriptor.m_size];
[blitEncoder copyFromBuffer: sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
sourceOffset: descriptor.m_sourceOffset
toBuffer: destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
destinationOffset: descriptor.m_destinationOffset
size: descriptor.m_size];
Platform::SynchronizeBufferOnGPU(blitEncoder, destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>());
break;
@ -101,14 +101,14 @@ namespace AZ
descriptor.m_destinationOrigin.m_front);
[blitEncoder copyFromTexture: sourceImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
sourceSlice: descriptor.m_sourceSubresource.m_arraySlice
sourceLevel: descriptor.m_sourceSubresource.m_mipSlice
sourceOrigin: sourceOrigin
sourceSize: sourceSize
toTexture: destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
destinationSlice: descriptor.m_destinationSubresource.m_arraySlice
destinationLevel: descriptor.m_destinationSubresource.m_mipSlice
destinationOrigin: destinationOrigin];
sourceSlice: descriptor.m_sourceSubresource.m_arraySlice
sourceLevel: descriptor.m_sourceSubresource.m_mipSlice
sourceOrigin: sourceOrigin
sourceSize: sourceSize
toTexture: destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
destinationSlice: descriptor.m_destinationSubresource.m_arraySlice
destinationLevel: descriptor.m_destinationSubresource.m_mipSlice
destinationOrigin: destinationOrigin];
Platform::SynchronizeTextureOnGPU(blitEncoder, destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>());
break;
@ -127,15 +127,15 @@ namespace AZ
descriptor.m_sourceSize.m_height,
descriptor.m_sourceSize.m_depth);
[blitEncoder copyFromBuffer:sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
sourceOffset:sourceBuffer->GetMemoryView().GetOffset() + descriptor.m_sourceOffset
sourceBytesPerRow:descriptor.m_sourceBytesPerRow
sourceBytesPerImage:descriptor.m_sourceBytesPerImage
sourceSize:sourceSize
toTexture:destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
destinationSlice:descriptor.m_destinationSubresource.m_arraySlice
destinationLevel:descriptor.m_destinationSubresource.m_mipSlice
destinationOrigin:destinationOrigin];
[blitEncoder copyFromBuffer: sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
sourceOffset: sourceBuffer->GetMemoryView().GetOffset() + descriptor.m_sourceOffset
sourceBytesPerRow: descriptor.m_sourceBytesPerRow
sourceBytesPerImage: descriptor.m_sourceBytesPerImage
sourceSize: sourceSize
toTexture: destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
destinationSlice: descriptor.m_destinationSubresource.m_arraySlice
destinationLevel: descriptor.m_destinationSubresource.m_mipSlice
destinationOrigin: destinationOrigin];
Platform::SynchronizeTextureOnGPU(blitEncoder, destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>());
break;
@ -154,15 +154,15 @@ namespace AZ
descriptor.m_sourceSize.m_height,
descriptor.m_sourceSize.m_depth);
[blitEncoder copyFromTexture:sourceImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
sourceSlice:descriptor.m_sourceSubresource.m_arraySlice
sourceLevel:descriptor.m_sourceSubresource.m_mipSlice
sourceOrigin:sourceOrigin
sourceSize:sourceSize
toBuffer:destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
destinationOffset:destinationBuffer->GetMemoryView().GetOffset() + descriptor.m_destinationOffset
destinationBytesPerRow:descriptor.m_destinationBytesPerRow
destinationBytesPerImage:descriptor.m_destinationBytesPerImage];
[blitEncoder copyFromTexture: sourceImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
sourceSlice: descriptor.m_sourceSubresource.m_arraySlice
sourceLevel: descriptor.m_sourceSubresource.m_mipSlice
sourceOrigin: sourceOrigin
sourceSize: sourceSize
toBuffer: destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
destinationOffset: destinationBuffer->GetMemoryView().GetOffset() + descriptor.m_destinationOffset
destinationBytesPerRow: descriptor.m_destinationBytesPerRow
destinationBytesPerImage: descriptor.m_destinationBytesPerImage];
Platform::SynchronizeBufferOnGPU(blitEncoder, destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>());
break;
@ -192,7 +192,7 @@ namespace AZ
id<MTLComputeCommandEncoder> computeEncoder = GetEncoder<id<MTLComputeCommandEncoder>>();
[computeEncoder dispatchThreadgroups: numThreadGroup
threadsPerThreadgroup: threadsPerGroup];
threadsPerThreadgroup: threadsPerGroup];
}
@ -235,8 +235,8 @@ namespace AZ
{
id<MTLComputeCommandEncoder> computeEncoder = GetEncoder<id<MTLComputeCommandEncoder>>();
[computeEncoder setBytes: item.m_rootConstants
length: pipelineLayout.GetRootConstantsSize()
atIndex: pipelineLayout.GetRootConstantsSlotIndex()];
length: pipelineLayout.GetRootConstantsSize()
atIndex: pipelineLayout.GetRootConstantsSlotIndex()];
}
}
@ -434,25 +434,25 @@ namespace AZ
case RHI::ShaderStage::Vertex:
{
id<MTLRenderCommandEncoder> renderEncoder = GetEncoder<id<MTLRenderCommandEncoder>>();
[renderEncoder setVertexBuffers:&mtlArgBuffers[startingIndex]
offsets:&mtlArgBufferOffsets[startingIndex]
withRange:range];
[renderEncoder setVertexBuffers: &mtlArgBuffers[startingIndex]
offsets: &mtlArgBufferOffsets[startingIndex]
withRange: range];
break;
}
case RHI::ShaderStage::Fragment:
{
id<MTLRenderCommandEncoder> renderEncoder = GetEncoder<id<MTLRenderCommandEncoder>>();
[renderEncoder setFragmentBuffers:&mtlArgBuffers[startingIndex]
offsets:&mtlArgBufferOffsets[startingIndex]
withRange:range];
[renderEncoder setFragmentBuffers: &mtlArgBuffers[startingIndex]
offsets: &mtlArgBufferOffsets[startingIndex]
withRange: range];
break;
}
case RHI::ShaderStage::Compute:
{
id<MTLComputeCommandEncoder> computeEncoder = GetEncoder<id<MTLComputeCommandEncoder>>();
[computeEncoder setBuffers:&mtlArgBuffers[startingIndex]
offsets:&mtlArgBufferOffsets[startingIndex]
withRange:range];
[computeEncoder setBuffers: &mtlArgBuffers[startingIndex]
offsets: &mtlArgBufferOffsets[startingIndex]
withRange: range];
break;
}
default:
@ -537,13 +537,13 @@ namespace AZ
uint32_t indexOffset = indexBuffDescriptor.GetByteOffset() + (indexed.m_indexOffset * indexTypeSize) + buff->GetMemoryView().GetOffset();
[renderEncoder drawIndexedPrimitives: mtlPrimType
indexCount: indexed.m_indexCount
indexType: mtlIndexType
indexBuffer: mtlBuff
indexBufferOffset: indexOffset
instanceCount: indexed.m_instanceCount
baseVertex: indexed.m_vertexOffset
baseInstance: indexed.m_instanceOffset];
indexCount: indexed.m_indexCount
indexType: mtlIndexType
indexBuffer: mtlBuff
indexBufferOffset: indexOffset
instanceCount: indexed.m_instanceCount
baseVertex: indexed.m_vertexOffset
baseInstance: indexed.m_instanceOffset];
break;
}
@ -551,10 +551,10 @@ namespace AZ
{
const RHI::DrawLinear& linear = drawItem.m_arguments.m_linear;
[renderEncoder drawPrimitives: mtlPrimType
vertexStart: linear.m_vertexOffset
vertexCount: linear.m_vertexCount
instanceCount: linear.m_instanceCount
baseInstance: linear.m_instanceOffset];
vertexStart: linear.m_vertexOffset
vertexCount: linear.m_vertexCount
instanceCount: linear.m_instanceCount
baseInstance: linear.m_instanceOffset];
break;
}
}

@ -15,6 +15,7 @@
#include <RHI/Conversions.h>
#include <RHI/Device.h>
#include <RHI/Metal.h>
#include <RHI/PhysicalDevice.h>
//Symbols related to Obj-c categories are getting stripped out as part of the link step for monolithic builds
//This forces the linker to not strip symbols related to categories without actually referencing the dummy function.
@ -40,9 +41,10 @@ namespace AZ
return aznew Device();
}
RHI::ResultCode Device::InitInternal(RHI::PhysicalDevice& physicalDevice)
RHI::ResultCode Device::InitInternal(RHI::PhysicalDevice& physicalDeviceBase)
{
m_metalDevice = MTLCreateSystemDefaultDevice();
PhysicalDevice& physicalDevice = static_cast<PhysicalDevice&>(physicalDeviceBase);
m_metalDevice = physicalDevice.GetNativeDevice();
AZ_Assert(m_metalDevice, "Native device wasnt created");
m_eventListener = [[MTLSharedEventListener alloc] init];
@ -340,6 +342,9 @@ namespace AZ
m_features.m_customResolvePositions = m_metalDevice.programmableSamplePositionsSupported;
m_features.m_indirectDrawSupport = false;
//Metal drivers save and load serialized PipelineLibrary internally
m_features.m_isPsoCacheFileOperationsNeeded = false;
RHI::QueryTypeFlags counterSamplingFlags = RHI::QueryTypeFlags::None;
bool supportsInterDrawTimestamps = true;

@ -15,11 +15,15 @@ namespace Platform
AZ::RHI::PhysicalDeviceList EnumerateDevices();
}
namespace AZ
{
namespace Metal
{
//! Returns the native MTLDevice this physical device wraps.
//! Non-owning accessor: the handle is cached in Init() and released in Shutdown().
id<MTLDevice> PhysicalDevice::GetNativeDevice()
{
return m_mtlNativeDevice;
}
RHI::PhysicalDeviceList PhysicalDevice::Enumerate()
{
return Platform::EnumerateDevices();
@ -29,15 +33,34 @@ namespace AZ
{
if(mtlDevice)
{
m_mtlNativeDevice = mtlDevice;
NSString * deviceName = [mtlDevice name];
const char * secondName = [ deviceName UTF8String ];
m_descriptor.m_description = AZStd::string(secondName);
m_descriptor.m_deviceId = [mtlDevice registryID];
const char * deviceNameCStr = [ deviceName UTF8String ];
m_descriptor.m_description = AZStd::string(deviceNameCStr);
m_descriptor.m_deviceId = deviceName.hash; //Used for storing PipelineLibraries
//Currently no way of knowing vendor id through metal. Using AMD as a placeholder for now.
m_descriptor.m_vendorId = RHI::VendorId::AMD;
if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::Apple).data()))
{
m_descriptor.m_vendorId = RHI::VendorId::Apple;
}
else if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::Intel).data()))
{
m_descriptor.m_vendorId = RHI::VendorId::Intel;
}
else if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::nVidia).data()))
{
m_descriptor.m_vendorId = RHI::VendorId::nVidia;
}
else if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::AMD).data()))
{
m_descriptor.m_vendorId = RHI::VendorId::AMD;
}
m_descriptor.m_type = Platform::GetPhysicalDeviceType(mtlDevice);
NSOperatingSystemVersion version = [[NSProcessInfo processInfo] operatingSystemVersion];
AZStd::string concatVer = AZStd::string::format("%li%li%li", version.majorVersion, version.minorVersion, version.patchVersion);
m_descriptor.m_driverVersion = AZStd::stoi(concatVer);
}
}

@ -26,8 +26,12 @@ namespace AZ
void Init(id<MTLDevice> mtlDevice);
static RHI::PhysicalDeviceList Enumerate();
id<MTLDevice> GetNativeDevice();
private:
void Shutdown() override;
id<MTLDevice> m_mtlNativeDevice = nil;
};
}
}

@ -8,6 +8,7 @@
#include <RHI/Device.h>
#include <RHI/PipelineLibrary.h>
#include <AzCore/Name/Name.h>
namespace AZ
{
@ -18,17 +19,111 @@ namespace AZ
return aznew PipelineLibrary;
}
RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryData* serializedData)
//! Returns the MTLBinaryArchive backing this pipeline library (the Metal PSO cache).
//! Non-owning accessor: the archive is created in InitInternal and released in ShutdownInternal,
//! so callers must not release it and must not use it past this object's shutdown.
id<MTLBinaryArchive> PipelineLibrary::GetNativePipelineCache() const
{
return m_mtlBinaryArchive;
}
//! Creates the MTLBinaryArchive used as the PSO cache for this library.
//! If descriptor.m_filePath points at an existing serialized archive on disk, the driver
//! warm-loads it; otherwise an empty archive is created.
//! Returns Fail when the archive cannot be created (e.g. unsupported device/OS), which
//! leaves the library uninitialized so callers fall back to uncached PSO creation.
RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryDescriptor& descriptor)
{
    DeviceObject::Init(deviceBase);
    auto& device = static_cast<Device&>(deviceBase);
    m_descriptor = descriptor;

    NSError* error = nil;
    MTLBinaryArchiveDescriptor* desc = [[MTLBinaryArchiveDescriptor alloc] init];
    NSString* psoCacheFilePath = [NSString stringWithCString:descriptor.m_filePath.c_str() encoding:NSUTF8StringEncoding];
    NSURL* filePathURL = [NSURL fileURLWithPath:psoCacheFilePath isDirectory:NO];

    //Pass in the file path if it exists so the driver loads the previously serialized cache
    if ([filePathURL checkResourceIsReachableAndReturnError:&error])
    {
        desc.url = filePathURL;
    }

    //checkResourceIsReachableAndReturnError: populates 'error' when the file is absent;
    //clear it so a stale value is not mistaken for an archive-creation failure below.
    error = nil;

    //Create a new Pso cache. Use the existing file on disk if provided.
    m_mtlBinaryArchive = [device.GetMtlDevice() newBinaryArchiveWithDescriptor:desc error:&error];
    [desc release];
    desc = nil;

    if (m_mtlBinaryArchive == nil)
    {
        //Previously this failure was silently ignored and Success was returned with a nil archive.
        AZ_Error("PipelineLibrary", false, "Failed to create MTLBinaryArchive for %s", descriptor.m_filePath.c_str());
        return RHI::ResultCode::Fail;
    }

    SetName(GetName());
    NSString* labelName = [NSString stringWithCString:GetName().GetCStr() encoding:NSUTF8StringEncoding];
    m_mtlBinaryArchive.label = labelName;
    return RHI::ResultCode::Success;
}
//! Releases the native binary archive and drops the bookkeeping maps.
//! NOTE(review): m_renderPipelineStates/m_computePipelineStates hold descriptor pointers that
//! this class never retained (see CreateGraphicsPipelineState/CreateComputePipelineState), so
//! clear() here intentionally does not release them — confirm the owning PipelineState objects
//! outlive any merge that reads these maps.
void PipelineLibrary::ShutdownInternal()
{
[m_mtlBinaryArchive release];
m_mtlBinaryArchive = nil;
m_renderPipelineStates.clear();
m_computePipelineStates.clear();
}
//! Creates a render PSO through this library so the driver can service it from (and add it to)
//! the MTLBinaryArchive. Also records the descriptor under 'hash' so MergeIntoInternal can
//! propagate it into other libraries.
//! NOTE(review): 'error' from newRenderPipelineStateWithDescriptor: is not inspected here; a nil
//! return is asserted by the caller. The descriptor stored in m_renderPipelineStates is NOT
//! retained — it dangles if the owning PipelineState releases it before a merge; verify lifetime.
id<MTLRenderPipelineState> PipelineLibrary::CreateGraphicsPipelineState(uint64_t hash, MTLRenderPipelineDescriptor* pipelineStateDesc)
{
Device& device = static_cast<Device&>(GetDevice());
NSError* error = nil;
// Serialize archive attachment + map insertion against concurrent PSO creation/merges.
AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
NSArray* binArchives = [NSArray arrayWithObjects:m_mtlBinaryArchive,nil];
pipelineStateDesc.binaryArchives = binArchives;
//Create a new PSO. The drivers will use the Pso cache if the PSO resides in it
id<MTLRenderPipelineState> graphicsPipelineState =
[device.GetMtlDevice() newRenderPipelineStateWithDescriptor:pipelineStateDesc
error:&error];
m_renderPipelineStates.emplace(hash, pipelineStateDesc);
return graphicsPipelineState;
}
//! Creates a compute PSO through this library so the driver can service it from (and add it to)
//! the MTLBinaryArchive. Also records the descriptor under 'hash' for MergeIntoInternal.
//! NOTE(review): 'error' and the reflection output are not inspected here; a nil return is
//! asserted by the caller. As with the render path, the stored descriptor is not retained —
//! confirm the owning PipelineState keeps it alive until any merge completes.
id<MTLComputePipelineState> PipelineLibrary::CreateComputePipelineState(uint64_t hash, MTLComputePipelineDescriptor* pipelineStateDesc)
{
Device& device = static_cast<Device&>(GetDevice());
NSError* error = nil;
// Out-parameter required by the reflection-producing overload; the reflection data is unused.
MTLComputePipelineReflection* ref;
// Serialize archive attachment + map insertion against concurrent PSO creation/merges.
AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
NSArray* binArchives = [NSArray arrayWithObjects:m_mtlBinaryArchive,nil];
pipelineStateDesc.binaryArchives = binArchives;
//Create a new PSO. The drivers will use the Pso cache if the PSO resides in it
id<MTLComputePipelineState> computePipelineState =
[device.GetMtlDevice() newComputePipelineStateWithDescriptor: pipelineStateDesc
options: MTLPipelineOptionBufferTypeInfo
reflection: &ref
error: &error];
m_computePipelineStates.emplace(hash, pipelineStateDesc);
return computePipelineState;
}
//! Folds the PSO entries of the given libraries into this one by replaying their recorded
//! descriptors into this library's MTLBinaryArchive. Entries already present (same hash) are
//! skipped. Failures to add an individual descriptor are reported but do not abort the merge.
//! NOTE(review): only this library's mutex is taken; the source libraries' maps are read
//! unlocked — callers are expected to have quiesced them (typical merge-at-save usage).
RHI::ResultCode PipelineLibrary::MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> pipelineLibraries)
{
    AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
    for (const RHI::PipelineLibrary* libraryBase : pipelineLibraries)
    {
        const PipelineLibrary* library = static_cast<const PipelineLibrary*>(libraryBase);
        for (const auto& [hash, renderDesc] : library->m_renderPipelineStates)
        {
            if (m_renderPipelineStates.find(hash) == m_renderPipelineStates.end())
            {
                //Reset the error per call: the original reused one NSError* across every add,
                //silently discarding failures.
                NSError* error = nil;
                [m_mtlBinaryArchive addRenderPipelineFunctionsWithDescriptor:renderDesc
                                                                       error:&error];
                AZ_Error("PipelineLibrary", error == nil, "Failed to merge a render PSO descriptor into the binary archive");
                m_renderPipelineStates.emplace(hash, renderDesc);
            }
        }
        for (const auto& [hash, computeDesc] : library->m_computePipelineStates)
        {
            if (m_computePipelineStates.find(hash) == m_computePipelineStates.end())
            {
                NSError* error = nil;
                [m_mtlBinaryArchive addComputePipelineFunctionsWithDescriptor:computeDesc
                                                                        error:&error];
                AZ_Error("PipelineLibrary", error == nil, "Failed to merge a compute PSO descriptor into the binary archive");
                m_computePipelineStates.emplace(hash, computeDesc);
            }
        }
    }
    return RHI::ResultCode::Success;
}
@ -36,5 +131,39 @@ namespace AZ
{
return nullptr;
}
//! Serializes the MTLBinaryArchive to 'filePath', creating the parent directory if needed.
//! Returns true when no serialization error occurred (including the no-archive case, where
//! nothing is written — behavior preserved from the original).
bool PipelineLibrary::SaveSerializedDataInternal(const AZStd::string& filePath) const
{
    AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
    NSError* error = nil;
    NSString* psoCacheFilePath = [NSString stringWithCString:filePath.c_str() encoding:NSUTF8StringEncoding];
    NSURL* baseURL = [NSURL fileURLWithPath:psoCacheFilePath];
    BOOL isDir = NO;
    NSFileManager* fileManager = [NSFileManager defaultManager];
    NSString* directory = [psoCacheFilePath stringByDeletingLastPathComponent];
    //If the directory where the PSO cache will reside does not exist create one
    if (![fileManager fileExistsAtPath:directory isDirectory:&isDir])
    {
        if (![fileManager createDirectoryAtPath:directory withIntermediateDirectories:YES attributes:nil error:NULL])
        {
            //Bug fix: the original passed an NSString* to the %s specifier (undefined output) and
            //reported the file path rather than the directory whose creation failed.
            AZ_Error("PipelineStateCache", false, "Error: Unable to create the folder %s in order to save the PSO Cache", [directory UTF8String]);
            return false;
        }
    }
    if (m_mtlBinaryArchive)
    {
        [m_mtlBinaryArchive serializeToURL:baseURL
                                     error:&error];
    }
    return error == nil;
}
//! Reports whether this library has recorded any PSO descriptors, i.e. whether merging it
//! into another library would contribute entries.
bool PipelineLibrary::IsMergeRequired() const
{
    const bool hasNoEntries = m_renderPipelineStates.empty() && m_computePipelineStates.empty();
    return !hasNoEntries;
}
}
}

@ -22,18 +22,30 @@ namespace AZ
AZ_DISABLE_COPY_MOVE(PipelineLibrary);
static RHI::Ptr<PipelineLibrary> Create();
id<MTLBinaryArchive> GetNativePipelineCache() const;
id<MTLRenderPipelineState> CreateGraphicsPipelineState(uint64_t hash, MTLRenderPipelineDescriptor* pipelineStateDesc);
id<MTLComputePipelineState> CreateComputePipelineState(uint64_t hash, MTLComputePipelineDescriptor* pipelineStateDesc);
private:
PipelineLibrary() = default;
//////////////////////////////////////////////////////////////////////////
// RHI::PipelineLibrary
RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryData* serializedData) override;
RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryDescriptor& descriptor) override;
void ShutdownInternal() override;
RHI::ResultCode MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> libraries) override;
RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override;
bool IsMergeRequired() const;
bool SaveSerializedDataInternal(const AZStd::string& filePath) const;
//////////////////////////////////////////////////////////////////////////
RHI::PipelineLibraryDescriptor m_descriptor;
id<MTLBinaryArchive> m_mtlBinaryArchive = nil;
mutable AZStd::mutex m_mutex;
// Internally tracks additions to the library. Used when merging libraries together.
AZStd::unordered_map<uint64_t, MTLRenderPipelineDescriptor*> m_renderPipelineStates;
AZStd::unordered_map<uint64_t, MTLComputePipelineDescriptor*> m_computePipelineStates;
};
}
}

@ -13,6 +13,7 @@
#include <RHI/Conversions.h>
#include <RHI/Device.h>
#include <RHI/PipelineState.h>
#include <RHI/PipelineLibrary.h>
namespace AZ
{
@ -110,8 +111,6 @@ namespace AZ
[source release];
source = nil;
return pFunction;
}
@ -119,68 +118,71 @@ namespace AZ
const RHI::PipelineStateDescriptorForDraw& descriptor,
RHI::PipelineLibrary* pipelineLibraryBase)
{
NSError* error = 0;
Device& device = static_cast<Device&>(deviceBase);
RHI::ConstPtr<PipelineLayout> pipelineLayout = device.AcquirePipelineLayout(*descriptor.m_pipelineLayoutDescriptor);
AZ_Assert(pipelineLayout, "PipelineLayout can not be null");
const RHI::RenderAttachmentConfiguration& attachmentsConfiguration = descriptor.m_renderAttachmentConfiguration;
MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor alloc] init];
m_renderPipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
for (AZ::u32 i = 0; i < attachmentsConfiguration.GetRenderTargetCount(); ++i)
{
desc.colorAttachments[i].pixelFormat = ConvertPixelFormat(attachmentsConfiguration.GetRenderTargetFormat(i));
desc.colorAttachments[i].writeMask = ConvertColorWriteMask(descriptor.m_renderStates.m_blendState.m_targets[i].m_writeMask);
desc.colorAttachments[i].blendingEnabled = descriptor.m_renderStates.m_blendState.m_targets[i].m_enable;
desc.colorAttachments[i].alphaBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaOp);
desc.colorAttachments[i].rgbBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendOp);
desc.colorAttachments[i].destinationAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaDest);
desc.colorAttachments[i].destinationRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendDest);;
desc.colorAttachments[i].sourceAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaSource);
desc.colorAttachments[i].sourceRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendSource);;
m_renderPipelineDesc.colorAttachments[i].pixelFormat = ConvertPixelFormat(attachmentsConfiguration.GetRenderTargetFormat(i));
m_renderPipelineDesc.colorAttachments[i].writeMask = ConvertColorWriteMask(descriptor.m_renderStates.m_blendState.m_targets[i].m_writeMask);
m_renderPipelineDesc.colorAttachments[i].blendingEnabled = descriptor.m_renderStates.m_blendState.m_targets[i].m_enable;
m_renderPipelineDesc.colorAttachments[i].alphaBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaOp);
m_renderPipelineDesc.colorAttachments[i].rgbBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendOp);
m_renderPipelineDesc.colorAttachments[i].destinationAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaDest);
m_renderPipelineDesc.colorAttachments[i].destinationRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendDest);;
m_renderPipelineDesc.colorAttachments[i].sourceAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaSource);
m_renderPipelineDesc.colorAttachments[i].sourceRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendSource);;
}
MTLVertexDescriptor* vertexDescriptor = [[MTLVertexDescriptor alloc] init];
ConvertInputElements(descriptor.m_inputStreamLayout, vertexDescriptor);
desc.vertexDescriptor = vertexDescriptor;
m_renderPipelineDesc.vertexDescriptor = vertexDescriptor;
[vertexDescriptor release];
vertexDescriptor = nil;
desc.vertexFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_vertexFunction.get());
AZ_Assert(desc.vertexFunction, "Vertex mtlFuntion can not be null");
desc.fragmentFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_fragmentFunction.get());
m_renderPipelineDesc.vertexFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_vertexFunction.get());
AZ_Assert(m_renderPipelineDesc.vertexFunction, "Vertex mtlFuntion can not be null");
m_renderPipelineDesc.fragmentFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_fragmentFunction.get());
RHI::Format depthStencilFormat = attachmentsConfiguration.GetDepthStencilFormat();
if(descriptor.m_renderStates.m_depthStencilState.m_stencil.m_enable || IsDepthStencilMerged(depthStencilFormat))
{
desc.stencilAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
m_renderPipelineDesc.stencilAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
}
//Depthstencil state
if(descriptor.m_renderStates.m_depthStencilState.m_depth.m_enable || IsDepthStencilMerged(depthStencilFormat))
{
desc.depthAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
m_renderPipelineDesc.depthAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
MTLDepthStencilDescriptor* depthStencilDesc = [[MTLDepthStencilDescriptor alloc] init];
ConvertDepthStencilState(descriptor.m_renderStates.m_depthStencilState, depthStencilDesc);
m_depthStencilState = [device.GetMtlDevice() newDepthStencilStateWithDescriptor:depthStencilDesc];
AZ_Assert(m_depthStencilState, "Could not create Depth Stencil state.");
[m_depthStencilState retain];
[depthStencilDesc release];
depthStencilDesc = nil;
}
desc.sampleCount = descriptor.m_renderStates.m_multisampleState.m_samples;
desc.alphaToCoverageEnabled = descriptor.m_renderStates.m_blendState.m_alphaToCoverageEnable;
m_renderPipelineDesc.sampleCount = descriptor.m_renderStates.m_multisampleState.m_samples;
m_renderPipelineDesc.alphaToCoverageEnabled = descriptor.m_renderStates.m_blendState.m_alphaToCoverageEnable;
NSError* error = 0;
MTLRenderPipelineReflection* ref;
m_graphicsPipelineState = [device.GetMtlDevice() newRenderPipelineStateWithDescriptor:desc options : MTLPipelineOptionBufferTypeInfo reflection : &ref error : &error];
PipelineLibrary* pipelineLibrary = static_cast<PipelineLibrary*>(pipelineLibraryBase);
if (pipelineLibrary && pipelineLibrary->IsInitialized())
{
m_graphicsPipelineState = pipelineLibrary->CreateGraphicsPipelineState(static_cast<uint64_t>(descriptor.GetHash()), m_renderPipelineDesc);
}
else
{
MTLRenderPipelineReflection* ref;
m_graphicsPipelineState = [device.GetMtlDevice() newRenderPipelineStateWithDescriptor:m_renderPipelineDesc options : MTLPipelineOptionBufferTypeInfo reflection : &ref error : &error];
}
AZ_Assert(m_graphicsPipelineState, "Could not create Pipeline object!.");
[m_graphicsPipelineState retain];
[desc release];
desc = nil;
m_pipelineStateMultiSampleState = descriptor.m_renderStates.m_multisampleState;
//Cache the rasterizer state
@ -207,20 +209,25 @@ namespace AZ
RHI::PipelineLibrary* pipelineLibraryBase)
{
Device& device = static_cast<Device&>(deviceBase);
MTLComputePipelineDescriptor* desc = [[MTLComputePipelineDescriptor alloc] init];
NSError* error = 0;
m_computePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
RHI::ConstPtr<PipelineLayout> pipelineLayout = device.AcquirePipelineLayout(*descriptor.m_pipelineLayoutDescriptor);
AZ_Assert(pipelineLayout, "PipelineLayout can not be null");
desc.computeFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_computeFunction.get());
AZ_Assert(desc.computeFunction, "Compute mtlFuntion can not be null");
m_computePipelineDesc.computeFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_computeFunction.get());
AZ_Assert(m_computePipelineDesc.computeFunction, "Compute mtlFuntion can not be null");
NSError* error = 0;
MTLComputePipelineReflection* ref;
m_computePipelineState = [device.GetMtlDevice() newComputePipelineStateWithDescriptor:desc options:MTLPipelineOptionBufferTypeInfo reflection:&ref error:&error];
PipelineLibrary* pipelineLibrary = static_cast<PipelineLibrary*>(pipelineLibraryBase);
if (pipelineLibrary && pipelineLibrary->IsInitialized())
{
m_computePipelineState = pipelineLibrary->CreateComputePipelineState(static_cast<uint64_t>(descriptor.GetHash()), m_computePipelineDesc);
}
else
{
MTLComputePipelineReflection* ref;
m_computePipelineState = [device.GetMtlDevice() newComputePipelineStateWithDescriptor:m_computePipelineDesc options:MTLPipelineOptionBufferTypeInfo reflection:&ref error:&error];
}
AZ_Assert(m_computePipelineState, "Could not create Pipeline object!.");
[m_computePipelineState retain];
[desc release];
desc = nil;
if (m_computePipelineState)
{
@ -262,12 +269,16 @@ namespace AZ
{
if (m_graphicsPipelineState)
{
[m_renderPipelineDesc release];
m_renderPipelineDesc = nil;
[m_graphicsPipelineState release];
m_graphicsPipelineState = nil;
}
if (m_computePipelineState)
{
[m_computePipelineDesc release];
m_computePipelineDesc = nil;
[m_computePipelineState release];
m_computePipelineState = nil;
}

@ -75,10 +75,13 @@ namespace AZ
RHI::ConstPtr<PipelineLayout> m_pipelineLayout;
AZStd::atomic_bool m_isCompiled = {false};
// Platform pipeline state.
// PSOs + descriptors
id<MTLRenderPipelineState> m_graphicsPipelineState = nil;
id<MTLComputePipelineState> m_computePipelineState = nil;
id<MTLDepthStencilState> m_depthStencilState = nil;
MTLRenderPipelineDescriptor* m_renderPipelineDesc = nil;
MTLComputePipelineDescriptor* m_computePipelineDesc = nil;
AZ::u32 m_stencilRef = 0;
RasterizerState m_rasterizerState;
MTLPrimitiveType m_primitiveTopology = MTLPrimitiveTypeTriangle;

@ -28,10 +28,11 @@ namespace AZ
//////////////////////////////////////////////////////////////////////////
// RHI::PipelineLibrary
RHI::ResultCode InitInternal([[maybe_unused]] RHI::Device& device, [[maybe_unused]] const RHI::PipelineLibraryData* serializedData) override { return RHI::ResultCode::Success;}
RHI::ResultCode InitInternal([[maybe_unused]] RHI::Device& device, [[maybe_unused]] const RHI::PipelineLibraryDescriptor& descriptor) override { return RHI::ResultCode::Success;}
void ShutdownInternal() override {}
RHI::ResultCode MergeIntoInternal([[maybe_unused]] AZStd::span<const RHI::PipelineLibrary* const> libraries) override { return RHI::ResultCode::Success;}
RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override { return nullptr;}
bool SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const override { return true;}
//////////////////////////////////////////////////////////////////////////
};
}

@ -23,7 +23,7 @@ namespace AZ
return m_nativePipelineCache;
}
RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryData* serializedData)
RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryDescriptor& descriptor)
{
DeviceObject::Init(deviceBase);
auto& device = static_cast<Device&>(deviceBase);
@ -35,10 +35,10 @@ namespace AZ
createInfo.initialDataSize = 0;
createInfo.pInitialData = nullptr;
if (serializedData)
if (descriptor.m_serializedData)
{
createInfo.initialDataSize = static_cast<size_t>(serializedData->GetData().size());
createInfo.pInitialData = serializedData->GetData().data();
createInfo.initialDataSize = static_cast<size_t>(descriptor.m_serializedData->GetData().size());
createInfo.pInitialData = descriptor.m_serializedData->GetData().data();
}
const VkResult result = vkCreatePipelineCache(device.GetNativeDevice(), &createInfo, nullptr, &m_nativePipelineCache);
@ -108,5 +108,13 @@ namespace AZ
Debug::SetNameToObject(reinterpret_cast<uint64_t>(m_nativePipelineCache), name.data(), VK_OBJECT_TYPE_PIPELINE_CACHE, static_cast<Device&>(GetDevice()));
}
}
//! Vulkan cannot serialize its pipeline cache directly to a file from the driver side; the
//! engine saves the data returned by GetSerializedDataInternal instead. Callers only invoke
//! this path when m_isPsoCacheFileOperationsNeeded is false, and Vulkan leaves that flag at
//! its default of true — so reaching this function indicates a caller bug, which the assert
//! below is designed to fire on (its condition is false whenever the flag is correctly true).
bool PipelineLibrary::SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const
{
//Vulkan drivers cannot save serialized data
[[maybe_unused]] Device& device = static_cast<Device&>(GetDevice());
AZ_Assert(!device.GetFeatures().m_isPsoCacheFileOperationsNeeded, "Explicit PSO cache operations should not be disabled for Vulkan");
return false;
}
}
}

@ -40,10 +40,11 @@ namespace AZ
//////////////////////////////////////////////////////////////////////////
// RHI::PipelineLibrary
RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryData* serializedData) override;
RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryDescriptor& descriptor) override;
void ShutdownInternal() override;
RHI::ResultCode MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> libraries) override;
RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override;
bool SaveSerializedDataInternal(const AZStd::string& filePath) const override;
//////////////////////////////////////////////////////////////////////////
VkPipelineCache m_nativePipelineCache = VK_NULL_HANDLE;

@ -104,7 +104,7 @@ namespace AZ
char pipelineLibraryPathTemp[AZ_MAX_PATH_LEN];
azsnprintf(
pipelineLibraryPathTemp, AZ_MAX_PATH_LEN, "@user@/Atom/PipelineStateCache_%s_%i_%i _Ver_%i/%s/%s_%s_%d.bin",
pipelineLibraryPathTemp, AZ_MAX_PATH_LEN, "@user@/Atom/PipelineStateCache_%s_%u_%u_Ver_%i/%s/%s_%s_%d.bin",
ToString(physicalDeviceDesc.m_vendorId).data(), physicalDeviceDesc.m_deviceId, physicalDeviceDesc.m_driverVersion, PSOCacheVersion,
platformName.GetCStr(),
shaderName.GetCStr(),
@ -146,7 +146,7 @@ namespace AZ
RHI::PipelineStateCache* pipelineStateCache = rhiSystem->GetPipelineStateCache();
ConstPtr<RHI::PipelineLibraryData> serializedData = LoadPipelineLibrary();
RHI::PipelineLibraryHandle pipelineLibraryHandle = pipelineStateCache->CreateLibrary(serializedData.get());
RHI::PipelineLibraryHandle pipelineLibraryHandle = pipelineStateCache->CreateLibrary(serializedData.get(), m_pipelineLibraryPath);
if (pipelineLibraryHandle.IsNull())
{
@ -321,8 +321,10 @@ namespace AZ
///////////////////////////////////////////////////////////////////
ConstPtr<RHI::PipelineLibraryData> Shader::LoadPipelineLibrary() const
{
if (m_pipelineLibraryPath[0] != 0)
{
RHI::Device* device = RHI::RHISystemInterface::Get()->GetDevice();
//Check if explicit file load/save operation is needed as the RHI backend api may not support it
if (m_pipelineLibraryPath[0] != 0 && device->GetFeatures().m_isPsoCacheFileOperationsNeeded)
{
return Utils::LoadObjectFromFile<RHI::PipelineLibraryData>(m_pipelineLibraryPath);
}
@ -331,12 +333,28 @@ namespace AZ
void Shader::SavePipelineLibrary() const
{
RHI::Device* device = RHI::RHISystemInterface::Get()->GetDevice();
if (m_pipelineLibraryPath[0] != 0)
{
RHI::ConstPtr<RHI::PipelineLibraryData> serializedData = m_pipelineStateCache->GetLibrarySerializedData(m_pipelineLibraryHandle);
if (serializedData)
RHI::ConstPtr<RHI::PipelineLibrary> pipelineLib = m_pipelineStateCache->GetMergedLibrary(m_pipelineLibraryHandle);
if(!pipelineLib)
{
return;
}
//Check if explicit file load/save operation is needed as the RHI backend api may not support it
if (device->GetFeatures().m_isPsoCacheFileOperationsNeeded)
{
RHI::ConstPtr<RHI::PipelineLibraryData> serializedData = pipelineLib->GetSerializedData();
if(serializedData)
{
Utils::SaveObjectToFile<RHI::PipelineLibraryData>(m_pipelineLibraryPath, DataStream::ST_BINARY, serializedData.get());
}
}
else
{
Utils::SaveObjectToFile<RHI::PipelineLibraryData>(m_pipelineLibraryPath, DataStream::ST_BINARY, serializedData.get());
[[maybe_unused]] bool result = pipelineLib->SaveSerializedData(m_pipelineLibraryPath);
AZ_Error("Shader", result, "Pipeline Library %s was not saved", &m_pipelineLibraryPath);
}
}
}

@ -262,10 +262,11 @@ namespace UnitTest
AZ_CLASS_ALLOCATOR(PipelineLibrary, AZ::SystemAllocator, 0);
private:
AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, const AZ::RHI::PipelineLibraryData*) override { return AZ::RHI::ResultCode::Success; }
AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, [[maybe_unused]] const AZ::RHI::PipelineLibraryDescriptor& descriptor) override { return AZ::RHI::ResultCode::Success; }
void ShutdownInternal() override {}
AZ::RHI::ResultCode MergeIntoInternal(AZStd::span<const AZ::RHI::PipelineLibrary* const>) override { return AZ::RHI::ResultCode::Success; }
AZ::RHI::ConstPtr<AZ::RHI::PipelineLibraryData> GetSerializedDataInternal() const override { return nullptr; }
bool SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const { return true;}
};
class ShaderStageFunction

Loading…
Cancel
Save