Metal pso caching support via MTLBinaryArchive (#7212)

* PipelineLibrary (PSO Caching) support for Metal - API changes to handle Metal drivers implictly doing save/load of PipelineLibrary data - Fixed up code related to Metal device selection - PipelineLibrary support for Mac and ios Signed-off-by: moudgils <moudgils@amazon.com> * Fix compile errors for Dx12, Vulkan backend + Unit tests Signed-off-by: moudgils <47460854+moudgils@users.noreply.github.com> * Fixed errors related to M1 GPU Signed-off-by: moudgils <moudgils@amazon.com> * Fix a minor 'tab' validation issue Signed-off-by: moudgils <moudgils@amazon.com> * Addressed feedback Signed-off-by: moudgils <moudgils@amazon.com> * Minor feedback Signed-off-by: moudgils <moudgils@amazon.com> * Added a few asserts Signed-off-by: moudgils <47460854+moudgils@users.noreply.github.com> * Fix a typo Signed-off-by: moudgils <moudgils@amazon.com>
4 years ago · 608411ab99
parent 9a30c5366c
commit 608411ab99
25 changed files with 521 additions and 276 deletions
--- a/Gems/Atom/Feature/Common/Assets/Shaders/PostProcessing/DepthOfFieldWriteFocusDepthFromGpu.shader
+++ b/Gems/Atom/Feature/Common/Assets/Shaders/PostProcessing/DepthOfFieldWriteFocusDepthFromGpu.shader
@ -10,7 +10,6 @@
            "type" : "Compute"
        }
        ]
-    },
-    "DisabledRHIBackends": ["metal"]
+    }

 }
--- a/Gems/Atom/RHI/Code/Include/Atom/RHI.Reflect/DeviceFeatures.h
+++ b/Gems/Atom/RHI/Code/Include/Atom/RHI.Reflect/DeviceFeatures.h
@ -75,6 +75,9 @@ namespace AZ
            //! Whether Unbounded Array support is available.
            bool m_unboundedArrays = false;

+            //! Whether PipelineLibrary related serialized data needs to be loaded/saved explicitly as drivers (like dx12/vk) do not support it internally
+            bool m_isPsoCacheFileOperationsNeeded = true;
+            
            /// Additional features here.
        };
    }
--- a/Gems/Atom/RHI/Code/Include/Atom/RHI.Reflect/PhysicalDeviceDescriptor.h
+++ b/Gems/Atom/RHI/Code/Include/Atom/RHI.Reflect/PhysicalDeviceDescriptor.h
@ -22,14 +22,15 @@ namespace AZ
    {
        //! A list of popular vendor Ids. 
        AZ_ENUM_CLASS_WITH_UNDERLYING_TYPE(VendorId, uint32_t,
-            (Unknown,  0),
-            (Intel,    0x8086),
-            (nVidia,   0x10de),
-            (AMD,      0x1002),
-            (Qualcomm, 0x5143),
-            (Samsung,  0x1099),
-            (ARM,      0x13B5),
-            (Warp,     0x1414)
+            (Unknown,       0),
+            (Intel,         0x8086),
+            (nVidia,        0x10de),
+            (AMD,           0x1002),
+            (Qualcomm,      0x5143),
+            (Samsung,       0x1099),
+            (ARM,           0x13B5),
+            (Warp,          0x1414),
+            (Apple,         0x106B) 
        );

        void ReflectVendorIdEnums(ReflectContext* context);
--- a/Gems/Atom/RHI/Code/Include/Atom/RHI/PipelineLibrary.h
+++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/PipelineLibrary.h
@ -19,7 +19,15 @@ namespace AZ
        /// A handle typed to the pipeline library. Used by the PipelineStateCache to abstract access.
        using PipelineLibraryHandle = Handle<uint32_t, class PipelineLibrary>;

-
+        struct PipelineLibraryDescriptor
+        {
+            //Serialized data with which to init the PipelineLibrary
+            ConstPtr<PipelineLibraryData> m_serializedData = nullptr;
+            //The file path name associated with serialized data. It can be passed
+            //to the RHI backend to do load/save operation via the drivers.
+            AZStd::string m_filePath;
+        };
+            
        //! PipelineState initialization is an expensive operation on certain platforms. If multiple pipeline states
        //! are created with little variation between them, the contents are still duplicated. This class is an allocation
        //! context for pipeline states, provided at PipelineState::Init, which will perform de-duplication of
@ -50,8 +58,8 @@ namespace AZ
            //! serialized and the contents saved to disk. Subsequent loads will experience much faster pipeline
            //! state creation times (on supported platforms). On success, the library is transitioned to the
            //! initialized state. On failure, the library remains uninitialized.
-            //! @param serializedData The initial serialized data used to initialize the library. It can be null.
-            ResultCode Init(Device& device, const PipelineLibraryData* serializedData);
+            //! @param descriptor The descriptor needed to init the PipelineLibrary.
+            ResultCode Init(Device& device, const PipelineLibraryDescriptor& descriptor);

            //! Merges the contents of other libraries into this library. This method must be called
            //! on an initialized library. A common use case for this method is to construct thread-local
@ -65,6 +73,9 @@ namespace AZ
            //! this method to extract serialized data prior to application shutdown, save it to disk, and
            //! use it when initializing on subsequent runs.
            ConstPtr<PipelineLibraryData> GetSerializedData() const;
+            
+            //! Saves the platform-specific data to disk using the filePath provided. This is done through RHI backend drivers.
+            bool SaveSerializedData(const AZStd::string& filePath) const;

            //! Returns whether the current library need to be merged
            virtual bool IsMergeRequired() const;
@ -79,7 +90,7 @@ namespace AZ
            // Platform API

            /// Called when the library is being created.
-            virtual ResultCode InitInternal(Device& device, const PipelineLibraryData* serializedData) = 0;
+            virtual ResultCode InitInternal(Device& device, const PipelineLibraryDescriptor& descriptor) = 0;

            /// Called when the library is being shutdown.
            virtual void ShutdownInternal() = 0;
@ -89,6 +100,9 @@ namespace AZ

            /// Called when the library is serializing out platform-specific data.
            virtual ConstPtr<PipelineLibraryData> GetSerializedDataInternal() const = 0;
+            
+            /// Called when we want the RHI backend to save out the Pipeline Library via the drivers
+            virtual bool SaveSerializedDataInternal(const AZStd::string& filePath) const = 0;

            //////////////////////////////////////////////////////////////////////////
        };
--- a/Gems/Atom/RHI/Code/Include/Atom/RHI/PipelineStateCache.h
+++ b/Gems/Atom/RHI/Code/Include/Atom/RHI/PipelineStateCache.h
@ -22,131 +22,125 @@ namespace AZ
 {
    namespace RHI
    {
-        /**
-         * Problem: High-level rendering code works in 'materials', 'shaders', and 'models', but the RHI works in
-         * 'pipeline states'. Therefore, a translation process must exist to resolve a shader variation (plus runtime
-         * state) into a pipeline state suitable for consumption by the RHI. These resolve operations can number in the
-         * thousands per frame, and (ideally) are heavily jobified.
-         *
-         * Another problem is that pipeline state creation is not fast, as on some platforms it will involve synchronous
-         * byte-code compilation. This could take anywhere from <1ms to >150ms. If compilation is done synchronously and
-         * immediately, the cache will effectively stall the entire process if multiple threads request the same pending
-         * pipeline state.
-         *
-         * Therefore, PipelineStateCache adheres to the following requirements:
-         *      1. A cache miss does not serialize all threads on a pipeline state compilation event.
-         *      2. A cache hit results in zero contention.
-         *
-         * Justification: Most pipeline state compilation will occur in the first few frames, but can also occur when new
-         * 'permutations' are hit while exploring. In the 90% case, the cache is warm and each frame results in a 100%
-         * cache hit rate. With zero locks, this scales extremely well across threads and removes a bottleneck from the
-         * render code. In the event that compilations are required, multiple threads are now able to participate in the
-         * compilation process without serializing each other.
-         *
-         * To accomplish this, the pipeline state cache uses three 'phases' of caching.
-         *      1. A global, read-only cache - designed as the 'fast path' for when the cache is warm.
-         *      2. A thread-local cache - reduces contention on the global pending cache for successive requests on the same thread.
-         *      3. A global, locked pending cache - de-duplicates pipeline state allocations.
-         *
-         * Each library has global and thread-local caches. Initially, the global cache is checked, if that fails, the
-         * thread-local cache is checked (no locks taken). Finally, the pending cache is checked under a lock and if
-         * the entry still doesn't exist, it is allocated and added to the pending cache. A thread-local PipelineLibrary
-         * is used to compile the pipeline state, which eliminates all locking for compilation.
-         *
-         * Pipeline states can be acquired at any time and from any thread. The cache will take a reader lock. During
-         * AcquirePipelineState, the global read-only cache is not updated, but the thread-local cache and pending
-         * global cache may be. Furthermore, compilations are performed on the calling thread, which means that separate
-         * thread may return a pipeline state that is still compiling. It is required that all pending AcquirePipelineState
-         * calls complete prior to using the returned pipeline state pointers during command list recording.
-         *
-         * Example Scenarios:
-         *
-         *  1. Threads request the same un-cached pipeline state:
-         *
-         *      Both the global read-only cache and thread-local caches miss, one thread wins the race to take a lock
-         *      on the global pending cache. It allocates but does not compile the pipeline state. All other threads wait on the
-         *      lock (which should be quick) and then find and return the uninitialized pipeline state. The compiling
-         *      thread uses the thread-local PipelineLibrary instance to compile the pipeline state. Non-compiling threads
-         *      will enter the uninitialized pipeline state into their thread-local cache (as does the compiling thread once it
-         *      completes). Note that the compiling thread is now busy, but all remaining threads are now unblocked to compile other
-         *      pipeline states.
-         *
-         *  2. A thread requests a pipeline state being compiled on another thread:
-         *
-         *      In this case, the global read-only cache won't have the pipeline state (since it's being compiled during
-         *      the current cycle, and the pending cache is only merged at the end of the cycle). It also won't have the
-         *      entry in the thread-local cache. It then hits the global pending cache, which will return the live instance
-         *      (being compiled). It then caches the result in its thread-local cache, so that successive requests will no
-         *      longer require a lock on the pending cache.
-         *
-         *  3. The cache is warm and all pipeline states are compiled:
-         *
-         *      Each thread hits the same read-only cache (which succeeds) and returns the pipeline state immediately.
-         *      This is the fast-path case where multiple threads are now able to resolve pipeline states with very
-         *      little performance overhead.
-         *
-         * Example Usage:
-         * @code{.cpp}
-         *      // Create library instance.
-         *      RHI::PipelineLibraryHandle libraryHandle = pipelineStateCache->CreateLibrary(serializedData); // Initial data loaded from disk.
-         *
-         *      // In jobs. Lots and lots of requests.
-         *      const RHI::PipelineState* pipelineState = pipelineStateCache->AcquirePipelineState(libraryHandle, descriptor);
-         *
-         *      // Reset contents of library. Releases all pipeline state references. Library remains valid.
-         *      pipelineStateCache->ResetLibrary(libraryHandle);
-         *
-         *      // Release library and all held references.
-         *      pipelineStateCache->ReleaseLibrary(libraryHandle);
-         * @endcode
-         */
+        //! Problem: High-level rendering code works in 'materials', 'shaders', and 'models', but the RHI works in
+        //! 'pipeline states'. Therefore, a translation process must exist to resolve a shader variation (plus runtime
+        //! state) into a pipeline state suitable for consumption by the RHI. These resolve operations can number in the
+        //! thousands per frame, and (ideally) are heavily jobified.
+        //!
+        //! Another problem is that pipeline state creation is not fast, as on some platforms it will involve synchronous
+        //! byte-code compilation. This could take anywhere from <1ms to >150ms. If compilation is done synchronously and
+        //! immediately, the cache will effectively stall the entire process if multiple threads request the same pending
+        //! pipeline state.
+        //!
+        //! Therefore, PipelineStateCache adheres to the following requirements:
+        //!      1. A cache miss does not serialize all threads on a pipeline state compilation event.
+        //!      2. A cache hit results in zero contention.
+        //!
+        //! Justification: Most pipeline state compilation will occur in the first few frames, but can also occur when new
+        //! 'permutations' are hit while exploring. In the 90% case, the cache is warm and each frame results in a 100%
+        //! cache hit rate. With zero locks, this scales extremely well across threads and removes a bottleneck from the
+        //! render code. In the event that compilations are required, multiple threads are now able to participate in the
+        //! compilation process without serializing each other.
+        //!
+        //! To accomplish this, the pipeline state cache uses three 'phases' of caching.
+        //!      1. A global, read-only cache - designed as the 'fast path' for when the cache is warm.
+        //!      2. A thread-local cache - reduces contention on the global pending cache for successive requests on the same thread.
+        //!      3. A global, locked pending cache - de-duplicates pipeline state allocations.
+        //!
+        //! Each library has global and thread-local caches. Initially, the global cache is checked, if that fails, the
+        //! thread-local cache is checked (no locks taken). Finally, the pending cache is checked under a lock and if
+        //! the entry still doesn't exist, it is allocated and added to the pending cache. A thread-local PipelineLibrary
+        //! is used to compile the pipeline state, which eliminates all locking for compilation.
+        //!
+        //! Pipeline states can be acquired at any time and from any thread. The cache will take a reader lock. During
+        //! AcquirePipelineState, the global read-only cache is not updated, but the thread-local cache and pending
+        //! global cache may be. Furthermore, compilations are performed on the calling thread, which means that separate
+        //! thread may return a pipeline state that is still compiling. It is required that all pending AcquirePipelineState
+        //! calls complete prior to using the returned pipeline state pointers during command list recording.
+        //!
+        //! Example Scenarios:
+        //!
+        //!  1. Threads request the same un-cached pipeline state:
+        //!
+        //!      Both the global read-only cache and thread-local caches miss, one thread wins the race to take a lock
+        //!      on the global pending cache. It allocates but does not compile the pipeline state. All other threads wait on the
+        //!      lock (which should be quick) and then find and return the uninitialized pipeline state. The compiling
+        //!      thread uses the thread-local PipelineLibrary instance to compile the pipeline state. Non-compiling threads
+        //!      will enter the uninitialized pipeline state into their thread-local cache (as does the compiling thread once it
+        //!      completes). Note that the compiling thread is now busy, but all remaining threads are now unblocked to compile other
+        //!      pipeline states.
+        //!
+        //!  2. A thread requests a pipeline state being compiled on another thread:
+        //!
+        //!      In this case, the global read-only cache won't have the pipeline state (since it's being compiled during
+        //!      the current cycle, and the pending cache is only merged at the end of the cycle). It also won't have the
+        //!      entry in the thread-local cache. It then hits the global pending cache, which will return the live instance
+        //!      (being compiled). It then caches the result in its thread-local cache, so that successive requests will no
+        //!      longer require a lock on the pending cache.
+        //!
+        //!  3. The cache is warm and all pipeline states are compiled:
+        //!
+        //!      Each thread hits the same read-only cache (which succeeds) and returns the pipeline state immediately.
+        //!      This is the fast-path case where multiple threads are now able to resolve pipeline states with very
+        //!      little performance overhead.
+        //!
+        //! Example Usage:
+        //! @code{.cpp}
+        //!      // Create library instance.
+        //!      RHI::PipelineLibraryHandle libraryHandle = pipelineStateCache->CreateLibrary(serializedData); // Initial data loaded from disk.
+        //!
+        //!      // In jobs. Lots and lots of requests.
+        //!      const RHI::PipelineState* pipelineState = pipelineStateCache->AcquirePipelineState(libraryHandle, descriptor);
+        //!
+        //!      // Reset contents of library. Releases all pipeline state references. Library remains valid.
+        //!      pipelineStateCache->ResetLibrary(libraryHandle);
+        //!
+        //!      // Release library and all held references.
+        //!      pipelineStateCache->ReleaseLibrary(libraryHandle);
+        //! @endcode
+        //!
        class PipelineStateCache final
            : public AZStd::intrusive_base
        {
        public:
            AZ_CLASS_ALLOCATOR(PipelineStateCache, SystemAllocator, 0);

-            /**
-             * The maximum number of libraries is configurable at compile time. A fixed number is used
-             * to avoid having to lazily resize thread-local arrays when traversing them, and also to
-             * avoid a pointer indirection on access.
-             */
+            //! The maximum number of libraries is configurable at compile time. A fixed number is used
+            //! to avoid having to lazily resize thread-local arrays when traversing them, and also to
+            //! avoid a pointer indirection on access.
            static const size_t LibraryCountMax = 256;

            static Ptr<PipelineStateCache> Create(Device& device);

-            /// Resets the caches of all pipeline libraries back to empty. All internal references to pipeline states are released.
+            //! Resets the caches of all pipeline libraries back to empty. All internal references to pipeline states are released.
            void Reset();

-            /// Creates an internal pipeline library instance and returns its handle.
-            PipelineLibraryHandle CreateLibrary(const PipelineLibraryData* serializedData);
+            //! Creates an internal pipeline library instance and returns its handle.
+            PipelineLibraryHandle CreateLibrary(const PipelineLibraryData* serializedData, const AZStd::string& filePath = "");

-            /// Releases the pipeline library and purges it from the cache. Releases all held references to pipeline states for the library.
+            //! Releases the pipeline library and purges it from the cache. Releases all held references to pipeline states for the library.
            void ReleaseLibrary(PipelineLibraryHandle handle);

-            /// Resets cache contents in the library. Releases all held references to pipeline states for the library.
+            //! Resets cache contents in the library. Releases all held references to pipeline states for the library.
            void ResetLibrary(PipelineLibraryHandle handle);

-            /// Returns the serialized data for the library, which can be used to re-initialize it.
-            ConstPtr<PipelineLibraryData> GetLibrarySerializedData(PipelineLibraryHandle handle) const;
-
-            /**
-             * Acquires a pipeline state (either draw or dispatch variants) from the cache. Pipeline states are associated
-             * to a specific library handle. Successive calls with the same pipeline state descriptor hash will return the same
-             * pipeline state, even across threads. If the library handle is invalid or the acquire operation fails, a null pointer
-             * is returned. Otherwise, a valid pipeline state pointer is returned (regardless of whether pipeline state compilation succeeds).
-             *
-             * It is permitted to take a strong reference to the returned pointer, but is not necessary as long as the reference
-             * is discarded on a library reset / release event. The cache will store a reference internally. If a strong reference
-             * is held externally, the instance will remain valid even after the cache is reset / destroyed.
-             */
+            //! Returns the resulting merged library from all the threadLibraries related to the passed in handle.
+            //! The merged library can be used to write out the serialized data.
+            Ptr<PipelineLibrary> GetMergedLibrary(PipelineLibraryHandle handle) const;
+
+            //! Acquires a pipeline state (either draw or dispatch variants) from the cache. Pipeline states are associated
+            //! to a specific library handle. Successive calls with the same pipeline state descriptor hash will return the same
+            //! pipeline state, even across threads. If the library handle is invalid or the acquire operation fails, a null pointer
+            //! is returned. Otherwise, a valid pipeline state pointer is returned (regardless of whether pipeline state compilation succeeds).
+            //!
+            //! It is permitted to take a strong reference to the returned pointer, but is not necessary as long as the reference
+            //! is discarded on a library reset / release event. The cache will store a reference internally. If a strong reference
+            //! is held externally, the instance will remain valid even after the cache is reset / destroyed.
            const PipelineState* AcquirePipelineState(PipelineLibraryHandle library, const PipelineStateDescriptor& descriptor);

-            /**
-             * This method merges the global pending cache into the global read-only cache and clears all thread-local caches.
-             * This reduces the total memory footprint of the caches and optimizes subsequent fetches. This method should be called
-             * once per frame.
-             */
+            //! This method merges the global pending cache into the global read-only cache and clears all thread-local caches.
+            //! This reduces the total memory footprint of the caches and optimizes subsequent fetches. This method should be called
+            //! once per frame.
            void Compact();

        private:
@ -198,8 +192,9 @@ namespace AZ
                // Tracks the number of pipeline states actively being compiled across all threads.
                AZStd::atomic_uint32_t m_pendingCompileCount = {0};

-                // Used to prime the thread libraries.
-                ConstPtr<PipelineLibraryData> m_serializedData;
+                // Contains the initial serialized data (Used to prime the thread libraries)
+                // or the file name that contains the serialized data
+                PipelineLibraryDescriptor m_pipelineLibraryDescriptor;
            };

            using GlobalLibrarySet = AZStd::fixed_vector<GlobalLibraryEntry, LibraryCountMax>;
@ -209,36 +204,32 @@ namespace AZ
                // A thread-local cache used to reduce contention on the global pending cache.
                PipelineStateSet m_threadLocalCache;

-                /**
-                 * Each thread has its own pipeline library. This allows threads to cache disjoint
-                 * pipeline states without locking. The libraries are coalesced into a single library
-                 * during GetLibrarySerializedData. The library is lazily initialized on the thread
-                 * and uses the initial serialized data passed in at creation time.
-                 */
+                //! Each thread has its own pipeline library. This allows threads to cache disjoint
+                //! pipeline states without locking. The libraries are coalesced into a single library
+                //! during GetMergedLibrary. The library is lazily initialized on the thread
+                //! and uses the initial serialized data passed in at creation time.
                Ptr<PipelineLibrary> m_library;
            };

-            /**
-             * Each thread has its own list of pipeline library entries. The index maps 1-to-1 with GlobalLibrarySet.
-             * GlobalLibrarySet contains the total size of the array; whereas the ThreadLibrarySet is just an array.
-             * The size of the global set should be used when traversing the thread library entries.
-             */
+            //! Each thread has its own list of pipeline library entries. The index maps 1-to-1 with GlobalLibrarySet.
+            //! GlobalLibrarySet contains the total size of the array; whereas the ThreadLibrarySet is just an array.
+            //! The size of the global set should be used when traversing the thread library entries.
            using ThreadLibrarySet = AZStd::array<ThreadLibraryEntry, LibraryCountMax>;

-            /// Helper function which binary searches a pipeline state set looking for an entry which matches the requested descriptor.
+            //! Helper function which binary searches a pipeline state set looking for an entry which matches the requested descriptor.
            static const PipelineState* FindPipelineState(const PipelineStateSet& pipelineStateSet, const PipelineStateDescriptor& descriptor);

-            /// Helper function which inserts an entry into the set. Returns true if the entry was inserted, or false is a duplicate entry existed.
+            //! Helper function which inserts an entry into the set. Returns true if the entry was inserted, or false is a duplicate entry existed.
            static bool InsertPipelineState(PipelineStateSet& pipelineStateSet, PipelineStateEntry pipelineStateEntry);

-            /// Performs a pipeline state compilation on the global cache using the thread-local pipeline library.
+            //! Performs a pipeline state compilation on the global cache using the thread-local pipeline library.
            ConstPtr<PipelineState> CompilePipelineState(
                GlobalLibraryEntry& globalLibraryEntry,
                ThreadLibraryEntry& threadLibraryEntry,
                const PipelineStateDescriptor& pipelineStateDescriptor,
                PipelineStateHash pipelineStateHash);

-            /// Resets the library without validating the handle or taking a lock.
+            //! Resets the library without validating the handle or taking a lock.
            void ResetLibraryImpl(PipelineLibraryHandle handle);

            Ptr<Device> m_device;
--- a/Gems/Atom/RHI/Code/Source/RHI/PipelineLibrary.cpp
+++ b/Gems/Atom/RHI/Code/Source/RHI/PipelineLibrary.cpp
@ -25,7 +25,7 @@ namespace AZ
            return true;
        }

-        ResultCode PipelineLibrary::Init(Device& device, const PipelineLibraryData* serializedData)
+        ResultCode PipelineLibrary::Init(Device& device, const PipelineLibraryDescriptor& descriptor)
        {
            if (Validation::IsEnabled())
            {
@ -36,7 +36,7 @@ namespace AZ
                }
            }

-            ResultCode resultCode = InitInternal(device, serializedData);
+            ResultCode resultCode = InitInternal(device, descriptor);
            if (resultCode == ResultCode::Success)
            {
                DeviceObject::Init(device);
@ -72,6 +72,16 @@ namespace AZ

            return GetSerializedDataInternal();
        }
+    
+        bool PipelineLibrary::SaveSerializedData(const AZStd::string& filePath) const
+        {
+            if (!ValidateIsInitialized())
+            {
+                return false;
+            }
+
+            return SaveSerializedDataInternal(filePath);
+        }

        bool PipelineLibrary::IsMergeRequired() const
        {
--- a/Gems/Atom/RHI/Code/Source/RHI/PipelineStateCache.cpp
+++ b/Gems/Atom/RHI/Code/Source/RHI/PipelineStateCache.cpp
@ -81,7 +81,7 @@ namespace AZ
            }
        }

-        PipelineLibraryHandle PipelineStateCache::CreateLibrary(const PipelineLibraryData* serializedData)
+        PipelineLibraryHandle PipelineStateCache::CreateLibrary(const PipelineLibraryData* serializedData, const AZStd::string& filePath)
        {
            AZStd::unique_lock<AZStd::shared_mutex> lock(m_mutex);

@ -110,8 +110,8 @@ namespace AZ
            m_globalLibraryActiveBits[handle.GetIndex()] = true;

            GlobalLibraryEntry& libraryEntry = m_globalLibrarySet[handle.GetIndex()];
-            libraryEntry.m_serializedData = serializedData;
-
+            libraryEntry.m_pipelineLibraryDescriptor.m_serializedData = serializedData;
+            libraryEntry.m_pipelineLibraryDescriptor.m_filePath = filePath;
            AZ_Assert(libraryEntry.m_readOnlyCache.empty() && libraryEntry.m_pendingCache.empty(), "Library entry has entries in its caches!");

            return handle;
@ -128,8 +128,9 @@ namespace AZ

                GlobalLibraryEntry& libraryEntry = m_globalLibrarySet[handle.GetIndex()];
                libraryEntry.m_readOnlyCache.clear();
-                libraryEntry.m_serializedData = nullptr;
-
+                libraryEntry.m_pipelineLibraryDescriptor.m_serializedData = nullptr;
+                libraryEntry.m_pipelineLibraryDescriptor.m_filePath = "";
+                
                m_globalLibraryActiveBits[handle.GetIndex()] = false;
                m_libraryFreeList.push_back(handle);
            }
@ -162,7 +163,7 @@ namespace AZ
            libraryEntry.m_pendingCacheMutex.unlock();
        }

-        ConstPtr<PipelineLibraryData> PipelineStateCache::GetLibrarySerializedData(PipelineLibraryHandle handle) const
+        Ptr<PipelineLibrary> PipelineStateCache::GetMergedLibrary(PipelineLibraryHandle handle) const
        {
            if (handle.IsNull())
            {
@ -188,7 +189,7 @@ namespace AZ
                }
            });

-            bool doesPSODataExist = entry.m_serializedData.get();
+            bool doesPSODataExist = entry.m_pipelineLibraryDescriptor.m_serializedData.get();
            for (const RHI::PipelineLibrary* libraryBase : threadLibraries)
            {
                const PipelineLibrary* library = static_cast<const PipelineLibrary*>(libraryBase);
@ -198,7 +199,7 @@ namespace AZ
            if (doesPSODataExist)
            {
                Ptr<PipelineLibrary> pipelineLibrary = Factory::Get().CreatePipelineLibrary();
-                ResultCode resultCode = pipelineLibrary->Init(*m_device, entry.m_serializedData.get());
+                ResultCode resultCode = pipelineLibrary->Init(*m_device, entry.m_pipelineLibraryDescriptor);

                if (resultCode == ResultCode::Success)
                {
@ -206,7 +207,7 @@ namespace AZ

                    if (resultCode == ResultCode::Success)
                    {
-                        return pipelineLibrary->GetSerializedData();
+                        return pipelineLibrary;
                    }
                }
            }
@ -316,7 +317,7 @@ namespace AZ
                    if (!threadLibraryEntry.m_library)
                    {
                        Ptr<PipelineLibrary> pipelineLibrary = Factory::Get().CreatePipelineLibrary();
-                        RHI::ResultCode resultCode = pipelineLibrary->Init(*m_device, globalLibraryEntry.m_serializedData.get());
+                        RHI::ResultCode resultCode = pipelineLibrary->Init(*m_device, globalLibraryEntry.m_pipelineLibraryDescriptor);
                        if (resultCode != RHI::ResultCode::Success)
                        {
                            AZ_Warning("PipelineStateCache", false, "Failed to initialize pipeline library. PipelineLibrary usage is disabled.");
--- a/Gems/Atom/RHI/Code/Tests/PipelineState.h
+++ b/Gems/Atom/RHI/Code/Tests/PipelineState.h
@ -23,10 +23,11 @@ namespace UnitTest
        AZStd::unordered_map<uint64_t, const AZ::RHI::PipelineState*> m_pipelineStates;

    private:
-        AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, const AZ::RHI::PipelineLibraryData*) override { return AZ::RHI::ResultCode::Success; }
+        AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, const RHI::PipelineLibraryDescriptor&) override { return AZ::RHI::ResultCode::Success; }
        void ShutdownInternal() override;
        AZ::RHI::ResultCode MergeIntoInternal(AZStd::span<const AZ::RHI::PipelineLibrary* const>) override;
        AZ::RHI::ConstPtr<AZ::RHI::PipelineLibraryData> GetSerializedDataInternal() const override { return nullptr; }
+        bool SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const override { return false; }
    };

    class PipelineState
--- a/Gems/Atom/RHI/Code/Tests/PipelineStateTests.cpp
+++ b/Gems/Atom/RHI/Code/Tests/PipelineStateTests.cpp
@ -189,12 +189,12 @@ namespace UnitTest
        RHI::Ptr<RHI::Device> device = MakeTestDevice();

        RHI::Ptr<RHI::PipelineLibrary> pipelineLibrary = RHI::Factory::Get().CreatePipelineLibrary();
-        RHI::ResultCode resultCode = pipelineLibrary->Init(*device, nullptr);
+        RHI::ResultCode resultCode = pipelineLibrary->Init(*device, RHI::PipelineLibraryDescriptor{});
        EXPECT_EQ(resultCode, RHI::ResultCode::Success);

        // Second init should fail and throw validation error.
        AZ_TEST_START_ASSERTTEST;
-        resultCode = pipelineLibrary->Init(*device, nullptr);
+        resultCode = pipelineLibrary->Init(*device, RHI::PipelineLibraryDescriptor{});
        AZ_TEST_STOP_ASSERTTEST(1);

        EXPECT_EQ(resultCode, RHI::ResultCode::InvalidOperation);
@ -249,7 +249,7 @@ namespace UnitTest
        // Calling library methods with a null handle should early out.
        pipelineStateCache->ResetLibrary({});
        pipelineStateCache->ReleaseLibrary({});
-        EXPECT_EQ(pipelineStateCache->GetLibrarySerializedData({}), nullptr);
+        EXPECT_EQ(pipelineStateCache->GetMergedLibrary({}), nullptr);
        EXPECT_EQ(pipelineStateCache->AcquirePipelineState({}, CreatePipelineStateDescriptor(0)), nullptr);
        pipelineStateCache->Compact();
        ValidateCacheIntegrity(pipelineStateCache);
--- a/Gems/Atom/RHI/DX12/Code/Source/RHI/PipelineLibrary.cpp
+++ b/Gems/Atom/RHI/DX12/Code/Source/RHI/PipelineLibrary.cpp
@ -40,7 +40,7 @@ namespace AZ
            return aznew PipelineLibrary;
        }

-        RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, [[maybe_unused]] const RHI::PipelineLibraryData* serializedData)
+        RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, [[maybe_unused]] const RHI::PipelineLibraryDescriptor& descriptor)
        {
            Device& device = static_cast<Device&>(deviceBase);
            ID3D12DeviceX* dx12Device = device.GetDevice();
@ -57,9 +57,9 @@ namespace AZ
            }


-            if (serializedData && shouldCreateLibFromSerializedData)
+            if (descriptor.m_serializedData && shouldCreateLibFromSerializedData)
            {
-                bytes = serializedData->GetData();
+                bytes = descriptor.m_serializedData->GetData();
            }

            Microsoft::WRL::ComPtr<ID3D12PipelineLibraryX> libraryComPtr;
@ -70,7 +70,7 @@ namespace AZ

                if (SUCCEEDED(hr))
                {
-                    m_serializedData = serializedData;
+                    m_serializedData = descriptor.m_serializedData;
                }
                else
                {
@ -270,5 +270,13 @@ namespace AZ
            return false;
 #endif
        }
+
+        bool PipelineLibrary::SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const
+        {
+            // DX12 drivers cannot save serialized data
+            [[maybe_unused]] Device& device = static_cast<Device&>(GetDevice());
+            AZ_Assert(!device.GetFeatures().m_isPsoCacheFileOperationsNeeded, "Explicit PSO cache operations should not be disabled for DX12");
+            return false;
+        }
    }
 }
--- a/Gems/Atom/RHI/DX12/Code/Source/RHI/PipelineLibrary.h
+++ b/Gems/Atom/RHI/DX12/Code/Source/RHI/PipelineLibrary.h
@ -31,11 +31,12 @@ namespace AZ

            //////////////////////////////////////////////////////////////////////////
            // RHI::PipelineLibrary
-            RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryData* serializedData) override;
+            RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryDescriptor& descriptor) override;
            void ShutdownInternal() override;
            RHI::ResultCode MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> libraries) override;
            RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override;
            bool IsMergeRequired() const;
+            bool SaveSerializedDataInternal(const AZStd::string& filePath) const override;
            //////////////////////////////////////////////////////////////////////////

            ID3D12DeviceX* m_dx12Device = nullptr;
--- a/Gems/Atom/RHI/Metal/Code/Source/Platform/iOS/RHI/Metal_RHI_iOS.cpp
+++ b/Gems/Atom/RHI/Metal/Code/Source/Platform/iOS/RHI/Metal_RHI_iOS.cpp
@ -24,7 +24,7 @@ namespace Platform
    {
        AZ::RHI::PhysicalDeviceList physicalDeviceList;
        AZ::Metal::PhysicalDevice* physicalDevice = aznew AZ::Metal::PhysicalDevice;
-        physicalDevice->Init(nil);
+        physicalDevice->Init(MTLCreateSystemDefaultDevice());
        physicalDeviceList.emplace_back(physicalDevice);
        return physicalDeviceList;
    }
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/CommandList.cpp
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/CommandList.cpp
@ -73,11 +73,11 @@ namespace AZ
                    const auto* sourceBuffer = static_cast<const Buffer*>(descriptor.m_sourceBuffer);
                    const auto* destinationBuffer = static_cast<const Buffer*>(descriptor.m_destinationBuffer);

-                    [blitEncoder copyFromBuffer:sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
-                                   sourceOffset:descriptor.m_sourceOffset
-                                       toBuffer:destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
-                              destinationOffset:descriptor.m_destinationOffset
-                                           size:descriptor.m_size];
+                    [blitEncoder copyFromBuffer: sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
+                                   sourceOffset: descriptor.m_sourceOffset
+                                       toBuffer: destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
+                              destinationOffset: descriptor.m_destinationOffset
+                                           size: descriptor.m_size];

                    Platform::SynchronizeBufferOnGPU(blitEncoder, destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>());
                    break;
@ -101,14 +101,14 @@ namespace AZ
                                                    descriptor.m_destinationOrigin.m_front);

                    [blitEncoder copyFromTexture: sourceImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
-                                            sourceSlice: descriptor.m_sourceSubresource.m_arraySlice
-                                            sourceLevel: descriptor.m_sourceSubresource.m_mipSlice
-                                           sourceOrigin: sourceOrigin
-                                             sourceSize: sourceSize
-                                              toTexture: destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
-                                       destinationSlice: descriptor.m_destinationSubresource.m_arraySlice
-                                       destinationLevel: descriptor.m_destinationSubresource.m_mipSlice
-                                      destinationOrigin: destinationOrigin];
+                                     sourceSlice: descriptor.m_sourceSubresource.m_arraySlice
+                                     sourceLevel: descriptor.m_sourceSubresource.m_mipSlice
+                                    sourceOrigin: sourceOrigin
+                                      sourceSize: sourceSize
+                                       toTexture: destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
+                                destinationSlice: descriptor.m_destinationSubresource.m_arraySlice
+                                destinationLevel: descriptor.m_destinationSubresource.m_mipSlice
+                               destinationOrigin: destinationOrigin];

                    Platform::SynchronizeTextureOnGPU(blitEncoder, destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>());
                    break;
@ -127,15 +127,15 @@ namespace AZ
                                                     descriptor.m_sourceSize.m_height,
                                                     descriptor.m_sourceSize.m_depth);

-                    [blitEncoder copyFromBuffer:sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
-                                   sourceOffset:sourceBuffer->GetMemoryView().GetOffset() + descriptor.m_sourceOffset
-                              sourceBytesPerRow:descriptor.m_sourceBytesPerRow
-                            sourceBytesPerImage:descriptor.m_sourceBytesPerImage
-                                     sourceSize:sourceSize
-                                      toTexture:destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
-                               destinationSlice:descriptor.m_destinationSubresource.m_arraySlice
-                               destinationLevel:descriptor.m_destinationSubresource.m_mipSlice
-                              destinationOrigin:destinationOrigin];
+                    [blitEncoder copyFromBuffer: sourceBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
+                                   sourceOffset: sourceBuffer->GetMemoryView().GetOffset() + descriptor.m_sourceOffset
+                              sourceBytesPerRow: descriptor.m_sourceBytesPerRow
+                            sourceBytesPerImage: descriptor.m_sourceBytesPerImage
+                                     sourceSize: sourceSize
+                                      toTexture: destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
+                               destinationSlice: descriptor.m_destinationSubresource.m_arraySlice
+                               destinationLevel: descriptor.m_destinationSubresource.m_mipSlice
+                              destinationOrigin: destinationOrigin];

                    Platform::SynchronizeTextureOnGPU(blitEncoder, destinationImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>());
                    break;
@ -154,15 +154,15 @@ namespace AZ
                                                     descriptor.m_sourceSize.m_height,
                                                     descriptor.m_sourceSize.m_depth);

-                    [blitEncoder copyFromTexture:sourceImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
-                                     sourceSlice:descriptor.m_sourceSubresource.m_arraySlice
-                                     sourceLevel:descriptor.m_sourceSubresource.m_mipSlice
-                                    sourceOrigin:sourceOrigin
-                                      sourceSize:sourceSize
-                                        toBuffer:destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
-                               destinationOffset:destinationBuffer->GetMemoryView().GetOffset() + descriptor.m_destinationOffset
-                          destinationBytesPerRow:descriptor.m_destinationBytesPerRow
-                        destinationBytesPerImage:descriptor.m_destinationBytesPerImage];
+                    [blitEncoder copyFromTexture: sourceImage->GetMemoryView().GetGpuAddress<id<MTLTexture>>()
+                                     sourceSlice: descriptor.m_sourceSubresource.m_arraySlice
+                                     sourceLevel: descriptor.m_sourceSubresource.m_mipSlice
+                                    sourceOrigin: sourceOrigin
+                                      sourceSize: sourceSize
+                                        toBuffer: destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>()
+                               destinationOffset: destinationBuffer->GetMemoryView().GetOffset() + descriptor.m_destinationOffset
+                          destinationBytesPerRow: descriptor.m_destinationBytesPerRow
+                        destinationBytesPerImage: descriptor.m_destinationBytesPerImage];

                    Platform::SynchronizeBufferOnGPU(blitEncoder, destinationBuffer->GetMemoryView().GetGpuAddress<id<MTLBuffer>>());
                    break;
@ -192,7 +192,7 @@ namespace AZ

            id<MTLComputeCommandEncoder> computeEncoder = GetEncoder<id<MTLComputeCommandEncoder>>();
            [computeEncoder dispatchThreadgroups: numThreadGroup
-                             threadsPerThreadgroup: threadsPerGroup];
+                           threadsPerThreadgroup: threadsPerGroup];

        }

@ -235,8 +235,8 @@ namespace AZ
                {
                    id<MTLComputeCommandEncoder> computeEncoder = GetEncoder<id<MTLComputeCommandEncoder>>();
                    [computeEncoder setBytes: item.m_rootConstants
-                                     length: pipelineLayout.GetRootConstantsSize()
-                                    atIndex: pipelineLayout.GetRootConstantsSlotIndex()];
+                                      length: pipelineLayout.GetRootConstantsSize()
+                                     atIndex: pipelineLayout.GetRootConstantsSlotIndex()];

                }
            }
@ -434,25 +434,25 @@ namespace AZ
                            case RHI::ShaderStage::Vertex:
                            {
                                id<MTLRenderCommandEncoder> renderEncoder = GetEncoder<id<MTLRenderCommandEncoder>>();
-                                [renderEncoder setVertexBuffers:&mtlArgBuffers[startingIndex]
-                                                        offsets:&mtlArgBufferOffsets[startingIndex]
-                                                      withRange:range];
+                                [renderEncoder setVertexBuffers: &mtlArgBuffers[startingIndex]
+                                                        offsets: &mtlArgBufferOffsets[startingIndex]
+                                                      withRange: range];
                                break;
                            }
                            case RHI::ShaderStage::Fragment:
                            {
                                id<MTLRenderCommandEncoder> renderEncoder = GetEncoder<id<MTLRenderCommandEncoder>>();
-                                [renderEncoder setFragmentBuffers:&mtlArgBuffers[startingIndex]
-                                                          offsets:&mtlArgBufferOffsets[startingIndex]
-                                                        withRange:range];
+                                [renderEncoder setFragmentBuffers: &mtlArgBuffers[startingIndex]
+                                                          offsets: &mtlArgBufferOffsets[startingIndex]
+                                                        withRange: range];
                                break;
                            }
                            case RHI::ShaderStage::Compute:
                            {
                                id<MTLComputeCommandEncoder> computeEncoder = GetEncoder<id<MTLComputeCommandEncoder>>();
-                                [computeEncoder     setBuffers:&mtlArgBuffers[startingIndex]
-                                                       offsets:&mtlArgBufferOffsets[startingIndex]
-                                                     withRange:range];
+                                [computeEncoder     setBuffers: &mtlArgBuffers[startingIndex]
+                                                       offsets: &mtlArgBufferOffsets[startingIndex]
+                                                     withRange: range];
                                break;
                            }
                            default:
@ -537,13 +537,13 @@ namespace AZ

                    uint32_t indexOffset = indexBuffDescriptor.GetByteOffset() + (indexed.m_indexOffset * indexTypeSize) + buff->GetMemoryView().GetOffset();
                    [renderEncoder drawIndexedPrimitives: mtlPrimType
-                                                indexCount: indexed.m_indexCount
-                                                indexType: mtlIndexType
-                                                indexBuffer: mtlBuff
-                                                indexBufferOffset: indexOffset
-                                                instanceCount: indexed.m_instanceCount
-                                                baseVertex: indexed.m_vertexOffset
-                                                baseInstance: indexed.m_instanceOffset];
+                                              indexCount: indexed.m_indexCount
+                                               indexType: mtlIndexType
+                                             indexBuffer: mtlBuff
+                                       indexBufferOffset: indexOffset
+                                           instanceCount: indexed.m_instanceCount
+                                              baseVertex: indexed.m_vertexOffset
+                                            baseInstance: indexed.m_instanceOffset];
                    break;
                }

@ -551,10 +551,10 @@ namespace AZ
                {
                    const RHI::DrawLinear& linear = drawItem.m_arguments.m_linear;
                    [renderEncoder drawPrimitives: mtlPrimType
-                                        vertexStart: linear.m_vertexOffset
-                                        vertexCount: linear.m_vertexCount
-                                      instanceCount: linear.m_instanceCount
-                                       baseInstance: linear.m_instanceOffset];
+                                      vertexStart: linear.m_vertexOffset
+                                      vertexCount: linear.m_vertexCount
+                                    instanceCount: linear.m_instanceCount
+                                     baseInstance: linear.m_instanceOffset];
                    break;
                }
            }
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/Device.cpp
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/Device.cpp
@ -15,6 +15,7 @@
 #include <RHI/Conversions.h>
 #include <RHI/Device.h>
 #include <RHI/Metal.h>
+#include <RHI/PhysicalDevice.h>

 //Symbols related to Obj-c categories are getting stripped out as part of the link step for monolithic builds
 //This forces the linker to not strip symbols related to categories without actually referencing the dummy function.
@ -40,9 +41,10 @@ namespace AZ
            return aznew Device();
        }

-        RHI::ResultCode Device::InitInternal(RHI::PhysicalDevice& physicalDevice)
+        RHI::ResultCode Device::InitInternal(RHI::PhysicalDevice& physicalDeviceBase)
        {
-            m_metalDevice = MTLCreateSystemDefaultDevice();
+            PhysicalDevice& physicalDevice = static_cast<PhysicalDevice&>(physicalDeviceBase);
+            m_metalDevice = physicalDevice.GetNativeDevice();
            AZ_Assert(m_metalDevice, "Native device wasnt created");
            m_eventListener = [[MTLSharedEventListener alloc] init];

@ -340,6 +342,9 @@ namespace AZ
            m_features.m_customResolvePositions = m_metalDevice.programmableSamplePositionsSupported;
            m_features.m_indirectDrawSupport = false;
            
+            //Metal drivers save and load serialized PipelineLibrary internally
+            m_features.m_isPsoCacheFileOperationsNeeded = false; 
+            
            RHI::QueryTypeFlags counterSamplingFlags = RHI::QueryTypeFlags::None;

            bool supportsInterDrawTimestamps = true;
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/PhysicalDevice.cpp
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/PhysicalDevice.cpp
@ -15,11 +15,15 @@ namespace Platform
    AZ::RHI::PhysicalDeviceList EnumerateDevices();
 }

-
 namespace AZ
 {
    namespace Metal
    {
+        id<MTLDevice> PhysicalDevice::GetNativeDevice()
+        {
+            return m_mtlNativeDevice;
+        }
+    
        RHI::PhysicalDeviceList PhysicalDevice::Enumerate()
        {
            return Platform::EnumerateDevices();
@ -29,15 +33,34 @@ namespace AZ
        {
            if(mtlDevice)
            {
+                m_mtlNativeDevice = mtlDevice;
                NSString * deviceName = [mtlDevice name];
-                const char * secondName = [ deviceName UTF8String ];
-                m_descriptor.m_description = AZStd::string(secondName);
-                m_descriptor.m_deviceId = [mtlDevice registryID];
+                const char * deviceNameCStr = [ deviceName UTF8String ];
+                m_descriptor.m_description = AZStd::string(deviceNameCStr);
+                m_descriptor.m_deviceId = deviceName.hash; //Used for storing PipelineLibraries
                
-                //Currently no way of knowing vendor id through metal. Using AMD as a placeholder for now.
-                m_descriptor.m_vendorId = RHI::VendorId::AMD;
+                if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::Apple).data()))
+                {
+                    m_descriptor.m_vendorId = RHI::VendorId::Apple;
+                }
+                else if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::Intel).data()))
+                {
+                    m_descriptor.m_vendorId = RHI::VendorId::Intel;
+                }
+                else if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::nVidia).data()))
+                {
+                    m_descriptor.m_vendorId = RHI::VendorId::nVidia;
+                }
+                else if(strstr(m_descriptor.m_description.c_str(), ToString(RHI::VendorId::AMD).data()))
+                {
+                    m_descriptor.m_vendorId = RHI::VendorId::AMD;
+                }
                
                m_descriptor.m_type = Platform::GetPhysicalDeviceType(mtlDevice);
+                
+                NSOperatingSystemVersion version = [[NSProcessInfo processInfo] operatingSystemVersion];
+                AZStd::string concatVer = AZStd::string::format("%li%li%li", version.majorVersion, version.minorVersion, version.patchVersion);
+                m_descriptor.m_driverVersion = AZStd::stoi(concatVer);
            }
        }

--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/PhysicalDevice.h
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/PhysicalDevice.h
@ -26,8 +26,12 @@ namespace AZ

            void Init(id<MTLDevice> mtlDevice);
            static RHI::PhysicalDeviceList Enumerate();
+            id<MTLDevice> GetNativeDevice();
+
        private:
            void Shutdown() override;
+            
+            id<MTLDevice> m_mtlNativeDevice = nil;
        };
    }
 }
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineLibrary.cpp
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineLibrary.cpp
@ -8,6 +8,7 @@

 #include <RHI/Device.h>
 #include <RHI/PipelineLibrary.h>
+#include <AzCore/Name/Name.h>

 namespace AZ
 {
@ -18,17 +19,111 @@ namespace AZ
            return aznew PipelineLibrary;
        }

-        RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryData* serializedData)
+        id<MTLBinaryArchive> PipelineLibrary::GetNativePipelineCache() const
        {
+            return m_mtlBinaryArchive;
+        }
+    
+        RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryDescriptor& descriptor)
+        {
+            DeviceObject::Init(deviceBase);
+            auto& device = static_cast<Device&>(deviceBase);
+
+            m_descriptor = descriptor;
+            NSError* error = nil;
+            MTLBinaryArchiveDescriptor* desc = [[MTLBinaryArchiveDescriptor alloc] init];
+        
+            NSString* psoCacheFilePath = [NSString stringWithCString:descriptor.m_filePath.c_str() encoding:NSUTF8StringEncoding];
+            NSURL* filePathURL = [NSURL fileURLWithPath:psoCacheFilePath isDirectory:NO];
+            
+            //Pass in the file path if it exists
+            if ([filePathURL checkResourceIsReachableAndReturnError:&error])
+            {
+                desc.url = filePathURL;
+            }
+            
+            //Create a new Pso cache. Use the existing fOile on disk if provided
+            m_mtlBinaryArchive = [device.GetMtlDevice() newBinaryArchiveWithDescriptor:desc error:&error];
+            [desc release];
+            desc = nil;
+            
+            SetName(GetName());
+            NSString* labelName = [NSString stringWithCString:GetName().GetCStr() encoding:NSUTF8StringEncoding];
+            m_mtlBinaryArchive.label = labelName;
            return RHI::ResultCode::Success;
        }

        void PipelineLibrary::ShutdownInternal()
        {
+            [m_mtlBinaryArchive release];
+            m_mtlBinaryArchive = nil;
+            m_renderPipelineStates.clear();
+            m_computePipelineStates.clear();
        }

+        id<MTLRenderPipelineState> PipelineLibrary::CreateGraphicsPipelineState(uint64_t hash, MTLRenderPipelineDescriptor* pipelineStateDesc)
+        {
+            Device& device = static_cast<Device&>(GetDevice());
+            NSError* error = nil;
+            AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
+            
+            NSArray* binArchives = [NSArray arrayWithObjects:m_mtlBinaryArchive,nil];
+            pipelineStateDesc.binaryArchives = binArchives;
+            //Create a new PSO. The drivers will use the Pso cache if the PSO resides in it
+            id<MTLRenderPipelineState> graphicsPipelineState =
+                [device.GetMtlDevice() newRenderPipelineStateWithDescriptor:pipelineStateDesc
+                                                                      error:&error];
+            m_renderPipelineStates.emplace(hash, pipelineStateDesc);
+            return graphicsPipelineState;
+        }
+        
+        id<MTLComputePipelineState> PipelineLibrary::CreateComputePipelineState(uint64_t hash, MTLComputePipelineDescriptor* pipelineStateDesc)
+        {
+            Device& device = static_cast<Device&>(GetDevice());
+            NSError* error = nil;
+            MTLComputePipelineReflection* ref;
+            AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
+            
+            NSArray* binArchives = [NSArray arrayWithObjects:m_mtlBinaryArchive,nil];
+            pipelineStateDesc.binaryArchives = binArchives;
+            //Create a new PSO. The drivers will use the Pso cache if the PSO resides in it
+            id<MTLComputePipelineState> computePipelineState =
+                [device.GetMtlDevice() newComputePipelineStateWithDescriptor: pipelineStateDesc
+                                                                     options: MTLPipelineOptionBufferTypeInfo
+                                                                  reflection: &ref
+                                                                       error: &error];
+            m_computePipelineStates.emplace(hash, pipelineStateDesc);
+            return computePipelineState;
+        }
+    
        RHI::ResultCode PipelineLibrary::MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> pipelineLibraries)
        {
+            AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
+            NSError* error = nil;
+            for (const RHI::PipelineLibrary* libraryBase : pipelineLibraries)
+            {
+                const PipelineLibrary* library = static_cast<const PipelineLibrary*>(libraryBase);
+                for (const auto& pipelineStateEntry : library->m_renderPipelineStates)
+                {
+                    if (m_renderPipelineStates.find(pipelineStateEntry.first) == m_renderPipelineStates.end())
+                    {
+                        [m_mtlBinaryArchive addRenderPipelineFunctionsWithDescriptor:pipelineStateEntry.second
+                                                                               error:&error];
+                        m_renderPipelineStates.emplace(pipelineStateEntry.first, pipelineStateEntry.second);
+                    }
+                }
+                
+                for (const auto& pipelineStateEntry : library->m_computePipelineStates)
+                {
+                    if (m_computePipelineStates.find(pipelineStateEntry.first) == m_computePipelineStates.end())
+                    {
+                        [m_mtlBinaryArchive addComputePipelineFunctionsWithDescriptor:pipelineStateEntry.second
+                                                                               error:&error];
+                        m_computePipelineStates.emplace(pipelineStateEntry.first, pipelineStateEntry.second);
+                    }
+                }
+            }
+            
            return RHI::ResultCode::Success;
        }

@ -36,5 +131,39 @@ namespace AZ
        {
            return nullptr;
        }
+    
+        bool PipelineLibrary::SaveSerializedDataInternal(const AZStd::string& filePath) const
+        {
+            AZStd::lock_guard<AZStd::mutex> lock(m_mutex);
+     
+            NSError* error = nil;
+            NSString*  psoCacheFilePath = [NSString stringWithCString:filePath.c_str() encoding:NSUTF8StringEncoding];
+            NSURL *baseURL = [NSURL fileURLWithPath:psoCacheFilePath];
+            
+            BOOL isDir;
+            NSFileManager *fileManager= [NSFileManager defaultManager];
+            NSString *directory = [psoCacheFilePath stringByDeletingLastPathComponent];
+            //If the directory where the PSO cache will reside does not exist create one
+            if(![fileManager fileExistsAtPath:directory isDirectory:&isDir])
+            {
+                if(![fileManager createDirectoryAtPath:directory withIntermediateDirectories:YES attributes:nil error:NULL])
+                {
+                    AZ_Error("PipelineStateCache", false, "Error: Unable to create the folder %s in order to save the PSO Cache", psoCacheFilePath);
+                    return false;
+                }
+            }
+            
+            if(m_mtlBinaryArchive)
+            {
+                [m_mtlBinaryArchive serializeToURL:baseURL
+                                             error:&error];
+            }
+            return error==nil;
+        }
+    
+        bool PipelineLibrary::IsMergeRequired() const
+        {
+            return !m_renderPipelineStates.empty() || !m_computePipelineStates.empty();
+        }
    }
 }
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineLibrary.h
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineLibrary.h
@ -22,18 +22,30 @@ namespace AZ
            AZ_DISABLE_COPY_MOVE(PipelineLibrary);

            static RHI::Ptr<PipelineLibrary> Create();
-
+            id<MTLBinaryArchive> GetNativePipelineCache() const;
+            id<MTLRenderPipelineState> CreateGraphicsPipelineState(uint64_t hash, MTLRenderPipelineDescriptor* pipelineStateDesc);
+            id<MTLComputePipelineState> CreateComputePipelineState(uint64_t hash, MTLComputePipelineDescriptor* pipelineStateDesc);
+            
        private:
            PipelineLibrary() = default;

            //////////////////////////////////////////////////////////////////////////
            // RHI::PipelineLibrary
-            RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryData* serializedData) override;
+            RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryDescriptor& descriptor) override;
            void ShutdownInternal() override;
            RHI::ResultCode MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> libraries) override;
            RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override;
+            bool IsMergeRequired() const;
+            bool SaveSerializedDataInternal(const AZStd::string& filePath) const;
            //////////////////////////////////////////////////////////////////////////

+            RHI::PipelineLibraryDescriptor m_descriptor;
+            id<MTLBinaryArchive> m_mtlBinaryArchive = nil;
+            mutable AZStd::mutex m_mutex;
+            
+            // Internally tracks additions to the library. Used when merging libraries together.
+            AZStd::unordered_map<uint64_t, MTLRenderPipelineDescriptor*> m_renderPipelineStates;
+            AZStd::unordered_map<uint64_t, MTLComputePipelineDescriptor*> m_computePipelineStates;
        };
    }
 }
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineState.cpp
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineState.cpp
@ -13,6 +13,7 @@
 #include <RHI/Conversions.h>
 #include <RHI/Device.h>
 #include <RHI/PipelineState.h>
+#include <RHI/PipelineLibrary.h>

 namespace AZ
 {
@ -110,8 +111,6 @@ namespace AZ
            
            [source release];
            source = nil;
-            
-            
            return pFunction;
        }
        
@ -119,68 +118,71 @@ namespace AZ
                                                    const RHI::PipelineStateDescriptorForDraw& descriptor,
                                                    RHI::PipelineLibrary* pipelineLibraryBase)
        {
+            NSError* error = 0;
            Device& device = static_cast<Device&>(deviceBase);
            RHI::ConstPtr<PipelineLayout> pipelineLayout = device.AcquirePipelineLayout(*descriptor.m_pipelineLayoutDescriptor);
            AZ_Assert(pipelineLayout, "PipelineLayout can not be null");
            
            const RHI::RenderAttachmentConfiguration& attachmentsConfiguration = descriptor.m_renderAttachmentConfiguration;
-            MTLRenderPipelineDescriptor* desc = [[MTLRenderPipelineDescriptor alloc] init];
+            m_renderPipelineDesc = [[MTLRenderPipelineDescriptor alloc] init];
            
            for (AZ::u32 i = 0; i < attachmentsConfiguration.GetRenderTargetCount(); ++i)
            {
-                desc.colorAttachments[i].pixelFormat = ConvertPixelFormat(attachmentsConfiguration.GetRenderTargetFormat(i));
-                desc.colorAttachments[i].writeMask = ConvertColorWriteMask(descriptor.m_renderStates.m_blendState.m_targets[i].m_writeMask);
-                desc.colorAttachments[i].blendingEnabled = descriptor.m_renderStates.m_blendState.m_targets[i].m_enable;
-                desc.colorAttachments[i].alphaBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaOp);
-                desc.colorAttachments[i].rgbBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendOp);
-                desc.colorAttachments[i].destinationAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaDest);
-                desc.colorAttachments[i].destinationRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendDest);;
-                desc.colorAttachments[i].sourceAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaSource);
-                desc.colorAttachments[i].sourceRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendSource);;
+                m_renderPipelineDesc.colorAttachments[i].pixelFormat = ConvertPixelFormat(attachmentsConfiguration.GetRenderTargetFormat(i));
+                m_renderPipelineDesc.colorAttachments[i].writeMask = ConvertColorWriteMask(descriptor.m_renderStates.m_blendState.m_targets[i].m_writeMask);
+                m_renderPipelineDesc.colorAttachments[i].blendingEnabled = descriptor.m_renderStates.m_blendState.m_targets[i].m_enable;
+                m_renderPipelineDesc.colorAttachments[i].alphaBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaOp);
+                m_renderPipelineDesc.colorAttachments[i].rgbBlendOperation = ConvertBlendOp(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendOp);
+                m_renderPipelineDesc.colorAttachments[i].destinationAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaDest);
+                m_renderPipelineDesc.colorAttachments[i].destinationRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendDest);;
+                m_renderPipelineDesc.colorAttachments[i].sourceAlphaBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendAlphaSource);
+                m_renderPipelineDesc.colorAttachments[i].sourceRGBBlendFactor = ConvertBlendFactor(descriptor.m_renderStates.m_blendState.m_targets[i].m_blendSource);;
            }
            
            MTLVertexDescriptor* vertexDescriptor = [[MTLVertexDescriptor alloc] init];
            ConvertInputElements(descriptor.m_inputStreamLayout, vertexDescriptor);
-            desc.vertexDescriptor = vertexDescriptor;
+            m_renderPipelineDesc.vertexDescriptor = vertexDescriptor;
            [vertexDescriptor release];
            vertexDescriptor = nil;
            
-            desc.vertexFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_vertexFunction.get());
-            AZ_Assert(desc.vertexFunction, "Vertex mtlFuntion can not be null");
-            desc.fragmentFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_fragmentFunction.get());
+            m_renderPipelineDesc.vertexFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_vertexFunction.get());
+            AZ_Assert(m_renderPipelineDesc.vertexFunction, "Vertex mtlFuntion can not be null");
+            m_renderPipelineDesc.fragmentFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_fragmentFunction.get());
            
            RHI::Format depthStencilFormat = attachmentsConfiguration.GetDepthStencilFormat();
            if(descriptor.m_renderStates.m_depthStencilState.m_stencil.m_enable || IsDepthStencilMerged(depthStencilFormat))
            {
-                desc.stencilAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
+                m_renderPipelineDesc.stencilAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
            }
            
            //Depthstencil state
            if(descriptor.m_renderStates.m_depthStencilState.m_depth.m_enable || IsDepthStencilMerged(depthStencilFormat))
            {
-                desc.depthAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
+                m_renderPipelineDesc.depthAttachmentPixelFormat = ConvertPixelFormat(depthStencilFormat);
                
                MTLDepthStencilDescriptor* depthStencilDesc = [[MTLDepthStencilDescriptor alloc] init];
                ConvertDepthStencilState(descriptor.m_renderStates.m_depthStencilState, depthStencilDesc);
                m_depthStencilState = [device.GetMtlDevice() newDepthStencilStateWithDescriptor:depthStencilDesc];
                AZ_Assert(m_depthStencilState, "Could not create Depth Stencil state.");
-                [m_depthStencilState retain];
                [depthStencilDesc release];
                depthStencilDesc = nil;
            }
                        
-            desc.sampleCount = descriptor.m_renderStates.m_multisampleState.m_samples;
-            desc.alphaToCoverageEnabled = descriptor.m_renderStates.m_blendState.m_alphaToCoverageEnable;
+            m_renderPipelineDesc.sampleCount = descriptor.m_renderStates.m_multisampleState.m_samples;
+            m_renderPipelineDesc.alphaToCoverageEnabled = descriptor.m_renderStates.m_blendState.m_alphaToCoverageEnable;
            
-            NSError* error = 0;
-            MTLRenderPipelineReflection* ref;
-            m_graphicsPipelineState = [device.GetMtlDevice() newRenderPipelineStateWithDescriptor:desc options : MTLPipelineOptionBufferTypeInfo reflection : &ref error : &error];
+            PipelineLibrary* pipelineLibrary = static_cast<PipelineLibrary*>(pipelineLibraryBase);
+            if (pipelineLibrary && pipelineLibrary->IsInitialized())
+            {
+                m_graphicsPipelineState = pipelineLibrary->CreateGraphicsPipelineState(static_cast<uint64_t>(descriptor.GetHash()), m_renderPipelineDesc);
+            }
+            else
+            {
+                MTLRenderPipelineReflection* ref;
+                m_graphicsPipelineState = [device.GetMtlDevice() newRenderPipelineStateWithDescriptor:m_renderPipelineDesc options : MTLPipelineOptionBufferTypeInfo reflection : &ref error : &error];
+            }
            
            AZ_Assert(m_graphicsPipelineState, "Could not create Pipeline object!.");
-            [m_graphicsPipelineState retain];
-            [desc release];
-            desc = nil;
-            
            m_pipelineStateMultiSampleState = descriptor.m_renderStates.m_multisampleState;
            
            //Cache the rasterizer state
@ -207,20 +209,25 @@ namespace AZ
                                                    RHI::PipelineLibrary* pipelineLibraryBase)
        {
            Device& device = static_cast<Device&>(deviceBase);
-            MTLComputePipelineDescriptor* desc = [[MTLComputePipelineDescriptor alloc] init];
+            NSError* error = 0;
+            m_computePipelineDesc = [[MTLComputePipelineDescriptor alloc] init];
            RHI::ConstPtr<PipelineLayout> pipelineLayout = device.AcquirePipelineLayout(*descriptor.m_pipelineLayoutDescriptor);
            AZ_Assert(pipelineLayout, "PipelineLayout can not be null");
-            desc.computeFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_computeFunction.get());
-            AZ_Assert(desc.computeFunction, "Compute mtlFuntion can not be null");
+            m_computePipelineDesc.computeFunction = ExtractMtlFunction(device.GetMtlDevice(), descriptor.m_computeFunction.get());
+            AZ_Assert(m_computePipelineDesc.computeFunction, "Compute mtlFuntion can not be null");
            
-            NSError* error = 0;
-            MTLComputePipelineReflection* ref;
-            m_computePipelineState = [device.GetMtlDevice() newComputePipelineStateWithDescriptor:desc options:MTLPipelineOptionBufferTypeInfo reflection:&ref error:&error];
+            PipelineLibrary* pipelineLibrary = static_cast<PipelineLibrary*>(pipelineLibraryBase);
+            if (pipelineLibrary && pipelineLibrary->IsInitialized())
+            {
+                m_computePipelineState = pipelineLibrary->CreateComputePipelineState(static_cast<uint64_t>(descriptor.GetHash()), m_computePipelineDesc);
+            }
+            else
+            {
+                MTLComputePipelineReflection* ref;
+                m_computePipelineState = [device.GetMtlDevice() newComputePipelineStateWithDescriptor:m_computePipelineDesc options:MTLPipelineOptionBufferTypeInfo reflection:&ref error:&error];
+            }
            
            AZ_Assert(m_computePipelineState, "Could not create Pipeline object!.");
-            [m_computePipelineState retain];
-            [desc release];
-            desc = nil;
            
            if (m_computePipelineState)
            {
@ -262,12 +269,16 @@ namespace AZ
        {
            if (m_graphicsPipelineState)
            {
+                [m_renderPipelineDesc release];
+                m_renderPipelineDesc = nil;
                [m_graphicsPipelineState release];
                m_graphicsPipelineState = nil;
            }
            
            if (m_computePipelineState)
            {
+                [m_computePipelineDesc release];
+                m_computePipelineDesc = nil;
                [m_computePipelineState release];
                m_computePipelineState = nil;
            }
--- a/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineState.h
+++ b/Gems/Atom/RHI/Metal/Code/Source/RHI/PipelineState.h
@ -75,10 +75,13 @@ namespace AZ
            RHI::ConstPtr<PipelineLayout> m_pipelineLayout;
            AZStd::atomic_bool m_isCompiled = {false};

-            // Platform pipeline state.
+            // PSOs + descriptors
            id<MTLRenderPipelineState> m_graphicsPipelineState = nil;
            id<MTLComputePipelineState> m_computePipelineState = nil;
            id<MTLDepthStencilState> m_depthStencilState = nil;
+            MTLRenderPipelineDescriptor* m_renderPipelineDesc = nil;
+            MTLComputePipelineDescriptor* m_computePipelineDesc = nil;
+            
            AZ::u32 m_stencilRef = 0;
            RasterizerState m_rasterizerState;
            MTLPrimitiveType m_primitiveTopology = MTLPrimitiveTypeTriangle;
--- a/Gems/Atom/RHI/Null/Code/Source/RHI/PipelineLibrary.h
+++ b/Gems/Atom/RHI/Null/Code/Source/RHI/PipelineLibrary.h
@ -28,10 +28,11 @@ namespace AZ

            //////////////////////////////////////////////////////////////////////////
            // RHI::PipelineLibrary
-            RHI::ResultCode InitInternal([[maybe_unused]] RHI::Device& device, [[maybe_unused]] const RHI::PipelineLibraryData* serializedData) override { return RHI::ResultCode::Success;}
+            RHI::ResultCode InitInternal([[maybe_unused]] RHI::Device& device, [[maybe_unused]] const RHI::PipelineLibraryDescriptor& descriptor) override { return RHI::ResultCode::Success;}
            void ShutdownInternal() override {}
            RHI::ResultCode MergeIntoInternal([[maybe_unused]] AZStd::span<const RHI::PipelineLibrary* const> libraries) override { return RHI::ResultCode::Success;}
            RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override { return nullptr;}
+            bool SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const override { return true;}
            //////////////////////////////////////////////////////////////////////////
        };
    }
--- a/Gems/Atom/RHI/Vulkan/Code/Source/RHI/PipelineLibrary.cpp
+++ b/Gems/Atom/RHI/Vulkan/Code/Source/RHI/PipelineLibrary.cpp
@ -23,7 +23,7 @@ namespace AZ
            return m_nativePipelineCache;
        }

-        RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryData* serializedData)
+        RHI::ResultCode PipelineLibrary::InitInternal(RHI::Device& deviceBase, const RHI::PipelineLibraryDescriptor& descriptor)
        {
            DeviceObject::Init(deviceBase);
            auto& device = static_cast<Device&>(deviceBase);
@ -35,10 +35,10 @@ namespace AZ
            createInfo.initialDataSize = 0;
            createInfo.pInitialData = nullptr;

-            if (serializedData)
+            if (descriptor.m_serializedData)
            {
-                createInfo.initialDataSize = static_cast<size_t>(serializedData->GetData().size());
-                createInfo.pInitialData = serializedData->GetData().data();
+                createInfo.initialDataSize = static_cast<size_t>(descriptor.m_serializedData->GetData().size());
+                createInfo.pInitialData = descriptor.m_serializedData->GetData().data();
            }

            const VkResult result = vkCreatePipelineCache(device.GetNativeDevice(), &createInfo, nullptr, &m_nativePipelineCache);
@ -108,5 +108,13 @@ namespace AZ
                Debug::SetNameToObject(reinterpret_cast<uint64_t>(m_nativePipelineCache), name.data(), VK_OBJECT_TYPE_PIPELINE_CACHE, static_cast<Device&>(GetDevice()));
            }
        }
+
+        bool PipelineLibrary::SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const
+        {
+            //Vulkan drivers cannot save serialized data
+            [[maybe_unused]] Device& device = static_cast<Device&>(GetDevice());
+            AZ_Assert(!device.GetFeatures().m_isPsoCacheFileOperationsNeeded, "Explicit PSO cache operations should not be disabled for Vulkan");
+            return false;
+        }
    }
 }
--- a/Gems/Atom/RHI/Vulkan/Code/Source/RHI/PipelineLibrary.h
+++ b/Gems/Atom/RHI/Vulkan/Code/Source/RHI/PipelineLibrary.h
@ -40,10 +40,11 @@ namespace AZ

            //////////////////////////////////////////////////////////////////////////
            // RHI::PipelineLibrary
-            RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryData* serializedData) override;
+            RHI::ResultCode InitInternal(RHI::Device& device, const RHI::PipelineLibraryDescriptor& descriptor) override;
            void ShutdownInternal() override;
            RHI::ResultCode MergeIntoInternal(AZStd::span<const RHI::PipelineLibrary* const> libraries) override;
            RHI::ConstPtr<RHI::PipelineLibraryData> GetSerializedDataInternal() const override;
+            bool SaveSerializedDataInternal(const AZStd::string& filePath) const override;
            //////////////////////////////////////////////////////////////////////////

            VkPipelineCache m_nativePipelineCache = VK_NULL_HANDLE;
--- a/Gems/Atom/RPI/Code/Source/RPI.Public/Shader/Shader.cpp
+++ b/Gems/Atom/RPI/Code/Source/RPI.Public/Shader/Shader.cpp
@ -104,7 +104,7 @@ namespace AZ

                char pipelineLibraryPathTemp[AZ_MAX_PATH_LEN];
                azsnprintf(
-                    pipelineLibraryPathTemp, AZ_MAX_PATH_LEN, "@user@/Atom/PipelineStateCache_%s_%i_%i _Ver_%i/%s/%s_%s_%d.bin",
+                    pipelineLibraryPathTemp, AZ_MAX_PATH_LEN, "@user@/Atom/PipelineStateCache_%s_%u_%u_Ver_%i/%s/%s_%s_%d.bin",
                    ToString(physicalDeviceDesc.m_vendorId).data(), physicalDeviceDesc.m_deviceId, physicalDeviceDesc.m_driverVersion, PSOCacheVersion,
                    platformName.GetCStr(),
                    shaderName.GetCStr(),
@ -146,7 +146,7 @@ namespace AZ

                RHI::PipelineStateCache* pipelineStateCache = rhiSystem->GetPipelineStateCache();
                ConstPtr<RHI::PipelineLibraryData> serializedData = LoadPipelineLibrary();
-                RHI::PipelineLibraryHandle pipelineLibraryHandle = pipelineStateCache->CreateLibrary(serializedData.get());
+                RHI::PipelineLibraryHandle pipelineLibraryHandle = pipelineStateCache->CreateLibrary(serializedData.get(), m_pipelineLibraryPath);

                if (pipelineLibraryHandle.IsNull())
                {
@ -321,8 +321,10 @@ namespace AZ
        ///////////////////////////////////////////////////////////////////
        
        ConstPtr<RHI::PipelineLibraryData> Shader::LoadPipelineLibrary() const
-        { 
-            if (m_pipelineLibraryPath[0] != 0)
+        {
+            RHI::Device* device = RHI::RHISystemInterface::Get()->GetDevice();
+            //Check if explicit file load/save operation is needed as the RHI backend api may not support it
+            if (m_pipelineLibraryPath[0] != 0 && device->GetFeatures().m_isPsoCacheFileOperationsNeeded)
            {
                return Utils::LoadObjectFromFile<RHI::PipelineLibraryData>(m_pipelineLibraryPath);
            }
@ -331,12 +333,28 @@ namespace AZ

        void Shader::SavePipelineLibrary() const
        {
+            RHI::Device* device = RHI::RHISystemInterface::Get()->GetDevice();
            if (m_pipelineLibraryPath[0] != 0)
            {
-                RHI::ConstPtr<RHI::PipelineLibraryData> serializedData = m_pipelineStateCache->GetLibrarySerializedData(m_pipelineLibraryHandle);
-                if (serializedData)
+                RHI::ConstPtr<RHI::PipelineLibrary> pipelineLib = m_pipelineStateCache->GetMergedLibrary(m_pipelineLibraryHandle);
+                if(!pipelineLib)
+                {
+                    return;
+                }
+                
+                //Check if explicit file load/save operation is needed as the RHI backend api may not support it
+                if (device->GetFeatures().m_isPsoCacheFileOperationsNeeded)
+                {
+                    RHI::ConstPtr<RHI::PipelineLibraryData> serializedData = pipelineLib->GetSerializedData();
+                    if(serializedData)
+                    {
+                        Utils::SaveObjectToFile<RHI::PipelineLibraryData>(m_pipelineLibraryPath, DataStream::ST_BINARY, serializedData.get());
+                    }
+                }
+                else
                {
-                    Utils::SaveObjectToFile<RHI::PipelineLibraryData>(m_pipelineLibraryPath, DataStream::ST_BINARY, serializedData.get());
+                    [[maybe_unused]] bool result = pipelineLib->SaveSerializedData(m_pipelineLibraryPath);
+                    AZ_Error("Shader", result, "Pipeline Library %s was not saved", &m_pipelineLibraryPath);
                }
            }
        }
--- a/Gems/Atom/RPI/Code/Tests/Common/RHI/Stubs.h
+++ b/Gems/Atom/RPI/Code/Tests/Common/RHI/Stubs.h
@ -262,10 +262,11 @@ namespace UnitTest
            AZ_CLASS_ALLOCATOR(PipelineLibrary, AZ::SystemAllocator, 0);

        private:
-            AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, const AZ::RHI::PipelineLibraryData*) override { return AZ::RHI::ResultCode::Success; }
+            AZ::RHI::ResultCode InitInternal(AZ::RHI::Device&, [[maybe_unused]] const AZ::RHI::PipelineLibraryDescriptor& descriptor) override { return AZ::RHI::ResultCode::Success; }
            void ShutdownInternal() override {}
            AZ::RHI::ResultCode MergeIntoInternal(AZStd::span<const AZ::RHI::PipelineLibrary* const>) override { return AZ::RHI::ResultCode::Success; }
            AZ::RHI::ConstPtr<AZ::RHI::PipelineLibraryData> GetSerializedDataInternal() const override { return nullptr; }
+            bool SaveSerializedDataInternal([[maybe_unused]] const AZStd::string& filePath) const { return true;}
        };

        class ShaderStageFunction