diff --git a/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp b/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp index 5c510f67c1..c2bf9fe45c 100644 --- a/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp +++ b/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp @@ -6,8 +6,155 @@ * */ -#include #include +#include + +#define RECORDING_ENABLED 0 + +#if RECORDING_ENABLED + +#include +#include +#include +#include + +namespace +{ + class DebugAllocator + { + public: + using pointer_type = void*; + using size_type = AZStd::size_t; + using difference_type = AZStd::ptrdiff_t; + using allow_memory_leaks = AZStd::false_type; ///< Regular allocators should not leak. + + AZ_FORCE_INLINE pointer_type allocate(size_t byteSize, size_t alignment, int = 0) + { + return AZ_OS_MALLOC(byteSize, alignment); + } + AZ_FORCE_INLINE size_type resize(pointer_type, size_type) + { + return 0; + } + AZ_FORCE_INLINE void deallocate(pointer_type ptr, size_type, size_type) + { + AZ_OS_FREE(ptr); + } + }; + + #pragma pack(push, 1) + struct alignas(1) AllocatorOperation + { + enum OperationType : size_t + { + ALLOCATE, + DEALLOCATE + }; + OperationType m_type: 1; + size_t m_size : 28; // 28 bits: can represent request sizes below 256MB + size_t m_alignment : 7; // 7 bits: holds values 0..127, so an alignment of 128 or more does not fit + size_t m_recordId : 28; // Can represent up to 256M simultaneous requests, we reuse ids + }; + #pragma pack(pop) + static_assert(sizeof(AllocatorOperation) == 8); + + static AZStd::mutex s_operationsMutex = {}; + + static constexpr size_t s_maxNumberOfAllocationsToRecord = 16384; + static size_t s_numberOfAllocationsRecorded = 0; + static constexpr size_t s_allocationOperationCount = 5 * 1024; + static AZStd::array s_operations = {}; + static uint64_t s_operationCounter = 0; + + static unsigned int s_nextRecordId = 1; + using AllocatorOperationByAddress = AZStd::unordered_map, DebugAllocator>; + static AllocatorOperationByAddress s_allocatorOperationByAddress; + using AvailableRecordIds = 
AZStd::vector; + AvailableRecordIds s_availableRecordIds; + + void RecordAllocatorOperation(AllocatorOperation::OperationType type, void* ptr, size_t size = 0, size_t alignment = 0) + { + AZStd::scoped_lock lock(s_operationsMutex); + if (s_operationCounter == s_allocationOperationCount) + { + AZ::IO::SystemFile file; + int mode = AZ::IO::SystemFile::OpenMode::SF_OPEN_APPEND | AZ::IO::SystemFile::OpenMode::SF_OPEN_WRITE_ONLY; + if (!file.Exists("memoryrecordings.bin")) + { + mode |= AZ::IO::SystemFile::OpenMode::SF_OPEN_CREATE; + } + file.Open("memoryrecordings.bin", mode); + if (file.IsOpen()) + { + file.Write(&s_operations, sizeof(AllocatorOperation) * s_allocationOperationCount); + file.Close(); + } + s_operationCounter = 0; + } + AllocatorOperation& operation = s_operations[s_operationCounter++]; + operation.m_type = type; + if (type == AllocatorOperation::OperationType::ALLOCATE) + { + if (s_numberOfAllocationsRecorded > s_maxNumberOfAllocationsToRecord) + { + // reached limit of allocations, dont record anymore + --s_operationCounter; + return; + } + ++s_numberOfAllocationsRecorded; + operation.m_size = size; + operation.m_alignment = alignment; + unsigned int recordId = 0; + if (!s_availableRecordIds.empty()) + { + recordId = s_availableRecordIds.back(); + s_availableRecordIds.pop_back(); + } + else + { + recordId = s_nextRecordId; + ++s_nextRecordId; + } + operation.m_recordId = recordId; + auto it = s_allocatorOperationByAddress.emplace(ptr, operation); + if (!it.second) + { + // double alloc or resize, leave the current record and return the id + operation = it.first->second; + s_availableRecordIds.emplace_back(recordId); + } + } + else + { + if (ptr == nullptr) + { + // common scenario, just record the operation + operation.m_size = 0; + operation.m_alignment = 0; + operation.m_recordId = 0; // recordId = 0 will flag this case + } + else + { + auto it = s_allocatorOperationByAddress.find(ptr); + if (it != s_allocatorOperationByAddress.end()) + { + 
operation.m_size = it->second.m_size; + operation.m_alignment = it->second.m_alignment; + operation.m_recordId = it->second.m_recordId; + s_availableRecordIds.push_back(it->second.m_recordId); + s_allocatorOperationByAddress.erase(it); + } + else + { + // just dont record this operation + --s_operationCounter; + } + } + } + + } +} +#endif namespace AZ { @@ -150,6 +297,10 @@ namespace AZ records->RegisterAllocation(ptr, byteSize, alignment, name, fileName, lineNum, suppressStackRecord + 1); } } + +#if RECORDING_ENABLED + RecordAllocatorOperation(AllocatorOperation::ALLOCATE, ptr, byteSize, alignment); +#endif } void AllocatorBase::ProfileDeallocation(void* ptr, size_t byteSize, size_t alignment, Debug::AllocationInfo* info) @@ -162,6 +313,9 @@ namespace AZ records->UnregisterAllocation(ptr, byteSize, alignment, info); } } +#if RECORDING_ENABLED + RecordAllocatorOperation(AllocatorOperation::DEALLOCATE, ptr, byteSize, alignment); +#endif } void AllocatorBase::ProfileReallocationBegin([[maybe_unused]] void* ptr, [[maybe_unused]] size_t newSize) @@ -176,6 +330,10 @@ namespace AZ ProfileDeallocation(ptr, 0, 0, &info); ProfileAllocation(newPtr, newSize, newAlignment, info.m_name, info.m_fileName, info.m_lineNum, 0); } +#if RECORDING_ENABLED + RecordAllocatorOperation(AllocatorOperation::DEALLOCATE, ptr); + RecordAllocatorOperation(AllocatorOperation::ALLOCATE, newPtr, newSize, newAlignment); +#endif } void AllocatorBase::ProfileReallocation(void* ptr, void* newPtr, size_t newSize, size_t newAlignment) @@ -193,6 +351,9 @@ namespace AZ records->ResizeAllocation(ptr, newSize); } } +#if RECORDING_ENABLED + RecordAllocatorOperation(AllocatorOperation::ALLOCATE, ptr, newSize); +#endif } bool AllocatorBase::OnOutOfMemory(size_t byteSize, size_t alignment, int flags, const char* name, const char* fileName, int lineNum) diff --git a/Code/Framework/AzCore/AzCore/Memory/HeapSchema.cpp b/Code/Framework/AzCore/AzCore/Memory/HeapSchema.cpp index aceafa1b28..1f0fc59a97 100644 --- 
a/Code/Framework/AzCore/AzCore/Memory/HeapSchema.cpp +++ b/Code/Framework/AzCore/AzCore/Memory/HeapSchema.cpp @@ -115,6 +115,7 @@ namespace AZ m_ownMemoryBlock[i] = false; } + AZ_Assert(m_desc.m_numMemoryBlocks > 0, "At least one memory block is required"); for (int i = 0; i < m_desc.m_numMemoryBlocks; ++i) { if (m_desc.m_memoryBlocks[i] == nullptr) // Allocate memory block if requested! @@ -131,17 +132,6 @@ namespace AZ m_capacity += m_desc.m_memoryBlocksByteSize[i]; } - - if (m_desc.m_numMemoryBlocks == 0) - { - // Create default memory space if we can to serve for default allocations - m_memSpaces[0] = AZDLMalloc::create_mspace(0, m_desc.m_isMultithreadAlloc); - if (m_memSpaces[0]) - { - AZDLMalloc::mspace_az_set_expandable(m_memSpaces[0], true); - m_capacity = Platform::GetHeapCapacity(); - } - } } HeapSchema::~HeapSchema() diff --git a/Code/Framework/AzCore/AzCore/Memory/HeapSchema.h b/Code/Framework/AzCore/AzCore/Memory/HeapSchema.h index f72ae31057..3a7716a127 100644 --- a/Code/Framework/AzCore/AzCore/Memory/HeapSchema.h +++ b/Code/Framework/AzCore/AzCore/Memory/HeapSchema.h @@ -32,17 +32,11 @@ namespace AZ */ struct Descriptor { - Descriptor() - : m_numMemoryBlocks(0) - , m_isMultithreadAlloc(true) - {} - - static const int m_memoryBlockAlignment = 64 * 1024; static const int m_maxNumBlocks = 5; - int m_numMemoryBlocks; ///< Number of memory blocks to use. - void* m_memoryBlocks[m_maxNumBlocks]; ///< Pointers to provided memory blocks or NULL if you want the system to allocate them for you with the System Allocator. - size_t m_memoryBlocksByteSize[m_maxNumBlocks]; ///< Sizes of different memory blocks, if m_memoryBlock is 0 the block will be allocated for you with the System Allocator. - bool m_isMultithreadAlloc; ///< Set to true to enable multi threading safe allocation. + int m_numMemoryBlocks = 1; ///< Number of memory blocks to use. 
+ void* m_memoryBlocks[m_maxNumBlocks] = {}; ///< Pointers to provided memory blocks or NULL if you want the system to allocate them for you with the System Allocator. + size_t m_memoryBlocksByteSize[m_maxNumBlocks] = {4 * 1024}; ///< Sizes of different memory blocks, if m_memoryBlock is 0 the block will be allocated for you with the System Allocator. + bool m_isMultithreadAlloc = true; ///< Set to true to enable multi threading safe allocation. }; HeapSchema(const Descriptor& desc); diff --git a/Code/Framework/AzCore/AzCore/Memory/SystemAllocator.cpp b/Code/Framework/AzCore/AzCore/Memory/SystemAllocator.cpp index 15cf5de8bc..41099f7a38 100644 --- a/Code/Framework/AzCore/AzCore/Memory/SystemAllocator.cpp +++ b/Code/Framework/AzCore/AzCore/Memory/SystemAllocator.cpp @@ -18,7 +18,6 @@ #define AZCORE_SYSTEM_ALLOCATOR_HPHA 1 #define AZCORE_SYSTEM_ALLOCATOR_MALLOC 2 -#define AZCORE_SYSTEM_ALLOCATOR_HEAP 3 #if !defined(AZCORE_SYSTEM_ALLOCATOR) // define the default @@ -29,8 +28,6 @@ #include #elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC #include -#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP - #include #else #error "Invalid allocator selected for SystemAllocator" #endif @@ -44,8 +41,6 @@ namespace AZ static AZStd::aligned_storage::value>::type g_systemSchema; #elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC static AZStd::aligned_storage::value>::type g_systemSchema; -#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP - static AZStd::aligned_storage::value>::type g_systemSchema; #endif ////////////////////////////////////////////////////////////////////////// @@ -118,11 +113,6 @@ namespace AZ heapDesc.m_systemChunkSize = desc.m_heap.m_systemChunkSize; #elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC MallocSchema::Descriptor heapDesc; -#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP - HeapSchema::Descriptor heapDesc; - memcpy(heapDesc.m_memoryBlocks, desc.m_heap.m_memoryBlocks, 
sizeof(heapDesc.m_memoryBlocks)); - memcpy(heapDesc.m_memoryBlocksByteSize, desc.m_heap.m_memoryBlocksByteSize, sizeof(heapDesc.m_memoryBlocksByteSize)); - heapDesc.m_numMemoryBlocks = desc.m_heap.m_numMemoryBlocks; #endif if (&AllocatorInstance::Get() == this) // if we are the system allocator { @@ -132,8 +122,6 @@ namespace AZ m_allocator = new (&g_systemSchema) HphaSchema(heapDesc); #elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC m_allocator = new (&g_systemSchema) MallocSchema(heapDesc); -#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP - m_allocator = new (&g_systemSchema) HeapSchema(heapDesc); #endif g_isSystemSchemaUsed = true; isReady = true; @@ -149,8 +137,6 @@ namespace AZ m_allocator = azcreate(HphaSchema, (heapDesc), SystemAllocator); #elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC m_allocator = azcreate(MallocSchema, (heapDesc), SystemAllocator); -#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP - m_allocator = azcreate(HeapSchema, (heapDesc), SystemAllocator); #endif if (m_allocator == nullptr) { @@ -186,8 +172,6 @@ namespace AZ static_cast(m_allocator)->~HphaSchema(); #elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC static_cast(m_allocator)->~MallocSchema(); -#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP - static_cast(m_allocator)->~HeapSchema(); #endif g_isSystemSchemaUsed = false; } diff --git a/Code/Framework/AzCore/CMakeLists.txt b/Code/Framework/AzCore/CMakeLists.txt index 96ed838ccc..838142f0df 100644 --- a/Code/Framework/AzCore/CMakeLists.txt +++ b/Code/Framework/AzCore/CMakeLists.txt @@ -146,6 +146,11 @@ if(PAL_TRAIT_BUILD_TESTS_SUPPORTED) PROPERTY COMPILE_DEFINITIONS VALUES AZCORETEST_DLL_NAME=\"$\" ) + ly_add_target_files( + TARGETS AzCore.Tests + FILES ${CMAKE_CURRENT_SOURCE_DIR}/Tests/Memory/AllocatorBenchmarkRecordings.bin + OUTPUT_SUBDIRECTORY Tests/AzCore/Memory + ) endif() diff --git 
a/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin new file mode 100644 index 0000000000..ec5de82e83 --- /dev/null +++ b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:281ba03e79ecba90b313a0b17bdba87c57d76b504b6e38d579b5eabd995902cc +size 245760 diff --git a/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp new file mode 100644 index 0000000000..bc477e41dc --- /dev/null +++ b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp @@ -0,0 +1,591 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#if defined(HAVE_BENCHMARK) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace Benchmark +{ + namespace Platform + { + size_t GetProcessMemoryUsageBytes(); + size_t GetMemorySize(void* memory); + } + + /// + /// Test allocator wrapper that redirects the calls to the passed TAllocator by using AZ::AllocatorInstance. 
+ /// It also creates/destroys the TAllocator type (to reflect what happens at runtime) + /// + /// Allocator type to wrap + template + class TestAllocatorWrapper + { + public: + static void SetUp() + { + AZ::AllocatorInstance::Create(); + } + + static void TearDown() + { + AZ::AllocatorInstance::Destroy(); + } + + static void* Allocate(size_t byteSize, size_t alignment) + { + return AZ::AllocatorInstance::Get().Allocate(byteSize, alignment); + } + + static void DeAllocate(void* ptr, size_t byteSize = 0) + { + AZ::AllocatorInstance::Get().DeAllocate(ptr, byteSize); + } + + static void* ReAllocate(void* ptr, size_t newSize, size_t newAlignment) + { + return AZ::AllocatorInstance::Get().ReAllocate(ptr, newSize, newAlignment); + } + + static size_t Resize(void* ptr, size_t newSize) + { + return AZ::AllocatorInstance::Get().Resize(ptr, newSize); + } + + static void GarbageCollect() + { + AZ::AllocatorInstance::Get().GarbageCollect(); + } + + static size_t NumAllocatedBytes() + { + return AZ::AllocatorInstance::Get().NumAllocatedBytes() + + AZ::AllocatorInstance::Get().GetUnAllocatedMemory(); + } + + static size_t GetSize(void* ptr) + { + return AZ::AllocatorInstance::Get().AllocationSize(ptr); + } + }; + + /// + /// Basic allocator used as a baseline. This allocator is the most basic allocation possible with the OS (AZ_OS_MALLOC). + /// MallocSchema cannot be used here because it has extra logic that we don't want to use as a baseline. 
+ /// + class RawMallocAllocator {}; + + template<> + class TestAllocatorWrapper + { + public: + TestAllocatorWrapper() + { + s_numAllocatedBytes = 0; + } + + static void SetUp() + { + s_numAllocatedBytes = 0; + } + + static void TearDown() + { + } + + // IAllocatorAllocate + static void* Allocate(size_t byteSize, size_t) + { + s_numAllocatedBytes += byteSize; + // Don't pass an alignment since we wont be able to get the memory size without also passing the alignment + return AZ_OS_MALLOC(byteSize, 1); + } + + static void DeAllocate(void* ptr, size_t = 0) + { + s_numAllocatedBytes -= Platform::GetMemorySize(ptr); + AZ_OS_FREE(ptr); + } + + static void* ReAllocate(void* ptr, size_t newSize, size_t) + { + s_numAllocatedBytes -= Platform::GetMemorySize(ptr); + AZ_OS_FREE(ptr); + + s_numAllocatedBytes += newSize; + return AZ_OS_MALLOC(newSize, 1); + } + + static size_t Resize(void* ptr, size_t newSize) + { + AZ_UNUSED(ptr); + AZ_UNUSED(newSize); + + return 0; + } + + static void GarbageCollect() {} + + static size_t NumAllocatedBytes() + { + return s_numAllocatedBytes; + } + + static size_t GetSize(void* ptr) + { + return Platform::GetMemorySize(ptr); + } + + private: + static size_t s_numAllocatedBytes; + }; + + size_t TestAllocatorWrapper::s_numAllocatedBytes = 0; + + // Some allocator are not fully declared, those we simply setup from the schema + class MallocSchemaAllocator : public AZ::SimpleSchemaAllocator + { + public: + AZ_TYPE_INFO(MallocSchemaAllocator, "{3E68224F-E676-402C-8276-CE4B49C05E89}"); + + MallocSchemaAllocator() + : AZ::SimpleSchemaAllocator("MallocSchemaAllocator", "") + {} + }; + + // We use both this HphaSchemaAllocator and the SystemAllocator configured with Hpha because the SystemAllocator + // has extra things + class HphaSchemaAllocator : public AZ::SimpleSchemaAllocator + { + public: + AZ_TYPE_INFO(HphaSchemaAllocator, "{6563AB4B-A68E-4499-8C98-D61D640D1F7F}"); + + HphaSchemaAllocator() + : 
AZ::SimpleSchemaAllocator("TestHphaSchemaAllocator", "") + {} + }; + + // For the SystemAllocator we inherit so we have a different stack. The SystemAllocator is used globally so we don't want + // to get that data affecting the benchmark + class TestSystemAllocator : public AZ::SystemAllocator + { + public: + AZ_TYPE_INFO(TestSystemAllocator, "{360D4DAA-D65D-4D5C-A6FA-1A4C5261C35C}"); + + TestSystemAllocator() + : AZ::SystemAllocator() + { + } + }; + + // Allocated bytes reported by the allocator + static const char* s_counterAllocatorMemory = "Allocator_Memory"; + + // Allocated bytes as counted by the benchmark + static const char* s_counterBenchmarkMemory = "Benchmark_Memory"; + + enum AllocationSize + { + SMALL, + BIG, + MIXED, + COUNT + }; + + static const size_t s_kiloByte = 1024; + static const size_t s_megaByte = s_kiloByte * s_kiloByte; + using AllocationSizeArray = AZStd::array; + static const AZStd::array s_allocationSizes = { + /* SMALL */ AllocationSizeArray{ 2, 16, 20, 59, 100, 128, 160, 250, 300, 512 }, + /* BIG */ AllocationSizeArray{ 513, s_kiloByte, 2 * s_kiloByte, 4 * s_kiloByte, 10 * s_kiloByte, 64 * s_kiloByte, 128 * s_kiloByte, 200 * s_kiloByte, s_megaByte, 2 * s_megaByte }, + /* MIXED */ AllocationSizeArray{ 2, s_kiloByte, 59, 4 * s_kiloByte, 128, 200 * s_kiloByte, 250, s_megaByte, 512, 2 * s_megaByte } + }; + + template + class AllocatorBenchmarkFixture + : public ::benchmark::Fixture + { + protected: + using TestAllocatorType = TestAllocatorWrapper; + + virtual void internalSetUp(const ::benchmark::State& state) + { + if (state.thread_index == 0) // Only setup in the first thread + { + TestAllocatorType::SetUp(); + + m_allocations.resize(state.threads); + for (auto& perThreadAllocations : m_allocations) + { + perThreadAllocations.resize(state.range(0), nullptr); + } + } + } + + virtual void internalTearDown(const ::benchmark::State& state) + { + if (state.thread_index == 0) // Only tear down in the first thread + { + m_allocations.clear(); + 
m_allocations.shrink_to_fit(); + + TestAllocatorType::TearDown(); + } + } + + AZStd::vector& GetPerThreadAllocations(size_t threadIndex) + { + return m_allocations[threadIndex]; + } + + public: + void SetUp(const ::benchmark::State& state) override + { + internalSetUp(state); + } + void SetUp(::benchmark::State& state) override + { + internalSetUp(state); + } + + void TearDown(const ::benchmark::State& state) override + { + internalTearDown(state); + } + void TearDown(::benchmark::State& state) override + { + internalTearDown(state); + } + + private: + AZStd::vector> m_allocations; + }; + + template + class AllocationBenchmarkFixture + : public AllocatorBenchmarkFixture + { + using base = AllocatorBenchmarkFixture; + using TestAllocatorType = typename base::TestAllocatorType; + + public: + void Benchmark(benchmark::State& state) + { + for (auto _ : state) + { + state.PauseTiming(); + + AZStd::vector& perThreadAllocations = base::GetPerThreadAllocations(state.thread_index); + const size_t numberOfAllocations = perThreadAllocations.size(); + size_t totalAllocationSize = 0; + for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex) + { + const AllocationSizeArray& allocationArray = s_allocationSizes[TAllocationSize]; + const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()]; + totalAllocationSize += allocationSize; + + state.ResumeTiming(); + perThreadAllocations[allocationIndex] = TestAllocatorType::Allocate(allocationSize, 0); + state.PauseTiming(); + } + + state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults); + state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast(totalAllocationSize), benchmark::Counter::kDefaults); + + for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex) + { + const AllocationSizeArray& allocationArray = 
s_allocationSizes[TAllocationSize]; + const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()]; + TestAllocatorType::DeAllocate(perThreadAllocations[allocationIndex], allocationSize); + perThreadAllocations[allocationIndex] = nullptr; + } + TestAllocatorType::GarbageCollect(); + + state.SetItemsProcessed(numberOfAllocations); + } + } + }; + + template + class DeAllocationBenchmarkFixture + : public AllocatorBenchmarkFixture + { + using base = AllocatorBenchmarkFixture; + using TestAllocatorType = typename base::TestAllocatorType; + + public: + void Benchmark(benchmark::State& state) + { + for (auto _ : state) + { + state.PauseTiming(); + AZStd::vector& perThreadAllocations = base::GetPerThreadAllocations(state.thread_index); + + const size_t numberOfAllocations = perThreadAllocations.size(); + size_t totalAllocationSize = 0; + for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex) + { + const AllocationSizeArray& allocationArray = s_allocationSizes[TAllocationSize]; + const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()]; + totalAllocationSize += allocationSize; + perThreadAllocations[allocationIndex] = TestAllocatorType::Allocate(allocationSize, 0); + } + + for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex) + { + const AllocationSizeArray& allocationArray = s_allocationSizes[TAllocationSize]; + const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()]; + state.ResumeTiming(); + TestAllocatorType::DeAllocate(perThreadAllocations[allocationIndex], allocationSize); + state.PauseTiming(); + perThreadAllocations[allocationIndex] = nullptr; + } + + state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults); + state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast(totalAllocationSize), 
benchmark::Counter::kDefaults); + + state.SetItemsProcessed(numberOfAllocations); + + TestAllocatorType::GarbageCollect(); + } + } + }; + + template + class RecordedAllocationBenchmarkFixture : public ::benchmark::Fixture + { + using TestAllocatorType = TestAllocatorWrapper; + + virtual void internalSetUp() + { + TestAllocatorType::SetUp(); + } + + void internalTearDown() + { + TestAllocatorType::TearDown(); + } + + #pragma pack(push, 1) + struct alignas(1) AllocatorOperation + { + enum OperationType : size_t + { + ALLOCATE, + DEALLOCATE + }; + OperationType m_type : 1; + size_t m_size : 28; // 28 bits: can represent request sizes below 256MB + size_t m_alignment : 7; // 7 bits: holds values 0..127, so an alignment of 128 or more does not fit + size_t m_recordId : 28; // Can represent up to 256M simultaneous requests, we reuse ids + }; + #pragma pack(pop) + static_assert(sizeof(AllocatorOperation) == 8); + + public: + void SetUp(const ::benchmark::State&) override + { + internalSetUp(); + } + void SetUp(::benchmark::State&) override + { + internalSetUp(); + } + + void TearDown(const ::benchmark::State&) override + { + internalTearDown(); + } + void TearDown(::benchmark::State&) override + { + internalTearDown(); + } + + void Benchmark(benchmark::State& state) + { + for (auto _ : state) + { + state.PauseTiming(); + + AZStd::unordered_map pointerRemapping; + constexpr size_t allocationOperationCount = 5 * 1024; + AZStd::array m_operations = {}; + [[maybe_unused]] const size_t operationSize = sizeof(AllocatorOperation); + + size_t totalAllocationSize = 0; + size_t itemsProcessed = 0; + + for (size_t i = 0; i < 100; ++i) // play the recording multiple times to get a good stable sample, this way we can keep a smaller recording + { + AZ::IO::SystemFile file; + AZ::IO::FixedMaxPathString filePath = AZ::Utils::GetExecutableDirectory(); + filePath += "/Tests/AzCore/Memory/AllocatorBenchmarkRecordings.bin"; + if (!file.Open(filePath.c_str(), AZ::IO::SystemFile::OpenMode::SF_OPEN_READ_ONLY)) + { + return; + } + size_t 
elementsRead = + file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations) / sizeof(AllocatorOperation); + itemsProcessed += elementsRead; + + while (elementsRead > 0) + { + for (size_t operationIndex = 0; operationIndex < elementsRead; ++operationIndex) + { + const AllocatorOperation& operation = m_operations[operationIndex]; + if (operation.m_type == AllocatorOperation::ALLOCATE) + { + const auto it = pointerRemapping.emplace(operation.m_recordId, nullptr); + if (it.second) // otherwise already allocated + { + state.ResumeTiming(); + void* ptr = TestAllocatorType::Allocate(operation.m_size, operation.m_alignment); + state.PauseTiming(); + totalAllocationSize += operation.m_size; + it.first->second = ptr; + } + else + { + // Doing a resize, dont account for this memory change, this operation is rare and we dont have + // the size of the previous allocation + state.ResumeTiming(); + TestAllocatorType::Resize(it.first->second, operation.m_size); + state.PauseTiming(); + } + } + else // AllocatorOperation::DEALLOCATE: + { + if (operation.m_recordId) + { + const auto ptrIt = pointerRemapping.find(operation.m_recordId); + if (ptrIt != pointerRemapping.end()) + { + totalAllocationSize -= operation.m_size; + state.ResumeTiming(); + TestAllocatorType::DeAllocate( + ptrIt->second, + /*operation.m_size*/ 0); // size is not correct after a resize, a 0 size deals with it + state.PauseTiming(); + pointerRemapping.erase(ptrIt); + } + } + else // deallocate(nullptr) are recorded + { + // Just to account of the call of deallocate(nullptr); + state.ResumeTiming(); + TestAllocatorType::DeAllocate(nullptr, /*operation.m_size*/ 0); + state.PauseTiming(); + } + } + } + + elementsRead = + file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations) / sizeof(AllocatorOperation); + itemsProcessed += elementsRead; + } + file.Close(); + + // Deallocate the remainder (since we stopped the recording middle-game)(there are leaks as well) + for (const 
auto& pointerMapping : pointerRemapping) + { + state.ResumeTiming(); + TestAllocatorType::DeAllocate(pointerMapping.second); + state.PauseTiming(); + } + itemsProcessed += pointerRemapping.size(); + pointerRemapping.clear(); + } + + state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults); + state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast(totalAllocationSize), benchmark::Counter::kDefaults); + + state.SetItemsProcessed(itemsProcessed); + + TestAllocatorType::GarbageCollect(); + } + } + }; + + // For non-threaded ranges, run 100, 400, 1600 amounts + static void RunRanges(benchmark::internal::Benchmark* b) + { + for (int i = 0; i < 6; i += 2) + { + b->Arg((1 << i) * 100); + } + } + static void RecordedRunRanges(benchmark::internal::Benchmark* b) + { + b->Iterations(1); + } + + // For threaded ranges, run just 100, multi-threaded will already multiply by thread + static void ThreadedRunRanges(benchmark::internal::Benchmark* b) + { + b->Arg(100); + } + + // Test under and over-subscription of threads vs the amount of CPUs available + static const unsigned int MaxThreadRange = 2 * AZStd::thread::hardware_concurrency(); + +#define BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME, ...) \ + BENCHMARK_TEMPLATE_DEFINE_F(FIXTURE, TESTNAME, __VA_ARGS__)(benchmark::State& state) { Benchmark(state); } \ + BENCHMARK_REGISTER_F(FIXTURE, TESTNAME) + + // We test small/big/mixed allocations in single-threaded environments. For multi-threaded environments, we test mixed since + // the multi threaded fixture will run multiple passes (2, 4, 8, ... 
until 2*hardware_concurrency) +#define BM_REGISTER_SIZE_FIXTURES(FIXTURE, TESTNAME, ALLOCATORTYPE) \ + BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_SMALL, ALLOCATORTYPE, SMALL)->Apply(RunRanges); \ + BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_BIG, ALLOCATORTYPE, BIG)->Apply(RunRanges); \ + BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_MIXED, ALLOCATORTYPE, MIXED)->Apply(RunRanges); \ + BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_MIXED_THREADED, ALLOCATORTYPE, MIXED)->ThreadRange(2, MaxThreadRange)->Apply(ThreadedRunRanges); + +#define BM_REGISTER_ALLOCATOR(TESTNAME, ALLOCATORTYPE) \ + namespace BM_##TESTNAME \ + { \ + BM_REGISTER_SIZE_FIXTURES(AllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE); \ + BM_REGISTER_SIZE_FIXTURES(DeAllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE); \ + BM_REGISTER_TEMPLATE(RecordedAllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE)->Apply(RecordedRunRanges); \ + } + + /// Warm up benchmark used to prepare the OS for allocations. Most OS keep allocations for a process somehow + /// reserved. So the first allocations run always get a bigger impact in a process. This warm up allocator runs + /// all the benchmarks and is just used for the next allocators to report more consistent results. 
+ BM_REGISTER_ALLOCATOR(WarmUpAllocator, RawMallocAllocator); + + BM_REGISTER_ALLOCATOR(RawMallocAllocator, RawMallocAllocator); + BM_REGISTER_ALLOCATOR(MallocSchemaAllocator, MallocSchemaAllocator); + BM_REGISTER_ALLOCATOR(HphaSchemaAllocator, HphaSchemaAllocator); + BM_REGISTER_ALLOCATOR(SystemAllocator, TestSystemAllocator); + + //BM_REGISTER_ALLOCATOR(BestFitExternalMapAllocator, BestFitExternalMapAllocator); // Requires to pre-allocate blocks and cannot work as a general-purpose allocator + //BM_REGISTER_ALLOCATOR(HeapSchemaAllocator, TestHeapSchemaAllocator); // Requires to pre-allocate blocks and cannot work as a general-purpose allocator + //BM_REGISTER_SCHEMA(PoolSchema); // Requires special alignment requests while allocating + +#undef BM_REGISTER_ALLOCATOR +#undef BM_REGISTER_SIZE_FIXTURES +#undef BM_REGISTER_TEMPLATE + +} // Benchmark + +#endif // HAVE_BENCHMARK diff --git a/Code/Framework/AzCore/Tests/Memory/HphaSchema.cpp b/Code/Framework/AzCore/Tests/Memory/HphaSchema.cpp index 85dd79931d..08b84416e6 100644 --- a/Code/Framework/AzCore/Tests/Memory/HphaSchema.cpp +++ b/Code/Framework/AzCore/Tests/Memory/HphaSchema.cpp @@ -10,10 +10,6 @@ #include #include -#if defined(HAVE_BENCHMARK) -#include -#endif // HAVE_BENCHMARK - class HphaSchema_TestAllocator : public AZ::SimpleSchemaAllocator { @@ -112,87 +108,3 @@ namespace UnitTest HphaSchemaTestFixture, ::testing::ValuesIn(s_mixedInstancesParameters)); } - - -#if defined(HAVE_BENCHMARK) -namespace Benchmark -{ - class HphaSchemaBenchmarkFixture - : public ::benchmark::Fixture - { - void internalSetUp() - { - AZ::AllocatorInstance::Create(); - } - - void internalTearDown() - { - AZ::AllocatorInstance::Destroy(); - } - - public: - void SetUp(const benchmark::State&) override - { - internalSetUp(); - } - void SetUp(benchmark::State&) override - { - internalSetUp(); - } - void TearDown(const benchmark::State&) override - { - internalTearDown(); - } - void TearDown(benchmark::State&) override - { - 
internalTearDown(); - } - - static void BM_Allocations(benchmark::State& state, const AllocationSizeArray& allocationArray) - { - AZStd::vector allocations; - while (state.KeepRunning()) - { - state.PauseTiming(); - const size_t allocationIndex = allocations.size(); - const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()]; - - state.ResumeTiming(); - void* allocation = AZ::AllocatorInstance::Get().Allocate(allocationSize, 0); - - state.PauseTiming(); - allocations.emplace_back(allocation); - - state.ResumeTiming(); - } - - const size_t numberOfAllocations = allocations.size(); - state.SetItemsProcessed(numberOfAllocations); - - for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex) - { - AZ::AllocatorInstance::Get().DeAllocate(allocations[allocationIndex], allocationArray[allocationIndex % allocationArray.size()]); - } - AZ::AllocatorInstance::Get().GarbageCollect(); - } - }; - - // Small allocations, these are allocations that are going to end up in buckets in the HphaSchema - BENCHMARK_F(HphaSchemaBenchmarkFixture, SmallAllocations)(benchmark::State& state) - { - BM_Allocations(state, s_smallAllocationSizes); - } - - BENCHMARK_F(HphaSchemaBenchmarkFixture, BigAllocations)(benchmark::State& state) - { - BM_Allocations(state, s_bigAllocationSizes); - } - - BENCHMARK_F(HphaSchemaBenchmarkFixture, MixedAllocations)(benchmark::State& state) - { - BM_Allocations(state, s_mixedAllocationSizes); - } - - -} // Benchmark -#endif // HAVE_BENCHMARK diff --git a/Code/Framework/AzCore/Tests/Platform/Android/Tests/Memory/AllocatorBenchmarks_Android.cpp b/Code/Framework/AzCore/Tests/Platform/Android/Tests/Memory/AllocatorBenchmarks_Android.cpp new file mode 100644 index 0000000000..636d5519d8 --- /dev/null +++ b/Code/Framework/AzCore/Tests/Platform/Android/Tests/Memory/AllocatorBenchmarks_Android.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. 
+ * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#include +#include + +#include +#include + +namespace Benchmark +{ + namespace Platform + { + size_t GetProcessMemoryUsageBytes() + { + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + return rusage.ru_maxrss * 1024L; + } + + size_t GetMemorySize(void* memory) + { + return memory ? malloc_usable_size(memory) : 0; + } + } +} diff --git a/Code/Framework/AzCore/Tests/Platform/Android/platform_android_files.cmake b/Code/Framework/AzCore/Tests/Platform/Android/platform_android_files.cmake index ed54a84dbf..3ad1bd3185 100644 --- a/Code/Framework/AzCore/Tests/Platform/Android/platform_android_files.cmake +++ b/Code/Framework/AzCore/Tests/Platform/Android/platform_android_files.cmake @@ -8,4 +8,5 @@ set(FILES Tests/UtilsTests_Android.cpp + Tests/Memory/AllocatorBenchmarks_Android.cpp ) diff --git a/Code/Framework/AzCore/Tests/Platform/Linux/Tests/Memory/AllocatorBenchmarks_Linux.cpp b/Code/Framework/AzCore/Tests/Platform/Linux/Tests/Memory/AllocatorBenchmarks_Linux.cpp new file mode 100644 index 0000000000..636d5519d8 --- /dev/null +++ b/Code/Framework/AzCore/Tests/Platform/Linux/Tests/Memory/AllocatorBenchmarks_Linux.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#include +#include + +#include +#include + +namespace Benchmark +{ + namespace Platform + { + size_t GetProcessMemoryUsageBytes() + { + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + return rusage.ru_maxrss * 1024L; + } + + size_t GetMemorySize(void* memory) + { + return memory ? 
malloc_usable_size(memory) : 0; + } + } +} diff --git a/Code/Framework/AzCore/Tests/Platform/Linux/platform_linux_files.cmake b/Code/Framework/AzCore/Tests/Platform/Linux/platform_linux_files.cmake index 844b621e05..953dbb7791 100644 --- a/Code/Framework/AzCore/Tests/Platform/Linux/platform_linux_files.cmake +++ b/Code/Framework/AzCore/Tests/Platform/Linux/platform_linux_files.cmake @@ -9,4 +9,5 @@ set(FILES Tests/UtilsTests_Linux.cpp ../Common/UnixLike/Tests/UtilsTests_UnixLike.cpp + Tests/Memory/AllocatorBenchmarks_Linux.cpp ) diff --git a/Code/Framework/AzCore/Tests/Platform/Mac/Tests/Memory/AllocatorBenchmarks_Mac.cpp b/Code/Framework/AzCore/Tests/Platform/Mac/Tests/Memory/AllocatorBenchmarks_Mac.cpp new file mode 100644 index 0000000000..932252985a --- /dev/null +++ b/Code/Framework/AzCore/Tests/Platform/Mac/Tests/Memory/AllocatorBenchmarks_Mac.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#include +#include + +#include +#include + +namespace Benchmark +{ + namespace Platform + { + size_t GetProcessMemoryUsageBytes() + { + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + return rusage.ru_maxrss; + } + + size_t GetMemorySize(void* memory) + { + return memory ? 
malloc_size(memory) : 0; + } + } +} diff --git a/Code/Framework/AzCore/Tests/Platform/Mac/platform_mac_files.cmake b/Code/Framework/AzCore/Tests/Platform/Mac/platform_mac_files.cmake index 93d2daf2b8..14e39d47f4 100644 --- a/Code/Framework/AzCore/Tests/Platform/Mac/platform_mac_files.cmake +++ b/Code/Framework/AzCore/Tests/Platform/Mac/platform_mac_files.cmake @@ -9,4 +9,5 @@ set(FILES ../Common/Apple/Tests/UtilsTests_Apple.cpp ../Common/UnixLike/Tests/UtilsTests_UnixLike.cpp + Tests/Memory/AllocatorBenchmarks_Mac.cpp ) diff --git a/Code/Framework/AzCore/Tests/Platform/Windows/Tests/Memory/AllocatorBenchmarks_Windows.cpp b/Code/Framework/AzCore/Tests/Platform/Windows/Tests/Memory/AllocatorBenchmarks_Windows.cpp new file mode 100644 index 0000000000..e9571a7e5b --- /dev/null +++ b/Code/Framework/AzCore/Tests/Platform/Windows/Tests/Memory/AllocatorBenchmarks_Windows.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) Contributors to the Open 3D Engine Project. + * For complete copyright and license terms please see the LICENSE at the root of this distribution. + * + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + */ + +#include +#include + +#include +#include + +namespace Benchmark +{ + namespace Platform + { + size_t GetProcessMemoryUsageBytes() + { + EmptyWorkingSet(GetCurrentProcess()); + + size_t memoryUsage = 0; + MEMORY_BASIC_INFORMATION mbi = { 0 }; + unsigned char* pEndRegion = nullptr; + while (sizeof(mbi) == VirtualQuery(pEndRegion, &mbi, sizeof(mbi))) { + pEndRegion += mbi.RegionSize; + if ((mbi.AllocationProtect & PAGE_READWRITE) && (mbi.State & MEM_COMMIT)) { + memoryUsage += mbi.RegionSize; + } + } + return memoryUsage; + } + + size_t GetMemorySize(void* memory) + { + return memory ? 
_aligned_msize(memory, 1, 0) : 0; + } + } +} diff --git a/Code/Framework/AzCore/Tests/Platform/Windows/platform_windows_files.cmake b/Code/Framework/AzCore/Tests/Platform/Windows/platform_windows_files.cmake index 0a96dad34e..97b12b28e6 100644 --- a/Code/Framework/AzCore/Tests/Platform/Windows/platform_windows_files.cmake +++ b/Code/Framework/AzCore/Tests/Platform/Windows/platform_windows_files.cmake @@ -9,6 +9,7 @@ set(FILES ../Common/WinAPI/Tests/UtilsTests_WinAPI.cpp Tests/IO/Streamer/StorageDriveTests_Windows.cpp + Tests/Memory/AllocatorBenchmarks_Windows.cpp Tests/Memory/OverrunDetectionAllocator_Windows.cpp Tests/Serialization_Windows.cpp ) diff --git a/Code/Framework/AzCore/Tests/azcoretests_files.cmake b/Code/Framework/AzCore/Tests/azcoretests_files.cmake index 98f268b61a..3777071168 100644 --- a/Code/Framework/AzCore/Tests/azcoretests_files.cmake +++ b/Code/Framework/AzCore/Tests/azcoretests_files.cmake @@ -170,6 +170,7 @@ set(FILES Math/Vector3Tests.cpp Math/Vector4PerformanceTests.cpp Math/Vector4Tests.cpp + Memory/AllocatorBenchmarks.cpp Memory/AllocatorManager.cpp Memory/HphaSchema.cpp Memory/HphaSchemaErrorDetection.cpp