diff --git a/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp b/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp
index 32ccd43f9a..48994c4eef 100644
--- a/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp
+++ b/Code/Framework/AzCore/AzCore/Memory/AllocatorBase.cpp
@@ -11,7 +11,7 @@
 
 using namespace AZ;
 
-#define RECORDING_ENABLED 1
+#define RECORDING_ENABLED 0
 
 #if RECORDING_ENABLED
 
@@ -44,18 +44,22 @@ namespace
         }
     };
 
-    struct AllocatorOperation
+    #pragma pack(push, 1)
+    struct alignas(1) AllocatorOperation
     {
-        enum OperationType : unsigned int
+        enum OperationType : size_t
        {
            ALLOCATE,
            DEALLOCATE
        };
        OperationType m_type: 1;
-        unsigned int m_size : 28; // Can represent up to 256Mb requests
-        unsigned int m_alignment : 7; // Can represent up to 128 alignment
-        unsigned int m_recordId : 28; // Can represent up to 256M simultaneous requests, we reuse ids
+        size_t m_size : 28; // Can represent up to 256Mb requests
+        size_t m_alignment : 7; // Can represent up to 128 alignment
+        size_t m_recordId : 28; // Can represent up to 256M simultaneous requests, we reuse ids
     };
+    #pragma pack(pop)
+    static_assert(sizeof(AllocatorOperation) == 8);
+
     static AZStd::mutex s_operationsMutex = {};
     static constexpr size_t s_maxNumberOfAllocationsToRecord = 16384;
@@ -76,7 +80,12 @@ namespace
             if (s_operationCounter == s_allocationOperationCount)
             {
                 AZ::IO::SystemFile file;
-                file.Open("memoryrecordings.bin", AZ::IO::SystemFile::OpenMode::SF_OPEN_APPEND);
+                int mode = AZ::IO::SystemFile::OpenMode::SF_OPEN_APPEND | AZ::IO::SystemFile::OpenMode::SF_OPEN_WRITE_ONLY;
+                if (!file.Exists("memoryrecordings.bin"))
+                {
+                    mode |= AZ::IO::SystemFile::OpenMode::SF_OPEN_CREATE;
+                }
+                file.Open("memoryrecordings.bin", mode);
                 if (file.IsOpen())
                 {
                     file.Write(&s_operations, sizeof(AllocatorOperation) * s_allocationOperationCount);
diff --git a/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin
index 2b587a2304..ec5de82e83 100644
--- a/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin
+++ b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarkRecordings.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0f148441ce120b303896618cec364b5afb6f8b911b4785ec6358cfe8467cf7a
-size 368640
+oid sha256:281ba03e79ecba90b313a0b17bdba87c57d76b504b6e38d579b5eabd995902cc
+size 245760
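For reference, the recording written by AllocatorBase.cpp above is a flat stream of packed 8-byte entries (1-bit operation type, 28-bit size, 7-bit alignment, 28-bit record id), flushed in blocks; the updated LFS recording (245760 bytes) is consistent with that, being an exact multiple of the 8-byte record size. Below is a minimal standalone sketch of a reader for such a file. It mirrors the struct from the patch, but deliberately uses std::ifstream and fixed-width integer types instead of AZ::IO::SystemFile; it is illustrative only and not part of the change.

// Illustrative reader for memoryrecordings.bin; mirrors the packed layout added in the patch above.
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <vector>

#pragma pack(push, 1)
struct alignas(1) AllocatorOperation
{
    enum OperationType : uint64_t { ALLOCATE, DEALLOCATE };
    OperationType m_type : 1;
    uint64_t m_size : 28;     // up to 256MB request sizes
    uint64_t m_alignment : 7; // up to 128-byte alignment
    uint64_t m_recordId : 28; // ids are reused between allocate/deallocate pairs
};
#pragma pack(pop)
static_assert(sizeof(AllocatorOperation) == 8, "records are written as 8-byte entries");

int main()
{
    std::ifstream file("memoryrecordings.bin", std::ios::binary);
    std::vector<AllocatorOperation> ops(16384); // one block; 16384 matches s_maxNumberOfAllocationsToRecord
    // Read a block, then decode however many whole records were actually read.
    while (file.read(reinterpret_cast<char*>(ops.data()),
                     static_cast<std::streamsize>(ops.size() * sizeof(AllocatorOperation))),
           file.gcount() > 0)
    {
        const size_t count = static_cast<size_t>(file.gcount()) / sizeof(AllocatorOperation);
        for (size_t i = 0; i < count; ++i)
        {
            std::printf("%s id=%llu size=%llu align=%llu\n",
                ops[i].m_type == AllocatorOperation::ALLOCATE ? "alloc" : "free",
                static_cast<unsigned long long>(ops[i].m_recordId),
                static_cast<unsigned long long>(ops[i].m_size),
                static_cast<unsigned long long>(ops[i].m_alignment));
        }
    }
    return 0;
}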
diff --git a/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp
index f5a728018c..aa1c7f21ab 100644
--- a/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp
+++ b/Code/Framework/AzCore/Tests/Memory/AllocatorBenchmarks.cpp
@@ -390,115 +390,159 @@ namespace Benchmark
             {
             }
         }
     };
-
+
     template<typename TAllocator>
-    class RecordedAllocationBenchmarkFixture : public AllocatorBenchmarkFixture<TAllocator>
+    class RecordedAllocationBenchmarkFixture : public ::benchmark::Fixture
     {
-        using base = AllocatorBenchmarkFixture<TAllocator>;
-        using TestAllocatorType = typename base::TestAllocatorType;
+        using TestAllocatorType = TestAllocatorWrapper<TAllocator>;
+
+        virtual void internalSetUp()
+        {
+            TestAllocatorType::SetUp();
+        }
+
+        void internalTearDown()
+        {
+            TestAllocatorType::TearDown();
+        }
 
-        struct AllocatorOperation
+        #pragma pack(push, 1)
+        struct alignas(1) AllocatorOperation
         {
-            enum OperationType : unsigned int
+            enum OperationType : size_t
             {
                 ALLOCATE,
                 DEALLOCATE
             };
             OperationType m_type : 1;
-            unsigned int m_size : 28; // Can represent up to 256Mb requests
-            unsigned int m_alignment : 7; // Can represent up to 128 alignment
-            unsigned int m_recordId : 28; // Can represent up to 256M simultaneous requests, we reuse ids
+            size_t m_size : 28; // Can represent up to 256Mb requests
+            size_t m_alignment : 7; // Can represent up to 128 alignment
+            size_t m_recordId : 28; // Can represent up to 256M simultaneous requests, we reuse ids
         };
+        #pragma pack(pop)
+        static_assert(sizeof(AllocatorOperation) == 8);
 
     public:
+        void SetUp(const ::benchmark::State&) override
+        {
+            internalSetUp();
+        }
+        void SetUp(::benchmark::State&) override
+        {
+            internalSetUp();
+        }
+
+        void TearDown(const ::benchmark::State&) override
+        {
+            internalTearDown();
+        }
+        void TearDown(::benchmark::State&) override
+        {
+            internalTearDown();
+        }
+
         void Benchmark(benchmark::State& state)
         {
             for (auto _ : state)
             {
                 state.PauseTiming();
-                AZStd::unordered_map<unsigned int, void*> pointerRemapping;
+                AZStd::unordered_map<size_t, void*> pointerRemapping;
                 constexpr size_t allocationOperationCount = 5 * 1024;
                 AZStd::array<AllocatorOperation, allocationOperationCount> m_operations = {};
+                [[maybe_unused]] const size_t operationSize = sizeof(AllocatorOperation);
 
-                AZ::IO::SystemFile file;
-                AZ::IO::FixedMaxPathString filePath = AZ::Utils::GetExecutableDirectory();
-                filePath += "/Tests/AzCore/Memory/AllocatorBenchmarkRecordings.bin";
-                if (!file.Open(filePath.c_str(), AZ::IO::SystemFile::OpenMode::SF_OPEN_READ_ONLY))
-                {
-                    return;
-                }
-                size_t elementsRead = file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations);
-                size_t totalElementsRead = elementsRead;
                 const size_t processMemoryBaseline = Platform::GetProcessMemoryUsageBytes();
                 size_t totalAllocationSize = 0;
+                size_t itemsProcessed = 0;
 
-                while (elementsRead > 0)
+                for (size_t i = 0; i < 100; ++i) // replay the recording; this way we can keep a smaller recording
                 {
-                    for (size_t operationIndex = 0; operationIndex < elementsRead; ++operationIndex)
+                    AZ::IO::SystemFile file;
+                    AZ::IO::FixedMaxPathString filePath = AZ::Utils::GetExecutableDirectory();
+                    filePath += "/Tests/AzCore/Memory/AllocatorBenchmarkRecordings.bin";
+                    if (!file.Open(filePath.c_str(), AZ::IO::SystemFile::OpenMode::SF_OPEN_READ_ONLY))
                     {
-                        const AllocatorOperation& operation = m_operations[operationIndex];
-                        if (operation.m_type == AllocatorOperation::ALLOCATE)
-                        {
-                            const auto it = pointerRemapping.emplace(operation.m_recordId, nullptr);
-                            if (it.second) // otherwise already allocated
-                            {
-                                state.ResumeTiming();
-                                void* ptr = TestAllocatorType::Allocate(operation.m_size, operation.m_alignment);
-                                state.PauseTiming();
-                                totalAllocationSize += operation.m_size;
-                                it.first->second = ptr;
-                            }
-                            else
-                            {
-                                // Doing a resize, dont account for this memory change, this operation is rare and we dont have
-                                // the size of the previous allocation
-                                state.ResumeTiming();
-                                TestAllocatorType::Resize(it.first->second, operation.m_size);
-                                state.PauseTiming();
-                            }
-                        }
-                        else // AllocatorOperation::DEALLOCATE:
+                        return;
+                    }
+                    size_t elementsRead =
+                        file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations) / sizeof(AllocatorOperation);
+                    itemsProcessed += elementsRead;
+
+                    while (elementsRead > 0)
+                    {
+                        for (size_t operationIndex = 0; operationIndex < elementsRead; ++operationIndex)
                         {
-                            if (operation.m_recordId)
+                            const AllocatorOperation& operation = m_operations[operationIndex];
+                            if (operation.m_type == AllocatorOperation::ALLOCATE)
                             {
-                                const auto ptrIt = pointerRemapping.find(operation.m_recordId);
-                                if (ptrIt != pointerRemapping.end())
+                                const auto it = pointerRemapping.emplace(operation.m_recordId, nullptr);
+                                if (it.second) // otherwise already allocated
+                                {
+                                    state.ResumeTiming();
+                                    void* ptr = TestAllocatorType::Allocate(operation.m_size, operation.m_alignment);
+                                    state.PauseTiming();
+                                    totalAllocationSize += operation.m_size;
+                                    it.first->second = ptr;
+                                }
+                                else
                                 {
-                                    totalAllocationSize -= operation.m_size;
+                                    // Doing a resize, dont account for this memory change, this operation is rare and we dont have
+                                    // the size of the previous allocation
                                     state.ResumeTiming();
-                                    TestAllocatorType::DeAllocate(ptrIt->second, /*operation.m_size*/ 0); // size is not correct after a resize, a 0 size deals with it
+                                    TestAllocatorType::Resize(it.first->second, operation.m_size);
                                     state.PauseTiming();
-                                    pointerRemapping.erase(ptrIt);
                                 }
                             }
-                            else // deallocate(nullptr) are recorded
+                            else // AllocatorOperation::DEALLOCATE:
                             {
-                                // Just to account of the call of deallocate(nullptr);
-                                state.ResumeTiming();
-                                TestAllocatorType::DeAllocate(nullptr, /*operation.m_size*/ 0);
-                                state.PauseTiming();
+                                if (operation.m_recordId)
+                                {
+                                    const auto ptrIt = pointerRemapping.find(operation.m_recordId);
+                                    if (ptrIt != pointerRemapping.end())
+                                    {
+                                        totalAllocationSize -= operation.m_size;
+                                        state.ResumeTiming();
+                                        TestAllocatorType::DeAllocate(
+                                            ptrIt->second,
+                                            /*operation.m_size*/ 0); // size is not correct after a resize, a 0 size deals with it
+                                        state.PauseTiming();
+                                        pointerRemapping.erase(ptrIt);
+                                    }
+                                }
+                                else // deallocate(nullptr) are recorded
+                                {
+                                    // Just to account of the call of deallocate(nullptr);
+                                    state.ResumeTiming();
+                                    TestAllocatorType::DeAllocate(nullptr, /*operation.m_size*/ 0);
+                                    state.PauseTiming();
+                                }
                             }
                         }
+
+                        elementsRead =
+                            file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations) / sizeof(AllocatorOperation);
+                        itemsProcessed += elementsRead;
                     }
+                    file.Close();
 
-                    elementsRead = file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations);
-                    totalElementsRead += elementsRead;
+                    // Deallocate the remainder (since we stopped the recording middle-game)(there are leaks as well)
+                    for (const auto& pointerMapping : pointerRemapping)
+                    {
+                        state.ResumeTiming();
+                        TestAllocatorType::DeAllocate(pointerMapping.second);
+                        state.PauseTiming();
+                    }
+                    itemsProcessed += pointerRemapping.size();
+                    pointerRemapping.clear();
                 }
-                file.Close();
 
                 state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast<double>(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults);
                 state.counters[s_counterProcessMemory] = benchmark::Counter(static_cast<double>(Platform::GetProcessMemoryUsageBytes() - processMemoryBaseline), benchmark::Counter::kDefaults);
                 state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast<double>(totalAllocationSize), benchmark::Counter::kDefaults);
-                state.SetItemsProcessed(totalElementsRead);
+                state.SetItemsProcessed(itemsProcessed);
 
-                // Deallocate the remainder (since we stopped the recording middle-game)(there are leaks as well)
-                for (const auto& pointerMapping : pointerRemapping)
-                {
-                    TestAllocatorType::DeAllocate(pointerMapping.second);
-                }
-                pointerRemapping.clear();
                 TestAllocatorType::GarbageCollect();
             }
         }
@@ -514,8 +558,7 @@ namespace Benchmark
     }
     static void RecordedRunRanges(benchmark::internal::Benchmark* b)
     {
-        b->Arg(1);
-        b->Iterations(100);
+        b->Iterations(1);
     }
 
     // For threaded ranges, run just 200, multi-threaded will already multiply by thread
@@ -547,6 +590,11 @@ namespace Benchmark
         BM_REGISTER_TEMPLATE(RecordedAllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE)->Apply(RecordedRunRanges); \
     }
 
+    /// Warm-up benchmark used to prepare the OS for allocations. Most OSes keep allocations for a process somewhat
+    /// reserved, so the first allocation runs always have a bigger impact on a process. This warm-up allocator runs
+    /// all the benchmarks and is only used so that the next allocators report more consistent results.
+    BM_REGISTER_ALLOCATOR(WarmUpAllocator, TestRawMallocAllocator);
+
     BM_REGISTER_ALLOCATOR(RawMallocAllocator, TestRawMallocAllocator);
     BM_REGISTER_ALLOCATOR(MallocSchemaAllocator, TestMallocSchemaAllocator);
     BM_REGISTER_ALLOCATOR(HphaSchemaAllocator, TestHphaSchemaAllocator);
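As a summary of what the rewritten RecordedAllocationBenchmarkFixture::Benchmark above does, here is a condensed, standalone model of the replay loop: record ids stand in for the original pointers via a hash map, a second ALLOCATE on a live id is treated as a resize, a DEALLOCATE with id 0 models a recorded deallocate(nullptr), and whatever is still mapped at the end is freed because the recording stops mid-run. The sketch runs against plain std::malloc/std::realloc/std::free, ignores alignment and all timing/counter bookkeeping, and its names are illustrative only; it shows the technique, it is not code from the patch.

// Condensed model of the benchmark's replay loop; illustrative only, not part of the patch.
#include <cstdint>
#include <cstdlib>
#include <unordered_map>
#include <vector>

#pragma pack(push, 1)
struct alignas(1) AllocatorOperation // same assumed 8-byte layout as the reader sketch above
{
    enum OperationType : uint64_t { ALLOCATE, DEALLOCATE };
    OperationType m_type : 1;
    uint64_t m_size : 28;
    uint64_t m_alignment : 7;
    uint64_t m_recordId : 28;
};
#pragma pack(pop)

void ReplayOperations(const std::vector<AllocatorOperation>& operations)
{
    // Record ids from the recording are remapped to live pointers as we replay.
    std::unordered_map<uint64_t, void*> pointerRemapping;
    for (const AllocatorOperation& operation : operations)
    {
        if (operation.m_type == AllocatorOperation::ALLOCATE)
        {
            auto it = pointerRemapping.emplace(operation.m_recordId, nullptr);
            if (it.second)
            {
                // First ALLOCATE for this id: a fresh allocation (alignment ignored in this sketch).
                it.first->second = std::malloc(operation.m_size);
            }
            else
            {
                // ALLOCATE for an id that is already live: the recording means a resize.
                it.first->second = std::realloc(it.first->second, operation.m_size);
            }
        }
        else if (operation.m_recordId != 0) // id 0 records a deallocate(nullptr) call; nothing to do here
        {
            auto ptrIt = pointerRemapping.find(operation.m_recordId);
            if (ptrIt != pointerRemapping.end())
            {
                std::free(ptrIt->second);
                pointerRemapping.erase(ptrIt);
            }
        }
    }
    // The recording stops mid-run, so release whatever is still mapped (this also covers real leaks).
    for (auto& pointerMapping : pointerRemapping)
    {
        std::free(pointerMapping.second);
    }
}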