Memory/benchmarks (#5896)

* initial version ported from an old implementation

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* simplification of code

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Fixes a recursive loop

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Removing commented code of different options for getting memory usage of a process

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* PR comment (NULL->nullptr)

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Adds multi-threaded tests

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Improving runtime and making the whole duration manageable

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Fixes Linux build

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Fixes for mac

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Fixes for HeapSchema to get a default block if none is passed

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Adds recording functionality (disabled) and a benchmark that can run recordings

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Removes the Heap allocator as a possible SystemAllocator backend, since it doesn't allow dynamic allocation (it only works with pre-allocated blocks)

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* WIP trying to use SystemAllocator instead of raw reads

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Makes the recorded benchmark more stable

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* More stability changes, improvement on type usage within the benchmark, cleanup of unstable stats

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Adds benchmark files for Android

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* Fixes Linux nounity build

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* PR comments

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>

* The death test relied on an exception occurring; that exception is an access violation, which might not happen (i.e. the memory could be valid for the process).
The test didn't have to be a death test. Also handled the situation better in the code to be able to continue in that scenario (useful for release configurations).

Signed-off-by: Esteban Papp <81431996+amznestebanpapp@users.noreply.github.com>
monroegm-disable-blank-issue-2
Esteban Papp 4 years ago committed by GitHub
parent 1d0cd46cb7
commit 89067fe667
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -6,8 +6,155 @@
*
*/
#include <AzCore/Memory/AllocatorManager.h>
#include <AzCore/Memory/Memory.h>
#include <AzCore/Memory/AllocatorManager.h>
#define RECORDING_ENABLED 0
#if RECORDING_ENABLED
#include <AzCore/std/containers/unordered_map.h>
#include <AzCore/IO/SystemFile.h>
#include <AzCore/std/parallel/mutex.h>
#include <AzCore/std/parallel/scoped_lock.h>
namespace
{
/// Allocator used by the recording bookkeeping containers in this file.
/// Every request is forwarded directly to AZ_OS_MALLOC / AZ_OS_FREE.
class DebugAllocator
{
public:
    using pointer_type = void*;
    using size_type = AZStd::size_t;
    using difference_type = AZStd::ptrdiff_t;
    using allow_memory_leaks = AZStd::false_type; ///< Regular allocators should not leak.

    /// Requests byteSize bytes with the given alignment from AZ_OS_MALLOC. The trailing flags argument is ignored.
    AZ_FORCE_INLINE pointer_type allocate(size_t byteSize, size_t alignment, int /*flags*/ = 0)
    {
        pointer_type block = AZ_OS_MALLOC(byteSize, alignment);
        return block;
    }

    /// In-place resizing is not supported; always reports 0.
    AZ_FORCE_INLINE size_type resize(pointer_type /*ptr*/, size_type /*newSize*/)
    {
        constexpr size_type resizeNotSupported = 0;
        return resizeNotSupported;
    }

    /// Hands ptr back to AZ_OS_FREE; the size and alignment arguments are ignored.
    AZ_FORCE_INLINE void deallocate(pointer_type ptr, size_type /*byteSize*/, size_type /*alignment*/)
    {
        AZ_OS_FREE(ptr);
    }
};
// One recorded allocator call, packed into exactly 8 bytes (1 + 28 + 7 + 28 = 64 bits, enforced by the
// static_assert below). RecordAllocatorOperation writes arrays of these verbatim to "memoryrecordings.bin",
// so any reader of that file has to declare the same layout.
#pragma pack(push, 1)
struct alignas(1) AllocatorOperation
{
enum OperationType : size_t
{
ALLOCATE,
DEALLOCATE
};
OperationType m_type: 1; // ALLOCATE or DEALLOCATE
size_t m_size : 28; // Requested byte size; 28 bits hold values up to 2^28 - 1 (just under 256MiB), larger values are truncated
size_t m_alignment : 7; // Requested alignment; 7 bits hold values up to 127, so an alignment of 128 or more does not fit
size_t m_recordId : 28; // Id linking a DEALLOCATE to its ALLOCATE; up to 2^28 - 1 simultaneous ids, ids are reused. 0 is used for deallocate(nullptr)
};
#pragma pack(pop)
static_assert(sizeof(AllocatorOperation) == 8);
// State shared by RecordAllocatorOperation. Everything below is guarded by s_operationsMutex.
static AZStd::mutex s_operationsMutex = {};

// Limit on how many ALLOCATE operations get recorded, and how many have been recorded so far
static constexpr size_t s_maxNumberOfAllocationsToRecord = 16384;
static size_t s_numberOfAllocationsRecorded = 0;

// In-memory buffer of operations; it is appended to the recording file each time it fills up
static constexpr size_t s_allocationOperationCount = 5 * 1024;
static AZStd::array<AllocatorOperation, s_allocationOperationCount> s_operations = {};
static uint64_t s_operationCounter = 0; // next free slot in s_operations

// Next never-used record id. Id 0 is reserved to flag deallocate(nullptr)
static unsigned int s_nextRecordId = 1;

// Live allocations by address. The third and fourth template parameters of AZStd::unordered_map are the
// hasher and the key-equality predicate (the allocator is the fifth), so they are spelled out here in
// order to be able to pass DebugAllocator.
using AllocatorOperationByAddress =
    AZStd::unordered_map<void*, AllocatorOperation, AZStd::hash<void*>, AZStd::equal_to<void*>, DebugAllocator>;
static AllocatorOperationByAddress s_allocatorOperationByAddress;

// Record ids released by deallocations (or by rejected duplicates) that can be handed out again
using AvailableRecordIds = AZStd::vector<unsigned int, DebugAllocator>;
static AvailableRecordIds s_availableRecordIds;
/// Records one allocator call into s_operations, appending the buffer to "memoryrecordings.bin" whenever it is full.
/// @param type      ALLOCATE or DEALLOCATE
/// @param ptr       address returned by / passed to the allocator; used to pair a DEALLOCATE with its ALLOCATE
/// @param size      requested size, only stored for ALLOCATE (a DEALLOCATE copies it from the paired ALLOCATE)
/// @param alignment requested alignment, only stored for ALLOCATE
/// ALLOCATE operations stop being recorded once s_maxNumberOfAllocationsToRecord of them have been recorded.
/// A DEALLOCATE of a non-null address that was never recorded is dropped.
void RecordAllocatorOperation(AllocatorOperation::OperationType type, void* ptr, size_t size = 0, size_t alignment = 0)
{
    AZStd::scoped_lock<AZStd::mutex> lock(s_operationsMutex);

    if (s_operationCounter == s_allocationOperationCount)
    {
        // The buffer is full: append it to the recording file and start filling it again.
        // If the file cannot be opened the buffered operations are discarded.
        AZ::IO::SystemFile file;
        int mode = AZ::IO::SystemFile::OpenMode::SF_OPEN_APPEND | AZ::IO::SystemFile::OpenMode::SF_OPEN_WRITE_ONLY;
        if (!file.Exists("memoryrecordings.bin"))
        {
            mode |= AZ::IO::SystemFile::OpenMode::SF_OPEN_CREATE;
        }
        file.Open("memoryrecordings.bin", mode);
        if (file.IsOpen())
        {
            file.Write(&s_operations, sizeof(AllocatorOperation) * s_allocationOperationCount);
            file.Close();
        }
        s_operationCounter = 0;
    }

    // Claim the next slot up front; the paths that end up not recording give it back with --s_operationCounter
    AllocatorOperation& operation = s_operations[s_operationCounter++];
    operation.m_type = type;
    if (type == AllocatorOperation::OperationType::ALLOCATE)
    {
        // '>=' so that exactly s_maxNumberOfAllocationsToRecord allocations are recorded ('>' let one extra through)
        if (s_numberOfAllocationsRecorded >= s_maxNumberOfAllocationsToRecord)
        {
            // reached limit of allocations, dont record anymore
            --s_operationCounter;
            return;
        }
        ++s_numberOfAllocationsRecorded;
        operation.m_size = size;
        operation.m_alignment = alignment;

        // Prefer recycling an id released by an earlier deallocation over minting a new one
        unsigned int recordId = 0;
        if (!s_availableRecordIds.empty())
        {
            recordId = s_availableRecordIds.back();
            s_availableRecordIds.pop_back();
        }
        else
        {
            recordId = s_nextRecordId;
            ++s_nextRecordId;
        }
        operation.m_recordId = recordId;

        auto it = s_allocatorOperationByAddress.emplace(ptr, operation);
        if (!it.second)
        {
            // double alloc or resize, leave the current record and return the id
            // NOTE(review): this copies the whole existing record, so the size/alignment passed to this call
            // are not what ends up in the recording - confirm that is intended for resizes.
            operation = it.first->second;
            s_availableRecordIds.emplace_back(recordId);
        }
    }
    else
    {
        if (ptr == nullptr)
        {
            // common scenario, just record the operation
            operation.m_size = 0;
            operation.m_alignment = 0;
            operation.m_recordId = 0; // recordId = 0 will flag this case
        }
        else
        {
            auto it = s_allocatorOperationByAddress.find(ptr);
            if (it != s_allocatorOperationByAddress.end())
            {
                // Mirror the paired allocation in the record, then release its id and forget the address
                operation.m_size = it->second.m_size;
                operation.m_alignment = it->second.m_alignment;
                operation.m_recordId = it->second.m_recordId;
                s_availableRecordIds.push_back(it->second.m_recordId);
                s_allocatorOperationByAddress.erase(it);
            }
            else
            {
                // just dont record this operation
                --s_operationCounter;
            }
        }
    }
}
}
#endif
namespace AZ
{
@ -150,6 +297,10 @@ namespace AZ
records->RegisterAllocation(ptr, byteSize, alignment, name, fileName, lineNum, suppressStackRecord + 1);
}
}
#if RECORDING_ENABLED
RecordAllocatorOperation(AllocatorOperation::ALLOCATE, ptr, byteSize, alignment);
#endif
}
void AllocatorBase::ProfileDeallocation(void* ptr, size_t byteSize, size_t alignment, Debug::AllocationInfo* info)
@ -162,6 +313,9 @@ namespace AZ
records->UnregisterAllocation(ptr, byteSize, alignment, info);
}
}
#if RECORDING_ENABLED
RecordAllocatorOperation(AllocatorOperation::DEALLOCATE, ptr, byteSize, alignment);
#endif
}
void AllocatorBase::ProfileReallocationBegin([[maybe_unused]] void* ptr, [[maybe_unused]] size_t newSize)
@ -176,6 +330,10 @@ namespace AZ
ProfileDeallocation(ptr, 0, 0, &info);
ProfileAllocation(newPtr, newSize, newAlignment, info.m_name, info.m_fileName, info.m_lineNum, 0);
}
#if RECORDING_ENABLED
RecordAllocatorOperation(AllocatorOperation::DEALLOCATE, ptr);
RecordAllocatorOperation(AllocatorOperation::ALLOCATE, newPtr, newSize, newAlignment);
#endif
}
void AllocatorBase::ProfileReallocation(void* ptr, void* newPtr, size_t newSize, size_t newAlignment)
@ -193,6 +351,9 @@ namespace AZ
records->ResizeAllocation(ptr, newSize);
}
}
#if RECORDING_ENABLED
RecordAllocatorOperation(AllocatorOperation::ALLOCATE, ptr, newSize);
#endif
}
bool AllocatorBase::OnOutOfMemory(size_t byteSize, size_t alignment, int flags, const char* name, const char* fileName, int lineNum)

@ -115,6 +115,7 @@ namespace AZ
m_ownMemoryBlock[i] = false;
}
AZ_Assert(m_desc.m_numMemoryBlocks > 0, "At least one memory block is required");
for (int i = 0; i < m_desc.m_numMemoryBlocks; ++i)
{
if (m_desc.m_memoryBlocks[i] == nullptr) // Allocate memory block if requested!
@ -131,17 +132,6 @@ namespace AZ
m_capacity += m_desc.m_memoryBlocksByteSize[i];
}
if (m_desc.m_numMemoryBlocks == 0)
{
// Create default memory space if we can to serve for default allocations
m_memSpaces[0] = AZDLMalloc::create_mspace(0, m_desc.m_isMultithreadAlloc);
if (m_memSpaces[0])
{
AZDLMalloc::mspace_az_set_expandable(m_memSpaces[0], true);
m_capacity = Platform::GetHeapCapacity();
}
}
}
HeapSchema::~HeapSchema()

@ -32,17 +32,11 @@ namespace AZ
*/
struct Descriptor
{
Descriptor()
: m_numMemoryBlocks(0)
, m_isMultithreadAlloc(true)
{}
static const int m_memoryBlockAlignment = 64 * 1024;
static const int m_maxNumBlocks = 5;
int m_numMemoryBlocks; ///< Number of memory blocks to use.
void* m_memoryBlocks[m_maxNumBlocks]; ///< Pointers to provided memory blocks or NULL if you want the system to allocate them for you with the System Allocator.
size_t m_memoryBlocksByteSize[m_maxNumBlocks]; ///< Sizes of different memory blocks, if m_memoryBlock is 0 the block will be allocated for you with the System Allocator.
bool m_isMultithreadAlloc; ///< Set to true to enable multi threading safe allocation.
int m_numMemoryBlocks = 1; ///< Number of memory blocks to use.
void* m_memoryBlocks[m_maxNumBlocks] = {}; ///< Pointers to provided memory blocks or NULL if you want the system to allocate them for you with the System Allocator.
size_t m_memoryBlocksByteSize[m_maxNumBlocks] = {4 * 1024}; ///< Sizes of different memory blocks, if m_memoryBlock is 0 the block will be allocated for you with the System Allocator.
bool m_isMultithreadAlloc = true; ///< Set to true to enable multi threading safe allocation.
};
HeapSchema(const Descriptor& desc);

@ -18,7 +18,6 @@
#define AZCORE_SYSTEM_ALLOCATOR_HPHA 1
#define AZCORE_SYSTEM_ALLOCATOR_MALLOC 2
#define AZCORE_SYSTEM_ALLOCATOR_HEAP 3
#if !defined(AZCORE_SYSTEM_ALLOCATOR)
// define the default
@ -29,8 +28,6 @@
#include <AzCore/Memory/HphaSchema.h>
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC
#include <AzCore/Memory/MallocSchema.h>
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP
#include <AzCore/Memory/HeapSchema.h>
#else
#error "Invalid allocator selected for SystemAllocator"
#endif
@ -44,8 +41,6 @@ namespace AZ
static AZStd::aligned_storage<sizeof(HphaSchema), AZStd::alignment_of<HphaSchema>::value>::type g_systemSchema;
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC
static AZStd::aligned_storage<sizeof(MallocSchema), AZStd::alignment_of<MallocSchema>::value>::type g_systemSchema;
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP
static AZStd::aligned_storage<sizeof(HeapSchema), AZStd::alignment_of<HeapSchema>::value>::type g_systemSchema;
#endif
//////////////////////////////////////////////////////////////////////////
@ -118,11 +113,6 @@ namespace AZ
heapDesc.m_systemChunkSize = desc.m_heap.m_systemChunkSize;
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC
MallocSchema::Descriptor heapDesc;
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP
HeapSchema::Descriptor heapDesc;
memcpy(heapDesc.m_memoryBlocks, desc.m_heap.m_memoryBlocks, sizeof(heapDesc.m_memoryBlocks));
memcpy(heapDesc.m_memoryBlocksByteSize, desc.m_heap.m_memoryBlocksByteSize, sizeof(heapDesc.m_memoryBlocksByteSize));
heapDesc.m_numMemoryBlocks = desc.m_heap.m_numMemoryBlocks;
#endif
if (&AllocatorInstance<SystemAllocator>::Get() == this) // if we are the system allocator
{
@ -132,8 +122,6 @@ namespace AZ
m_allocator = new (&g_systemSchema) HphaSchema(heapDesc);
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC
m_allocator = new (&g_systemSchema) MallocSchema(heapDesc);
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP
m_allocator = new (&g_systemSchema) HeapSchema(heapDesc);
#endif
g_isSystemSchemaUsed = true;
isReady = true;
@ -149,8 +137,6 @@ namespace AZ
m_allocator = azcreate(HphaSchema, (heapDesc), SystemAllocator);
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC
m_allocator = azcreate(MallocSchema, (heapDesc), SystemAllocator);
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP
m_allocator = azcreate(HeapSchema, (heapDesc), SystemAllocator);
#endif
if (m_allocator == nullptr)
{
@ -186,8 +172,6 @@ namespace AZ
static_cast<HphaSchema*>(m_allocator)->~HphaSchema();
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_MALLOC
static_cast<MallocSchema*>(m_allocator)->~MallocSchema();
#elif AZCORE_SYSTEM_ALLOCATOR == AZCORE_SYSTEM_ALLOCATOR_HEAP
static_cast<HeapSchema*>(m_allocator)->~HeapSchema();
#endif
g_isSystemSchemaUsed = false;
}

@ -146,6 +146,11 @@ if(PAL_TRAIT_BUILD_TESTS_SUPPORTED)
PROPERTY COMPILE_DEFINITIONS
VALUES AZCORETEST_DLL_NAME=\"$<TARGET_FILE_NAME:AzCore.Tests>\"
)
ly_add_target_files(
TARGETS AzCore.Tests
FILES ${CMAKE_CURRENT_SOURCE_DIR}/Tests/Memory/AllocatorBenchmarkRecordings.bin
OUTPUT_SUBDIRECTORY Tests/AzCore/Memory
)
endif()

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:281ba03e79ecba90b313a0b17bdba87c57d76b504b6e38d579b5eabd995902cc
size 245760

@ -0,0 +1,591 @@
/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#if defined(HAVE_BENCHMARK)
#include <AzCore/PlatformIncl.h>
#include <AzCore/IO/SystemFile.h>
#include <AzCore/RTTI/TypeInfo.h>
#include <AzCore/Memory/BestFitExternalMapAllocator.h>
#include <AzCore/Memory/HeapSchema.h>
#include <AzCore/Memory/HphaSchema.h>
#include <AzCore/Memory/MallocSchema.h>
#include <AzCore/Memory/OSAllocator.h>
#include <AzCore/Memory/PoolSchema.h>
#include <AzCore/Memory/SystemAllocator.h>
#include <AzCore/std/containers/array.h>
#include <AzCore/std/containers/vector.h>
#include <AzCore/std/containers/unordered_map.h>
#include <AzCore/Utils/Utils.h>
#include <benchmark/benchmark.h>
namespace Benchmark
{
// Declarations only; no definitions are visible in this file (presumably supplied per platform - confirm).
namespace Platform
{
// Memory usage of the current process in bytes, going by the name. NOTE(review): which metric is reported is not visible here.
size_t GetProcessMemoryUsageBytes();
// Size of the block at `memory`. The RawMallocAllocator wrapper below calls it on pointers obtained from AZ_OS_MALLOC(size, 1).
size_t GetMemorySize(void* memory);
}
/// <summary>
/// Test allocator wrapper that redirects the calls to the passed TAllocator by using AZ::AllocatorInstance.
/// It also creates/destroys the TAllocator type (to reflect what happens at runtime)
/// </summary>
/// <typeparam name="TAllocator">Allocator type to wrap</typeparam>
template<typename TAllocator>
class TestAllocatorWrapper
{
public:
static void SetUp()
{
AZ::AllocatorInstance<TAllocator>::Create();
}
static void TearDown()
{
AZ::AllocatorInstance<TAllocator>::Destroy();
}
static void* Allocate(size_t byteSize, size_t alignment)
{
return AZ::AllocatorInstance<TAllocator>::Get().Allocate(byteSize, alignment);
}
static void DeAllocate(void* ptr, size_t byteSize = 0)
{
AZ::AllocatorInstance<TAllocator>::Get().DeAllocate(ptr, byteSize);
}
static void* ReAllocate(void* ptr, size_t newSize, size_t newAlignment)
{
return AZ::AllocatorInstance<TAllocator>::Get().ReAllocate(ptr, newSize, newAlignment);
}
static size_t Resize(void* ptr, size_t newSize)
{
return AZ::AllocatorInstance<TAllocator>::Get().Resize(ptr, newSize);
}
static void GarbageCollect()
{
AZ::AllocatorInstance<TAllocator>::Get().GarbageCollect();
}
static size_t NumAllocatedBytes()
{
return AZ::AllocatorInstance<TAllocator>::Get().NumAllocatedBytes() +
AZ::AllocatorInstance<TAllocator>::Get().GetUnAllocatedMemory();
}
static size_t GetSize(void* ptr)
{
return AZ::AllocatorInstance<TAllocator>::Get().AllocationSize(ptr);
}
};
/// <summary>
/// Basic allocator used as a baseline. This allocator is the most basic allocation possible with the OS (AZ_OS_MALLOC).
/// MallocSchema cannot be used here because it has extra logic that we don't want to use as a baseline.
/// </summary>
class RawMallocAllocator {};

/// <summary>
/// Wrapper specialization that talks to AZ_OS_MALLOC / AZ_OS_FREE directly and keeps its own byte counter.
/// The counter is always adjusted by the size Platform::GetMemorySize reports for a block, both when the block
/// is obtained and when it is released, so the two sides cancel out exactly. (Adding the requested size and
/// subtracting the reported size made the counter drift and wrap around whenever the two differed.)
/// NOTE(review): the counter is a plain size_t and the threaded benchmarks update it from several threads.
/// </summary>
template<>
class TestAllocatorWrapper<RawMallocAllocator>
{
public:
    TestAllocatorWrapper()
    {
        s_numAllocatedBytes = 0;
    }

    static void SetUp()
    {
        s_numAllocatedBytes = 0;
    }

    static void TearDown()
    {
    }

    // IAllocatorAllocate
    static void* Allocate(size_t byteSize, size_t)
    {
        // Don't pass an alignment since we wont be able to get the memory size without also passing the alignment
        void* ptr = AZ_OS_MALLOC(byteSize, 1);
        s_numAllocatedBytes += TrackedSize(ptr);
        return ptr;
    }

    static void DeAllocate(void* ptr, size_t = 0)
    {
        // nullptr is a legal argument (the recorded benchmark replays deallocate(nullptr)); it accounts for 0 bytes
        s_numAllocatedBytes -= TrackedSize(ptr);
        AZ_OS_FREE(ptr);
    }

    /// Releases ptr and returns a fresh block of newSize bytes. The contents of the old block are not carried over.
    static void* ReAllocate(void* ptr, size_t newSize, size_t)
    {
        s_numAllocatedBytes -= TrackedSize(ptr);
        AZ_OS_FREE(ptr);

        void* newPtr = AZ_OS_MALLOC(newSize, 1);
        s_numAllocatedBytes += TrackedSize(newPtr);
        return newPtr;
    }

    /// In-place resize is not supported by the baseline; always reports 0.
    static size_t Resize(void* ptr, size_t newSize)
    {
        AZ_UNUSED(ptr);
        AZ_UNUSED(newSize);

        return 0;
    }

    static void GarbageCollect() {}

    static size_t NumAllocatedBytes()
    {
        return s_numAllocatedBytes;
    }

    static size_t GetSize(void* ptr)
    {
        return TrackedSize(ptr);
    }

private:
    // Size reported by the platform for ptr; nullptr is never handed to the platform query and counts as 0
    static size_t TrackedSize(void* ptr)
    {
        return ptr ? Platform::GetMemorySize(ptr) : 0;
    }

    static size_t s_numAllocatedBytes;
};

size_t TestAllocatorWrapper<RawMallocAllocator>::s_numAllocatedBytes = 0;
// Some allocators are not fully declared as allocator classes; for those we build the allocator directly
// from the schema. This one wraps AZ::MallocSchema.
class MallocSchemaAllocator : public AZ::SimpleSchemaAllocator<AZ::MallocSchema>
{
    using Base = AZ::SimpleSchemaAllocator<AZ::MallocSchema>;

public:
    AZ_TYPE_INFO(MallocSchemaAllocator, "{3E68224F-E676-402C-8276-CE4B49C05E89}");

    MallocSchemaAllocator()
        : Base("MallocSchemaAllocator", "")
    {
    }
};
// Both this HphaSchemaAllocator and the SystemAllocator configured with Hpha are benchmarked, because the
// SystemAllocator has extra things on top of the schema.
class HphaSchemaAllocator : public AZ::SimpleSchemaAllocator<AZ::HphaSchema>
{
    using Base = AZ::SimpleSchemaAllocator<AZ::HphaSchema>;

public:
    AZ_TYPE_INFO(HphaSchemaAllocator, "{6563AB4B-A68E-4499-8C98-D61D640D1F7F}");

    HphaSchemaAllocator()
        : Base("TestHphaSchemaAllocator", "")
    {
    }
};
// For the SystemAllocator we inherit so that the benchmark gets its own allocator type (and therefore its own
// AZ::AllocatorInstance) instead of sharing the SystemAllocator that is used globally; we don't want the
// global allocator's data affecting the benchmark.
class TestSystemAllocator : public AZ::SystemAllocator
{
public:
AZ_TYPE_INFO(TestSystemAllocator, "{360D4DAA-D65D-4D5C-A6FA-1A4C5261C35C}");
// Adds nothing on top of the base class constructor
TestSystemAllocator()
: AZ::SystemAllocator()
{
}
};
// Names of the custom counters the benchmarks publish through state.counters
// Allocated bytes reported by the allocator
static const char* s_counterAllocatorMemory = "Allocator_Memory";
// Allocated bytes as counted by the benchmark
static const char* s_counterBenchmarkMemory = "Benchmark_Memory";
// Selects which row of s_allocationSizes a fixture cycles through. COUNT is not a category, it is the number
// of rows.
enum AllocationSize
{
SMALL,
BIG,
MIXED,
COUNT
};
static const size_t s_kiloByte = 1024;
static const size_t s_megaByte = s_kiloByte * s_kiloByte;
// Request sizes in bytes. The fixtures index a row with (allocationIndex % row size), so the 10 sizes repeat
// in order. SMALL goes up to 512 bytes, BIG from 513 bytes to 2 megabytes, MIXED alternates between the two.
using AllocationSizeArray = AZStd::array<size_t, 10>;
static const AZStd::array<AllocationSizeArray, COUNT> s_allocationSizes = {
/* SMALL */ AllocationSizeArray{ 2, 16, 20, 59, 100, 128, 160, 250, 300, 512 },
/* BIG */ AllocationSizeArray{ 513, s_kiloByte, 2 * s_kiloByte, 4 * s_kiloByte, 10 * s_kiloByte, 64 * s_kiloByte, 128 * s_kiloByte, 200 * s_kiloByte, s_megaByte, 2 * s_megaByte },
/* MIXED */ AllocationSizeArray{ 2, s_kiloByte, 59, 4 * s_kiloByte, 128, 200 * s_kiloByte, 250, s_megaByte, 512, 2 * s_megaByte }
};
/// Common fixture for the allocation/deallocation benchmarks. It owns one vector of pointers per benchmark
/// thread, each with state.range(0) slots, and sets up / tears down the allocator under test.
template <typename TAllocator>
class AllocatorBenchmarkFixture
    : public ::benchmark::Fixture
{
protected:
    using TestAllocatorType = TestAllocatorWrapper<TAllocator>;

    virtual void internalSetUp(const ::benchmark::State& state)
    {
        // Shared state is prepared once, by the first thread only
        if (state.thread_index != 0)
        {
            return;
        }

        TestAllocatorType::SetUp();

        m_allocations.resize(state.threads);
        for (size_t threadSlot = 0; threadSlot < m_allocations.size(); ++threadSlot)
        {
            m_allocations[threadSlot].resize(state.range(0), nullptr);
        }
    }

    virtual void internalTearDown(const ::benchmark::State& state)
    {
        // Shared state is released once, by the first thread only
        if (state.thread_index != 0)
        {
            return;
        }

        m_allocations.clear();
        m_allocations.shrink_to_fit();

        TestAllocatorType::TearDown();
    }

    /// Pointer slots that belong to the given benchmark thread
    AZStd::vector<void*>& GetPerThreadAllocations(size_t threadIndex)
    {
        AZStd::vector<void*>& threadAllocations = m_allocations[threadIndex];
        return threadAllocations;
    }

public:
    // The fixture interface has const and non-const overloads; all of them funnel into the helpers above
    void SetUp(const ::benchmark::State& state) override
    {
        internalSetUp(state);
    }
    void SetUp(::benchmark::State& state) override
    {
        internalSetUp(state);
    }

    void TearDown(const ::benchmark::State& state) override
    {
        internalTearDown(state);
    }
    void TearDown(::benchmark::State& state) override
    {
        internalTearDown(state);
    }

private:
    AZStd::vector<AZStd::vector<void*>> m_allocations;
};
/// Measures the cost of Allocate. Each benchmark iteration fills every per-thread slot with one allocation
/// (sizes cycle through the TAllocationSize row of s_allocationSizes) and then releases them all again.
/// Only the Allocate calls run with the timer on.
template <typename TAllocator, AllocationSize TAllocationSize>
class AllocationBenchmarkFixture
    : public AllocatorBenchmarkFixture<TAllocator>
{
    using base = AllocatorBenchmarkFixture<TAllocator>;
    using TestAllocatorType = typename base::TestAllocatorType;

public:
    void Benchmark(benchmark::State& state)
    {
        const AllocationSizeArray& allocationArray = s_allocationSizes[TAllocationSize];

        // Captured inside the loop (the per-thread storage is only touched while the benchmark is running)
        // and used after it to report the total number of items
        size_t numberOfAllocations = 0;

        for (auto _ : state)
        {
            state.PauseTiming();

            AZStd::vector<void*>& perThreadAllocations = base::GetPerThreadAllocations(state.thread_index);
            numberOfAllocations = perThreadAllocations.size();
            size_t totalAllocationSize = 0;

            for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex)
            {
                const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()];
                totalAllocationSize += allocationSize;

                // Only the allocation itself is timed
                state.ResumeTiming();
                perThreadAllocations[allocationIndex] = TestAllocatorType::Allocate(allocationSize, 0);
                state.PauseTiming();
            }

            state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast<double>(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults);
            state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast<double>(totalAllocationSize), benchmark::Counter::kDefaults);

            // Untimed cleanup so the next iteration starts from the same place
            for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex)
            {
                const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()];
                TestAllocatorType::DeAllocate(perThreadAllocations[allocationIndex], allocationSize);
                perThreadAllocations[allocationIndex] = nullptr;
            }
            TestAllocatorType::GarbageCollect();

            // The timer has to be running when the iteration ends: the next iteration starts with PauseTiming()
            // and the benchmark library also pauses the timer when the loop finishes. Pausing an already paused
            // timer trips a check in the library or adds the untimed cleanup above to the measured time.
            state.ResumeTiming();
        }

        // Items processed is a total over all iterations, not a per-iteration amount
        state.SetItemsProcessed(static_cast<size_t>(state.iterations()) * numberOfAllocations);
    }
};
/// Measures the cost of DeAllocate. Each benchmark iteration fills every per-thread slot with one allocation
/// (sizes cycle through the TAllocationSize row of s_allocationSizes) with the timer off, and then releases
/// them one by one. Only the DeAllocate calls run with the timer on.
template <typename TAllocator, AllocationSize TAllocationSize>
class DeAllocationBenchmarkFixture
    : public AllocatorBenchmarkFixture<TAllocator>
{
    using base = AllocatorBenchmarkFixture<TAllocator>;
    using TestAllocatorType = typename base::TestAllocatorType;

public:
    void Benchmark(benchmark::State& state)
    {
        const AllocationSizeArray& allocationArray = s_allocationSizes[TAllocationSize];

        // Captured inside the loop (the per-thread storage is only touched while the benchmark is running)
        // and used after it to report the total number of items
        size_t numberOfAllocations = 0;

        for (auto _ : state)
        {
            state.PauseTiming();

            AZStd::vector<void*>& perThreadAllocations = base::GetPerThreadAllocations(state.thread_index);
            numberOfAllocations = perThreadAllocations.size();
            size_t totalAllocationSize = 0;

            // Untimed preparation: create everything that is going to be released
            for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex)
            {
                const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()];
                totalAllocationSize += allocationSize;
                perThreadAllocations[allocationIndex] = TestAllocatorType::Allocate(allocationSize, 0);
            }

            for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex)
            {
                const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()];

                // Only the deallocation itself is timed
                state.ResumeTiming();
                TestAllocatorType::DeAllocate(perThreadAllocations[allocationIndex], allocationSize);
                state.PauseTiming();

                perThreadAllocations[allocationIndex] = nullptr;
            }

            state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast<double>(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults);
            state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast<double>(totalAllocationSize), benchmark::Counter::kDefaults);

            TestAllocatorType::GarbageCollect();

            // The timer has to be running when the iteration ends: the next iteration starts with PauseTiming()
            // and the benchmark library also pauses the timer when the loop finishes. Pausing an already paused
            // timer trips a check in the library or adds untimed work to the measured time.
            state.ResumeTiming();
        }

        // Items processed is a total over all iterations, not a per-iteration amount
        state.SetItemsProcessed(static_cast<size_t>(state.iterations()) * numberOfAllocations);
    }
};
/// Replays a recording of allocator operations (Tests/AzCore/Memory/AllocatorBenchmarkRecordings.bin, looked up
/// next to the executable) against the allocator under test. Only the allocator calls run with the timer on.
template<typename TAllocator>
class RecordedAllocationBenchmarkFixture : public ::benchmark::Fixture
{
    using TestAllocatorType = TestAllocatorWrapper<TAllocator>;

    virtual void internalSetUp()
    {
        TestAllocatorType::SetUp();
    }

    void internalTearDown()
    {
        TestAllocatorType::TearDown();
    }

    // On-disk layout of one recorded operation: 8 bytes, 1 + 28 + 7 + 28 bits
    #pragma pack(push, 1)
    struct alignas(1) AllocatorOperation
    {
        enum OperationType : size_t
        {
            ALLOCATE,
            DEALLOCATE
        };
        OperationType m_type : 1;
        size_t m_size : 28;      // Requested size in bytes (values up to 2^28 - 1)
        size_t m_alignment : 7;  // Requested alignment (values up to 127)
        size_t m_recordId : 28;  // Pairs a DEALLOCATE with its ALLOCATE, ids are reused; 0 flags deallocate(nullptr)
    };
    #pragma pack(pop)
    static_assert(sizeof(AllocatorOperation) == 8);

public:
    void SetUp(const ::benchmark::State&) override
    {
        internalSetUp();
    }
    void SetUp(::benchmark::State&) override
    {
        internalSetUp();
    }

    void TearDown(const ::benchmark::State&) override
    {
        internalTearDown();
    }
    void TearDown(::benchmark::State&) override
    {
        internalTearDown();
    }

    void Benchmark(benchmark::State& state)
    {
        // Accumulated over every benchmark iteration and reported once the loop is done
        size_t totalItemsProcessed = 0;

        for (auto _ : state)
        {
            state.PauseTiming();

            // Maps the record id found in the recording to the pointer obtained in this replay
            AZStd::unordered_map<size_t, void*> pointerRemapping;
            constexpr size_t allocationOperationCount = 5 * 1024;
            AZStd::array<AllocatorOperation, allocationOperationCount> m_operations = {};
            size_t totalAllocationSize = 0;
            size_t itemsProcessed = 0;

            AZ::IO::FixedMaxPathString filePath = AZ::Utils::GetExecutableDirectory();
            filePath += "/Tests/AzCore/Memory/AllocatorBenchmarkRecordings.bin";

            for (size_t i = 0; i < 100; ++i) // play the recording multiple times to get a good stable sample, this way we can keep a smaller recording
            {
                AZ::IO::SystemFile file;
                if (!file.Open(filePath.c_str(), AZ::IO::SystemFile::OpenMode::SF_OPEN_READ_ONLY))
                {
                    // Leaving the benchmark loop early has to be reported to the benchmark library; a bare
                    // return is treated by the library as a benchmark that returned before it was done.
                    // Nothing is outstanding here: each pass releases its remaining pointers before the next one.
                    state.SkipWithError("Unable to open Tests/AzCore/Memory/AllocatorBenchmarkRecordings.bin");
                    return;
                }

                size_t elementsRead =
                    file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations) / sizeof(AllocatorOperation);
                itemsProcessed += elementsRead;

                while (elementsRead > 0)
                {
                    for (size_t operationIndex = 0; operationIndex < elementsRead; ++operationIndex)
                    {
                        const AllocatorOperation& operation = m_operations[operationIndex];
                        if (operation.m_type == AllocatorOperation::ALLOCATE)
                        {
                            const auto it = pointerRemapping.emplace(operation.m_recordId, nullptr);
                            if (it.second) // otherwise already allocated
                            {
                                state.ResumeTiming();
                                void* ptr = TestAllocatorType::Allocate(operation.m_size, operation.m_alignment);
                                state.PauseTiming();
                                totalAllocationSize += operation.m_size;
                                it.first->second = ptr;
                            }
                            else
                            {
                                // Doing a resize, dont account for this memory change, this operation is rare and we dont have
                                // the size of the previous allocation
                                state.ResumeTiming();
                                TestAllocatorType::Resize(it.first->second, operation.m_size);
                                state.PauseTiming();
                            }
                        }
                        else // AllocatorOperation::DEALLOCATE:
                        {
                            if (operation.m_recordId)
                            {
                                const auto ptrIt = pointerRemapping.find(operation.m_recordId);
                                if (ptrIt != pointerRemapping.end())
                                {
                                    totalAllocationSize -= operation.m_size;
                                    state.ResumeTiming();
                                    TestAllocatorType::DeAllocate(
                                        ptrIt->second,
                                        /*operation.m_size*/ 0); // size is not correct after a resize, a 0 size deals with it
                                    state.PauseTiming();
                                    pointerRemapping.erase(ptrIt);
                                }
                            }
                            else // deallocate(nullptr) are recorded
                            {
                                // Just to account of the call of deallocate(nullptr);
                                state.ResumeTiming();
                                TestAllocatorType::DeAllocate(nullptr, /*operation.m_size*/ 0);
                                state.PauseTiming();
                            }
                        }
                    }

                    elementsRead =
                        file.Read(sizeof(AllocatorOperation) * allocationOperationCount, &m_operations) / sizeof(AllocatorOperation);
                    itemsProcessed += elementsRead;
                }
                file.Close();

                // Deallocate the remainder (since we stopped the recording middle-game)(there are leaks as well)
                for (const auto& pointerMapping : pointerRemapping)
                {
                    state.ResumeTiming();
                    TestAllocatorType::DeAllocate(pointerMapping.second);
                    state.PauseTiming();
                }
                itemsProcessed += pointerRemapping.size();
                pointerRemapping.clear();
            }

            state.counters[s_counterAllocatorMemory] = benchmark::Counter(static_cast<double>(TestAllocatorType::NumAllocatedBytes()), benchmark::Counter::kDefaults);
            state.counters[s_counterBenchmarkMemory] = benchmark::Counter(static_cast<double>(totalAllocationSize), benchmark::Counter::kDefaults);

            totalItemsProcessed += itemsProcessed;

            TestAllocatorType::GarbageCollect();

            // The benchmark library pauses the timer itself when the iteration loop finishes (and a further
            // iteration would start with PauseTiming()), so the timer has to be running again at this point.
            state.ResumeTiming();
        }

        state.SetItemsProcessed(totalItemsProcessed);
    }
};
// Argument set for the non-threaded benchmarks: 100, 400 and 1600 (each one four times the previous)
static void RunRanges(benchmark::internal::Benchmark* b)
{
    int multiplier = 1;
    for (int step = 0; step < 3; ++step)
    {
        b->Arg(multiplier * 100);
        multiplier *= 4;
    }
}
// Settings for the recorded benchmark: a single benchmark iteration (the fixture already replays the
// recording 100 times within one iteration)
static void RecordedRunRanges(benchmark::internal::Benchmark* b)
{
b->Iterations(1);
}
// For threaded ranges, run just 100 per thread; multi-threaded runs already multiply that by the number of threads
static void ThreadedRunRanges(benchmark::internal::Benchmark* b)
{
b->Arg(100);
}
// Test under and over-subscription of threads vs the amount of CPUs available: the threaded benchmarks go up
// to twice the reported hardware concurrency.
// NOTE(review): this would be 0 if hardware_concurrency() reported 0, which is below the lower bound of 2
// passed to ThreadRange - confirm that cannot happen on the supported platforms.
static const unsigned int MaxThreadRange = 2 * AZStd::thread::hardware_concurrency();
// Defines the templated fixture benchmark FIXTURE<...>/TESTNAME with a body that just calls the fixture's
// Benchmark(state), and registers it. The expansion ends in the registration expression, so the caller can
// chain configuration calls (->Apply(...), ->ThreadRange(...)) onto it.
#define BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME, ...) \
BENCHMARK_TEMPLATE_DEFINE_F(FIXTURE, TESTNAME, __VA_ARGS__)(benchmark::State& state) { Benchmark(state); } \
BENCHMARK_REGISTER_F(FIXTURE, TESTNAME)
// We test small/big/mixed allocations in single-threaded environments. For multi-threaded environments, we test mixed since
// the multi threaded fixture will run multiple passes (ThreadRange from 2 threads up to MaxThreadRange)
#define BM_REGISTER_SIZE_FIXTURES(FIXTURE, TESTNAME, ALLOCATORTYPE) \
BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_SMALL, ALLOCATORTYPE, SMALL)->Apply(RunRanges); \
BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_BIG, ALLOCATORTYPE, BIG)->Apply(RunRanges); \
BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_MIXED, ALLOCATORTYPE, MIXED)->Apply(RunRanges); \
BM_REGISTER_TEMPLATE(FIXTURE, TESTNAME##_MIXED_THREADED, ALLOCATORTYPE, MIXED)->ThreadRange(2, MaxThreadRange)->Apply(ThreadedRunRanges);
// Registers the whole set of benchmarks for one allocator inside its own BM_<TESTNAME> namespace: the
// allocation and deallocation fixtures in every size category, plus the recorded replay.
#define BM_REGISTER_ALLOCATOR(TESTNAME, ALLOCATORTYPE) \
namespace BM_##TESTNAME \
{ \
BM_REGISTER_SIZE_FIXTURES(AllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE); \
BM_REGISTER_SIZE_FIXTURES(DeAllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE); \
BM_REGISTER_TEMPLATE(RecordedAllocationBenchmarkFixture, TESTNAME, ALLOCATORTYPE)->Apply(RecordedRunRanges); \
}
/// Warm up benchmark used to prepare the OS for allocations. Most OSes keep the memory allocated by a process
/// reserved in some way, so the first benchmarks that allocate always take a bigger hit. This warm up allocator
/// runs all the benchmarks and is only there so that the allocators that follow report more consistent results.
BM_REGISTER_ALLOCATOR(WarmUpAllocator, RawMallocAllocator);
BM_REGISTER_ALLOCATOR(RawMallocAllocator, RawMallocAllocator);
BM_REGISTER_ALLOCATOR(MallocSchemaAllocator, MallocSchemaAllocator);
BM_REGISTER_ALLOCATOR(HphaSchemaAllocator, HphaSchemaAllocator);
BM_REGISTER_ALLOCATOR(SystemAllocator, TestSystemAllocator);
//BM_REGISTER_ALLOCATOR(BestFitExternalMapAllocator, BestFitExternalMapAllocator); // Requires to pre-allocate blocks and cannot work as a general-purpose allocator
//BM_REGISTER_ALLOCATOR(HeapSchemaAllocator, TestHeapSchemaAllocator); // Requires to pre-allocate blocks and cannot work as a general-purpose allocator
//BM_REGISTER_SCHEMA(PoolSchema); // Requires special alignment requests while allocating
#undef BM_REGISTER_ALLOCATOR
#undef BM_REGISTER_SIZE_FIXTURES
#undef BM_REGISTER_TEMPLATE
} // Benchmark
#endif // HAVE_BENCHMARK

@ -10,10 +10,6 @@
#include <AzCore/Memory/HphaSchema.h>
#include <AzCore/std/containers/vector.h>
#if defined(HAVE_BENCHMARK)
#include <benchmark/benchmark.h>
#endif // HAVE_BENCHMARK
class HphaSchema_TestAllocator
: public AZ::SimpleSchemaAllocator<AZ::HphaSchema>
{
@ -112,87 +108,3 @@ namespace UnitTest
HphaSchemaTestFixture,
::testing::ValuesIn(s_mixedInstancesParameters));
}
#if defined(HAVE_BENCHMARK)
namespace Benchmark
{
class HphaSchemaBenchmarkFixture
: public ::benchmark::Fixture
{
void internalSetUp()
{
AZ::AllocatorInstance<HphaSchema_TestAllocator>::Create();
}
void internalTearDown()
{
AZ::AllocatorInstance<HphaSchema_TestAllocator>::Destroy();
}
public:
void SetUp(const benchmark::State&) override
{
internalSetUp();
}
void SetUp(benchmark::State&) override
{
internalSetUp();
}
void TearDown(const benchmark::State&) override
{
internalTearDown();
}
void TearDown(benchmark::State&) override
{
internalTearDown();
}
static void BM_Allocations(benchmark::State& state, const AllocationSizeArray& allocationArray)
{
AZStd::vector<void*> allocations;
while (state.KeepRunning())
{
state.PauseTiming();
const size_t allocationIndex = allocations.size();
const size_t allocationSize = allocationArray[allocationIndex % allocationArray.size()];
state.ResumeTiming();
void* allocation = AZ::AllocatorInstance<HphaSchema_TestAllocator>::Get().Allocate(allocationSize, 0);
state.PauseTiming();
allocations.emplace_back(allocation);
state.ResumeTiming();
}
const size_t numberOfAllocations = allocations.size();
state.SetItemsProcessed(numberOfAllocations);
for (size_t allocationIndex = 0; allocationIndex < numberOfAllocations; ++allocationIndex)
{
AZ::AllocatorInstance<HphaSchema_TestAllocator>::Get().DeAllocate(allocations[allocationIndex], allocationArray[allocationIndex % allocationArray.size()]);
}
AZ::AllocatorInstance<HphaSchema_TestAllocator>::Get().GarbageCollect();
}
};
// Small allocations: these are allocations that are going to end up in buckets in the HphaSchema.
// Runs the shared BM_Allocations body over s_smallAllocationSizes.
BENCHMARK_F(HphaSchemaBenchmarkFixture, SmallAllocations)(benchmark::State& state)
{
BM_Allocations(state, s_smallAllocationSizes);
}
// Runs the shared BM_Allocations body over s_bigAllocationSizes.
BENCHMARK_F(HphaSchemaBenchmarkFixture, BigAllocations)(benchmark::State& state)
{
BM_Allocations(state, s_bigAllocationSizes);
}
// Runs the shared BM_Allocations body over s_mixedAllocationSizes.
BENCHMARK_F(HphaSchemaBenchmarkFixture, MixedAllocations)(benchmark::State& state)
{
BM_Allocations(state, s_mixedAllocationSizes);
}
} // Benchmark
#endif // HAVE_BENCHMARK

@ -0,0 +1,31 @@
/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <AzCore/PlatformIncl.h>
#include <AzCore/Debug/Trace.h>
#include <malloc.h>
#include <sys/resource.h>
namespace Benchmark
{
namespace Platform
{
/// Returns the memory usage of the current process in bytes, as reported by getrusage(RUSAGE_SELF).
/// The value is derived from ru_maxrss, which getrusage(2) documents as the peak resident set size
/// in kilobytes on Linux-based kernels, so it never decreases during the life of the process.
/// @return the usage in bytes, or 0 if getrusage fails
size_t GetProcessMemoryUsageBytes()
{
    // Zero-initialize so no indeterminate value is ever read, and honor the error return.
    struct rusage usage = {};
    if (getrusage(RUSAGE_SELF, &usage) != 0)
    {
        return 0;
    }
    // Convert to size_t before scaling: multiplying as a signed long can overflow where long is 32 bits.
    return static_cast<size_t>(usage.ru_maxrss) * 1024;
}
/// Returns the usable size, as reported by malloc_usable_size, of the block pointed to by memory.
/// @param memory pointer to query; may be null
/// @return the usable size in bytes, or 0 when memory is null
size_t GetMemorySize(void* memory)
{
    if (!memory)
    {
        return 0;
    }
    return malloc_usable_size(memory);
}
}
}

@ -8,4 +8,5 @@
# Source files collected in the FILES variable. Both entries carry the _Android suffix
# (presumably the Android-specific test sources; confirm against the script that includes this list).
set(FILES
Tests/UtilsTests_Android.cpp
Tests/Memory/AllocatorBenchmarks_Android.cpp
)

@ -0,0 +1,31 @@
/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <AzCore/PlatformIncl.h>
#include <AzCore/Debug/Trace.h>
#include <malloc.h>
#include <sys/resource.h>
namespace Benchmark
{
namespace Platform
{
/// Returns the memory usage of the current process in bytes, as reported by getrusage(RUSAGE_SELF).
/// The value is derived from ru_maxrss, which getrusage(2) documents as the peak resident set size
/// in kilobytes on Linux, so it never decreases during the life of the process.
/// @return the usage in bytes, or 0 if getrusage fails
size_t GetProcessMemoryUsageBytes()
{
    // Zero-initialize so no indeterminate value is ever read, and honor the error return.
    struct rusage usage = {};
    if (getrusage(RUSAGE_SELF, &usage) != 0)
    {
        return 0;
    }
    // Scale kilobytes to bytes in unsigned arithmetic instead of as a signed long.
    return static_cast<size_t>(usage.ru_maxrss) * 1024;
}
/// Reports how many usable bytes malloc_usable_size attributes to the given block.
/// @param memory pointer to query; may be null
/// @return the usable size in bytes, or 0 when memory is null
size_t GetMemorySize(void* memory)
{
    if (memory == nullptr)
    {
        return 0;
    }
    const size_t usableBytes = malloc_usable_size(memory);
    return usableBytes;
}
}
}

@ -9,4 +9,5 @@
# Source files collected in the FILES variable: the _Linux test sources plus the shared
# UnixLike utility tests pulled in from the Common directory.
set(FILES
Tests/UtilsTests_Linux.cpp
../Common/UnixLike/Tests/UtilsTests_UnixLike.cpp
Tests/Memory/AllocatorBenchmarks_Linux.cpp
)

@ -0,0 +1,31 @@
/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <AzCore/PlatformIncl.h>
#include <AzCore/Debug/Trace.h>
#include <malloc/malloc.h>
#include <sys/resource.h>
namespace Benchmark
{
namespace Platform
{
/// Returns the memory usage of the current process as reported by getrusage(RUSAGE_SELF).
/// The ru_maxrss field is returned unscaled.
/// NOTE(review): macOS documents ru_maxrss as the peak resident set size in bytes (not kilobytes),
/// which is why no multiplication is applied here — confirm against the platform man page.
/// @return the ru_maxrss value, or 0 if getrusage fails
size_t GetProcessMemoryUsageBytes()
{
    // Zero-initialize so no indeterminate value is ever read, and honor the error return.
    struct rusage usage = {};
    if (getrusage(RUSAGE_SELF, &usage) != 0)
    {
        return 0;
    }
    // Make the signed long to size_t conversion explicit.
    return static_cast<size_t>(usage.ru_maxrss);
}
/// Returns the size that malloc_size reports for the block pointed to by memory.
/// @param memory pointer to query; may be null
/// @return the reported size in bytes, or 0 when memory is null
size_t GetMemorySize(void* memory)
{
    if (!memory)
    {
        return 0;
    }
    return malloc_size(memory);
}
}
}

@ -9,4 +9,5 @@
# Source files collected in the FILES variable: the shared Apple and UnixLike utility tests
# from the Common directory plus the _Mac allocator benchmark source.
set(FILES
../Common/Apple/Tests/UtilsTests_Apple.cpp
../Common/UnixLike/Tests/UtilsTests_UnixLike.cpp
Tests/Memory/AllocatorBenchmarks_Mac.cpp
)

@ -0,0 +1,40 @@
/*
* Copyright (c) Contributors to the Open 3D Engine Project.
* For complete copyright and license terms please see the LICENSE at the root of this distribution.
*
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
*/
#include <AzCore/PlatformIncl.h>
#include <AzCore/Debug/Trace.h>
#include <malloc.h>
#include <psapi.h>
namespace Benchmark
{
namespace Platform
{
size_t GetProcessMemoryUsageBytes()
{
EmptyWorkingSet(GetCurrentProcess());
size_t memoryUsage = 0;
MEMORY_BASIC_INFORMATION mbi = { 0 };
unsigned char* pEndRegion = nullptr;
while (sizeof(mbi) == VirtualQuery(pEndRegion, &mbi, sizeof(mbi))) {
pEndRegion += mbi.RegionSize;
if ((mbi.AllocationProtect & PAGE_READWRITE) && (mbi.State & MEM_COMMIT)) {
memoryUsage += mbi.RegionSize;
}
}
return memoryUsage;
}
/// Returns the size that _aligned_msize reports for the block pointed to by memory,
/// queried with an alignment of 1 and an offset of 0.
/// @param memory pointer to query; may be null
/// @return the reported size in bytes, or 0 when memory is null
size_t GetMemorySize(void* memory)
{
    if (!memory)
    {
        return 0;
    }
    return _aligned_msize(memory, 1, 0);
}
}
}

@ -9,6 +9,7 @@
# Source files collected in the FILES variable: the shared WinAPI utility tests from the
# Common directory plus the _Windows test and benchmark sources.
set(FILES
../Common/WinAPI/Tests/UtilsTests_WinAPI.cpp
Tests/IO/Streamer/StorageDriveTests_Windows.cpp
Tests/Memory/AllocatorBenchmarks_Windows.cpp
Tests/Memory/OverrunDetectionAllocator_Windows.cpp
Tests/Serialization_Windows.cpp
)

@ -170,6 +170,7 @@ set(FILES
Math/Vector3Tests.cpp
Math/Vector4PerformanceTests.cpp
Math/Vector4Tests.cpp
Memory/AllocatorBenchmarks.cpp
Memory/AllocatorManager.cpp
Memory/HphaSchema.cpp
Memory/HphaSchemaErrorDetection.cpp

Loading…
Cancel
Save