Pod jednym z ostatnich moich wpisów wywiązała się ciekawa dyskusja o klasach std::set oraz std::unordered_set. W związku z tym postanowiłem, by w tym wpisie zbadać zapotrzebowanie pamięciowe powyższych dwóch klas.
Aby móc śledzić alokacje dokonywane przez kontener STL, napisałem allocator pamięci rr_debug_allocator. Klasa ta dziedziczy po std::allocator<T> i jedynym jej zadaniem jest wypisywanie na konsole zajmowanych i zwalnianych obszarów pamięci.
Jako punkt odniesienia przedstawiam zapotrzebowanie na pamięć dla kontenerów std::vector oraz std::list.
STL Custom Allocator
#pragma once
#include <cstdio>
#include <memory>
template < typename T >
class rr_debug_allocator: public std::allocator< T >
{
public:
using size_type = size_t;
using pointer = T*;
using const_pointer = const T*;
template< typename U >
struct rebind { using other = rr_debug_allocator<U>; };
pointer allocate(size_type n, const void *hint=0)
{
pointer result = std::allocator<T>::allocate(n, hint);
const size_t alloc_size = n*sizeof(T);
printf(" >> [%d] bytes allocated at [%p] for [%d] x [%d]\n",alloc_size ,result,n,sizeof(T));
return result;
}
void deallocate(pointer p, size_type n)
{
printf(" >> [%d] bytes deallocated at [%p] for [%d] x [%d]\n", n*sizeof(T), p, n,sizeof(T));
return std::allocator<T>::deallocate(p, n);
}
rr_debug_allocator() noexcept: std::allocator<T>() {
//printf(" >>>> rr_debug_allocator Constructor\n");
}
rr_debug_allocator(const rr_debug_allocator &a) noexcept: std::allocator<T>(a) { }
template <class U>
rr_debug_allocator(const rr_debug_allocator<U> &a) noexcept: std::allocator<T>(a) { }
~rr_debug_allocator() noexcept {
//printf(" >>>> ~rr_debug_allocator Destructor\n");
}
};
Aplikacja testowa
#include "custom_alocator.h"
#include <vector>
#include <list>
#include <set>
#include <unordered_set>
using element_t = int;
using allocator_t = rr_debug_allocator< element_t >;
constexpr int ELEMENTS_COUNT = 4;
void test_vector()
{
printf("test_vector\n");
std::vector < element_t, allocator_t > container;
for( int i=0;i<ELEMENTS_COUNT; ++i)
container.push_back(i);
}
void test_list()
{
printf("test_list\n");
std::list < element_t, allocator_t > container;
for( int i=0;i<ELEMENTS_COUNT; ++i)
container.push_back(i);
}
void test_set()
{
printf("test_set\n");
std::set < element_t, std::less<element_t>, allocator_t > container;
for( int i=0;i<ELEMENTS_COUNT; ++i)
container.insert(i);
}
void test_unordered_set()
{
printf("test_unordered_set\n");
std::unordered_set < element_t, std::hash<element_t>, std::equal_to<element_t>, allocator_t > container;
for( int i=0;i<ELEMENTS_COUNT; ++i)
container.insert(i);
}
int main() {
printf("sizeof(void*) = %d\n==============\n",sizeof(void*));
test_vector();
printf("\n");
test_list();
printf("\n");
test_set();
printf("\n");
test_unordered_set();
printf("\n");
return 0;
}
Analiza otrzymanych wyników
Logi dla trybu 32 bit
sizeof(void*) = 4
==============
test_vector
>> [4] bytes allocated at [00fba190] for [1] x [4]
>> [8] bytes allocated at [00fba000] for [2] x [4]
>> [4] bytes deallocated at [00fba190] for [1] x [4]
>> [16] bytes allocated at [00fb6e38] for [4] x [4]
>> [8] bytes deallocated at [00fba000] for [2] x [4]
>> [16] bytes deallocated at [00fb6e38] for [4] x [4]
test_list
>> [12] bytes allocated at [00fb6e38] for [1] x [12]
>> [12] bytes allocated at [00fb0568] for [1] x [12]
>> [12] bytes allocated at [00fb0580] for [1] x [12]
>> [12] bytes allocated at [00fba648] for [1] x [12]
>> [12] bytes deallocated at [00fb6e38] for [1] x [12]
>> [12] bytes deallocated at [00fb0568] for [1] x [12]
>> [12] bytes deallocated at [00fb0580] for [1] x [12]
>> [12] bytes deallocated at [00fba648] for [1] x [12]
test_set
>> [20] bytes allocated at [00fb0568] for [1] x [20]
>> [20] bytes allocated at [00fb0588] for [1] x [20]
>> [20] bytes allocated at [00fba648] for [1] x [20]
>> [20] bytes allocated at [00fba668] for [1] x [20]
>> [20] bytes deallocated at [00fba668] for [1] x [20]
>> [20] bytes deallocated at [00fba648] for [1] x [20]
>> [20] bytes deallocated at [00fb0588] for [1] x [20]
>> [20] bytes deallocated at [00fb0568] for [1] x [20]
test_unordered_set
>> [8] bytes allocated at [00fba0c0] for [1] x [8]
>> [8] bytes allocated at [00fba0d0] for [2] x [4]
>> [8] bytes allocated at [00fba100] for [1] x [8]
>> [20] bytes allocated at [00fb0568] for [5] x [4]
>> [8] bytes deallocated at [00fba0d0] for [2] x [4]
>> [8] bytes allocated at [00fba000] for [1] x [8]
>> [8] bytes allocated at [00fba030] for [1] x [8]
>> [8] bytes deallocated at [00fba030] for [1] x [8]
>> [8] bytes deallocated at [00fba000] for [1] x [8]
>> [8] bytes deallocated at [00fba100] for [1] x [8]
>> [8] bytes deallocated at [00fba0c0] for [1] x [8]
>> [20] bytes deallocated at [00fb0568] for [5] x [4]
Logi dla trybu 64 bit
sizeof(void*) = 8
==============
test_vector
>> [4] bytes allocated at [0000000000b514e0] for [1] x [4]
>> [8] bytes allocated at [0000000000b51500] for [2] x [4]
>> [4] bytes deallocated at [0000000000b514e0] for [1] x [4]
>> [16] bytes allocated at [0000000000b514e0] for [4] x [4]
>> [8] bytes deallocated at [0000000000b51500] for [2] x [4]
>> [16] bytes deallocated at [0000000000b514e0] for [4] x [4]
test_list
>> [24] bytes allocated at [0000000000b514e0] for [1] x [24]
>> [24] bytes allocated at [0000000000b51500] for [1] x [24]
>> [24] bytes allocated at [0000000000b51520] for [1] x [24]
>> [24] bytes allocated at [0000000000b51540] for [1] x [24]
>> [24] bytes deallocated at [0000000000b514e0] for [1] x [24]
>> [24] bytes deallocated at [0000000000b51500] for [1] x [24]
>> [24] bytes deallocated at [0000000000b51520] for [1] x [24]
>> [24] bytes deallocated at [0000000000b51540] for [1] x [24]
test_set
>> [40] bytes allocated at [0000000000b514e0] for [1] x [40]
>> [40] bytes allocated at [0000000000b51510] for [1] x [40]
>> [40] bytes allocated at [0000000000b51540] for [1] x [40]
>> [40] bytes allocated at [0000000000b51570] for [1] x [40]
>> [40] bytes deallocated at [0000000000b51570] for [1] x [40]
>> [40] bytes deallocated at [0000000000b51540] for [1] x [40]
>> [40] bytes deallocated at [0000000000b51510] for [1] x [40]
>> [40] bytes deallocated at [0000000000b514e0] for [1] x [40]
test_unordered_set
>> [16] bytes allocated at [0000000000b514e0] for [1] x [16]
>> [16] bytes allocated at [0000000000b51500] for [2] x [8]
>> [16] bytes allocated at [0000000000b51520] for [1] x [16]
>> [40] bytes allocated at [0000000000b51540] for [5] x [8]
>> [16] bytes deallocated at [0000000000b51500] for [2] x [8]
>> [16] bytes allocated at [0000000000b51500] for [1] x [16]
>> [16] bytes allocated at [0000000000b51570] for [1] x [16]
>> [16] bytes deallocated at [0000000000b51570] for [1] x [16]
>> [16] bytes deallocated at [0000000000b51500] for [1] x [16]
>> [16] bytes deallocated at [0000000000b51520] for [1] x [16]
>> [16] bytes deallocated at [0000000000b514e0] for [1] x [16]
>> [40] bytes deallocated at [0000000000b51540] for [5] x [8]
Podsumowanie
Przeprowadzony eksperyment spełniał następujące warunki:
- mamy
Nelementów do wstawiania. - kontenery były tworzone z domyślnymi ustawieniami.
- kompilator MinGW-w64 (w64 4.0).
std::vector
Element w kontenerze zajmuje dokładnie tyle samo pamięci co element poza kontenerem. W najgorszym przypadku całkowita pamięć zaalokowana przez kontener będzie równa
std::list
Element w kontenerze potrzebuje dwóch wskaźników na kolejny i poprzedni węzeł listy.
std::set
Dla każdego elementu kontener tworzy nowy węzeł drzewa. Każdy taki węzeł potrzebuje przechować dwa wskaźniki, przekazaną wartość oraz pewne wartości zależne od implementacji (głębokość obydwu pod-drzew potrzebna do balansowania drzewem)
std::unordered_set
Wewnętrzna implementacja kontenera std::unordered_set, w mojej wersji gcc, to połączenie klasy vector i list. Każdy element to węzeł listy jednokierunkowej, z tą jednak różnicą, że zamiast jednej listy mamy pewną ilość list K. Wartość K jest zależna od ilości elementów w kontenerze, podobnie jak ma to miejsce w klasie vector. Wraz z dodawaniem nowych elementów rozmiar K jest zwiększany. Przybliżoną wartość dla najgorszego przypadku możemy policzyć następująco.