Some documentation
This commit is contained in:
@@ -1,15 +1,15 @@
|
||||
|
||||
#include "./includes/generator.h"
|
||||
|
||||
#include "./includes/generator.h" // imports a generator to be used for the functions
|
||||
|
||||
// generates string to be used as a key
|
||||
std::string gen_string() { // 90^size posibilities
|
||||
std::string randomstring;
|
||||
std::string randomstring;
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
randomstring += singlechar(generator);
|
||||
}
|
||||
return randomstring;
|
||||
}
|
||||
|
||||
// generates strings that don't exist in the hashmap
|
||||
std::string gen_unsuccesfull_string() { // 90^size posibilities
|
||||
std::string randomstring;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
@@ -18,10 +18,11 @@ std::string gen_unsuccesfull_string() { // 90^size posibilities
|
||||
return randomstring;
|
||||
}
|
||||
|
||||
// gen integers to be used as keys
|
||||
int gen_int() {
|
||||
return insert_int(generator);
|
||||
}
|
||||
|
||||
//gen ints that don't exist in the hashmap
|
||||
int gen_unsuccesfull_int() {
|
||||
return noninsert_int(generator);
|
||||
}
|
||||
|
@@ -4,13 +4,23 @@
|
||||
#include <fstream>
|
||||
#include "./tests.h"
|
||||
|
||||
// sizes that will be tested
|
||||
vector<int> sizes = {
    // 50k .. 400k in steps of 50k, then 500k
    50'000, 100'000, 150'000, 200'000, 250'000, 300'000, 350'000, 400'000, 500'000,
    // 600k .. 1M in steps of 100k
    600'000, 700'000, 800'000, 900'000, 1'000'000,
    // 2M .. 10M in steps of 1M
    2'000'000, 3'000'000, 4'000'000, 5'000'000, 6'000'000, 7'000'000, 8'000'000, 9'000'000, 10'000'000,
    // 15M .. 50M in steps of 5M
    15'000'000, 20'000'000, 25'000'000, 30'000'000, 35'000'000, 40'000'000, 45'000'000, 50'000'000
};
|
||||
// to print type info
|
||||
|
||||
|
||||
|
||||
/*
|
||||
to print typenames
|
||||
for more info, see
|
||||
https://stackoverflow.com/a/20170989
|
||||
and
|
||||
https://stackoverflow.com/a/56766138
|
||||
*/
|
||||
|
||||
template<typename T>
|
||||
constexpr auto type_name() {
|
||||
@@ -39,6 +49,17 @@ std::basic_string_view<char> name(T var) {
|
||||
return type_name<decltype(var)>();
|
||||
}
|
||||
|
||||
/*
|
||||
This is the function that outputs the results to a file.
|
||||
It calls int_test `runs` times for all sizes < maxsize, and then appends the
|
||||
results to the outputfile.
|
||||
We use a template function to massively reduce the amount of code needed.
|
||||
Instead of writing 17 different functions for all different hashmaps, we can
|
||||
do this. The compiler will then see that a call is made where the map is of type
|
||||
std::unordered_map, for example, and then generate the function where T is
|
||||
replaced with std::unordered_map. Well, that's the simple explanation,
|
||||
more info at https://en.cppreference.com/w/cpp/language/templates
|
||||
*/
|
||||
|
||||
template<class T>
|
||||
void int_test_aggregate(T map, int runs, int maxsize=20000000) {
|
||||
@@ -71,7 +92,10 @@ void int_test_aggregate(T map, int runs, int maxsize=20000000) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
This is pretty much the same function, but it calls string_test instead of
|
||||
int_test. More info on why we needed to split this, can be seen in tests.h
|
||||
*/
|
||||
template<class T>
|
||||
void string_test_aggregate(T map, int runs, int maxsize=20000000) {
|
||||
std::ofstream output{"results.csv", std::ios_base::app};
|
||||
|
@@ -5,13 +5,15 @@
|
||||
|
||||
#include <random>
|
||||
#include <string>
|
||||
|
||||
// Pseudo-random engine shared by all key generators. It is seeded with a
// fixed value so every run produces the same sequence (deterministic).
static std::mt19937 generator(INT32_MAX - 2020);

// Distributions turn the raw engine output into uniformly distributed ints
// in a CLOSED interval [a, b], i.e. both endpoints can be produced.

// Keys that are inserted into the maps: the lower 7/8 of the positive ints.
static std::uniform_int_distribution<int> insert_int(1, static_cast<int>(INT32_MAX * 0.875));

// Keys that must never be present in the maps: the upper 1/8. The range
// starts one past the end of insert_int's range, because both intervals are
// closed and would otherwise share their boundary value.
static std::uniform_int_distribution<int> noninsert_int(static_cast<int>(INT32_MAX * 0.875) + 1, INT32_MAX);

// Character codes used to build string keys: 33 through 123 inclusive,
// i.e. 91 distinct values.
static std::uniform_int_distribution<int> singlechar(33, 123);
|
||||
|
||||
|
||||
// see generator.cpp for more detail, but the names are pretty self explanatory
|
||||
int gen_int();
|
||||
|
||||
int gen_unsuccesfull_int();
|
||||
|
@@ -28,11 +28,17 @@
|
||||
using std::string;
|
||||
using absl::Hash;
|
||||
|
||||
|
||||
// this is the prepare function, again using template
|
||||
// this is for all maps which only need this. Below are the ones that
|
||||
// need something different
|
||||
// Generic preparation step for maps that only need their capacity reserved
// up front. Map types that need something different are handled by the
// non-template overloads of prepare.
//
// map:  the map to prepare; must provide a reserve(n) member.
// size: number of elements the map is expected to hold. Non-positive values
//       are ignored: passing a negative int to reserve() would convert it to
//       a huge unsigned count and make the reservation fail.
template<class T>
void prepare(T& map, int size) {
    if (size > 0) {
        map.reserve(size);
    }
}
|
||||
|
||||
// needs a tombstone marker(a key that's exclusively used to signify something
|
||||
// is deleted) and it doesn't have a reserve(size) member
|
||||
void prepare(google::sparse_hash_map<int, int>& map, int size) {
    // No capacity is reserved in this overload, so size is not used.
    (void)size;

    // Key registered as the deleted-key marker of this map.
    const int tombstone_key = -1;
    map.set_deleted_key(tombstone_key);
}
|
||||
@@ -41,6 +47,10 @@ void prepare(google::sparse_hash_map<string, string>& map, int size) {
|
||||
map.set_deleted_key("a");
|
||||
}
|
||||
|
||||
// needs a tombstone marker(a key that's exclusively used to signify something
|
||||
// is deleted) and an empty key marker
|
||||
// and it doesn't have a reserve(size) member
|
||||
|
||||
void prepare(google::dense_hash_map<int, int>& map, int size) {
|
||||
map.set_empty_key(0);
|
||||
map.set_deleted_key(-1);
|
||||
@@ -51,7 +61,9 @@ void prepare(google::dense_hash_map<string, string>& map, int size) {
|
||||
map.set_empty_key("");
|
||||
}
|
||||
|
||||
|
||||
// with abseil hash
|
||||
// these repeat the 4 overloads written above, but for map types that use absl::Hash as the hashing function
|
||||
void prepare(google::sparse_hash_map<int, int, Hash<int>>& map, int size) {
    // No capacity is reserved in this overload, so size is not used.
    (void)size;

    // Key registered as the deleted-key marker of this map.
    const int tombstone_key = -1;
    map.set_deleted_key(tombstone_key);
}
|
||||
|
@@ -18,14 +18,34 @@ using namespace std::chrono;
|
||||
using std::vector;
|
||||
using std::cout;
|
||||
|
||||
/*
|
||||
This is yet again a template function.
|
||||
basic functionality is like this:
|
||||
1. we create a vector to store the times
|
||||
2. we create and populate vectors for keys that will be used for the tests
|
||||
3. create the hashmap.
|
||||
4. call prepare(hashmap, size)
|
||||
5. populate the hashmap with (size - 10k) key/value pairs
|
||||
6. benchmark the vector access time, which will be subtracted later
|
||||
7. insert 10k keys(from insert_keys) and time it
|
||||
8. lookup 10k keys(from sample_keys) and time it
|
||||
9. lookup 10k nonexistent keys(nonkeys) and time it
|
||||
10. delete 10k keys(sample_keys) and time it
|
||||
times are added to the results vector, and that is returned.
|
||||
|
||||
(4) this step is called because some hashmaps require some extra steps before
|
||||
you use them. For example, setting a key that will be the tombstone marker, the
|
||||
key that will mark a location as empty, etc.
|
||||
|
||||
*/
|
||||
template<class T>
|
||||
vector<int> int_test(T map, int size) {
|
||||
vector<int> results; // insert, lookup, unsuccesful lookup, delete times
|
||||
vector<int> sample_keys; // get a sample of keys to lookup and later delete
|
||||
vector<int> sample_keys; // get a sample of keys to lookup and later delete, will be filled later
|
||||
|
||||
// unsuccesful lookup keys
|
||||
vector<int> nonkeys(10000);
|
||||
// generate uses a function(here, gen_unsuccesfull_int) to fill a container with values
|
||||
std::generate(nonkeys.begin(), nonkeys.end(), gen_unsuccesfull_int);
|
||||
|
||||
// keys for insert test
|
||||
@@ -38,12 +58,17 @@ vector<int> int_test(T map, int size) {
|
||||
{ // seperate scope, so all_keys gets destroyed. for good measure, empty it too
|
||||
vector<int> all_keys(size - 10000);
|
||||
std::generate(all_keys.begin(), all_keys.end(), gen_int);
|
||||
// std::sample copies x randomly chosen values from old_container to
|
||||
// new_container with the help of a generator instance
|
||||
// in this case, random 10k keys from all_keys to sample_keys
|
||||
std::sample(all_keys.begin(), all_keys.end(), std::back_inserter(sample_keys), 10000, generator);
|
||||
|
||||
for (auto i : all_keys) {
|
||||
testmap.insert({i, i});
|
||||
}
|
||||
all_keys.clear();
|
||||
all_keys.clear(); // going out of scope should call the destructor to
|
||||
// clear it, but just making sure it's done
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -65,7 +90,7 @@ vector<int> int_test(T map, int size) {
|
||||
|
||||
auto insert_time = (duration_cast<nanoseconds>(insert_end - insert_start) - vector_acces_time) / 10000;
|
||||
results.push_back(insert_time.count());
|
||||
// remove some memory
|
||||
// clear all values in here, clear up some memory
|
||||
insert_keys.clear();
|
||||
|
||||
// lookup test
|
||||
@@ -103,7 +128,9 @@ vector<int> int_test(T map, int size) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
// pretty much the same, but with strings
|
||||
// the reason it's split up in 2 functions is because we need other functions to
|
||||
// generate the keys, and unfortunately we can't overload based on return type
|
||||
template<class T>
|
||||
vector<int> string_test(T map, int size) {
|
||||
vector<int> results; // insert, lookup, unsuccesful lookup, delete times
|
||||
|
@@ -24,7 +24,13 @@ bool use_abseil_hash = false;
|
||||
int runs = 1;
|
||||
int maxsize = 50000000;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
/*
|
||||
int_test_aggregate and string_test_aggregate are called for different hashmaps
|
||||
based on the choices
|
||||
see implementation of these in includes/aggregate_tests
|
||||
*/
|
||||
int main(int argc, char **argv) {
|
||||
// This is just stuff to add options like selecting which hashmaps need to get benchmarked
|
||||
CLI::App app{"Hashmap benchmarks"};
|
||||
app.add_option("-i,--implementation", hashmaps, choicetext )->delimiter(',');
|
||||
app.add_option("-a,--abseil", use_abseil_hash, "use absl::Hash, default is false");
|
||||
@@ -32,6 +38,7 @@ int main(int argc, char** argv) {
|
||||
app.add_option("-m, --maxsize", maxsize, "The max size of the hashmaps to test for. Default is 50 million.");
|
||||
CLI11_PARSE(app, argc, argv);
|
||||
time_point<steady_clock> start_test = steady_clock::now();
|
||||
// calls int_test_aggregate and its string version for different hashmaps based on the choices selected
|
||||
if (use_abseil_hash) {
|
||||
for (auto i : hashmaps) {
|
||||
switch (i) {
|
||||
|
Reference in New Issue
Block a user