This commit is contained in:
TinyAtoms
2020-02-05 13:56:39 -03:00
parent 43c534f3ee
commit 64adaa9be1
38 changed files with 234 additions and 3183 deletions

View File

@@ -5,7 +5,7 @@
// Builds a random string by running the append loop 5 times, drawing each
// character code from the shared `generator`, and returns the result by value.
// NOTE(review): the loop body appends through both `gen_char` and
// `singlechar`; these look like the before/after spelling of one statement.
// Confirm that only one of them is meant to remain.
std::string gen_string() { // 90^size possibilities (original estimate; TODO confirm against the distribution bounds)
std::string randomstring;
for (int i = 0; i < 5; ++i) {
randomstring += gen_char(generator);
randomstring += singlechar(generator);
}
return randomstring;
}
@@ -13,16 +13,15 @@ std::string gen_string() { // 90^size posibilities
// Builds a random string the same way as gen_string(), but with 4 loop
// iterations instead of 5, so its length always differs from a gen_string()
// result. Presumably this is what makes it usable as a key for unsuccessful
// lookups; verify against the callers.
// NOTE(review): the loop body appends through both `gen_char` and
// `singlechar`; these look like the before/after spelling of one statement.
// Confirm that only one of them is meant to remain.
std::string gen_unsuccesfull_string() { // 90^size possibilities (original estimate; TODO confirm against the distribution bounds)
std::string randomstring;
for (int i = 0; i < 4; ++i) {
randomstring += gen_char(generator);
randomstring += singlechar(generator);//
}
return randomstring;
}
// Returns one int drawn from `gen_insert_int` using the shared `generator`.
// NOTE(review): the second return below can never execute; it looks like the
// renamed form of the first statement. Confirm which distribution name is
// current and remove the other.
int gen_int(){
return gen_insert_int(generator);
return insert_int(generator);
}
// Returns one int drawn from `gen_noninsert_int` using the shared `generator`.
// Presumably these values serve as keys that were never inserted; verify
// against the distribution bounds declared in generator.h.
int gen_unsuccesfull_int(){
return gen_noninsert_int(generator);
}
return noninsert_int(generator);
}

View File

@@ -6,15 +6,12 @@
#include <iostream>
std::vector<int> sizes = {
vector<int> sizes = {
50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000,
600000, 700000, 800000, 900000, 1000000,
2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000,
15000000, 20000000, 25000000, 30000000, 35000000,
40000000, 45000000, 50000000
// 15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000
};
// to print type info
template <typename T>
@@ -47,113 +44,61 @@ std::basic_string_view<char> name(T var){
template<class T>
int int_test(T map, int repeats=30){
std::cout << "\t\t" << name(map) << "int tests \n";
void int_test_aggregate(T map, int runs){
std::ofstream output{"results.csv", std::ios_base::app};
// insert int tests
for (int i = 0; i < repeats; ++i){
std::string resultline = "insert_int, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = insert_int_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
}
output << resultline << "\n";
}
std::cout << "insert int test done\n";
for (int i = 0; i < runs; ++i);
string insert = "\nint_insert, '";
string succ_lookup = "\nint_succ_lookup, '";
string nosucc_lookup = "\nint_nosucc_lookup, '";
string delet = "\nint_delete, '";
// lookup int
for (int i = 0; i < repeats; ++i){
std::string resultline = "lookup_int, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = lookup_int_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
insert += string{name(map)} + "'";
succ_lookup += string{name(map)} + "'" ;
nosucc_lookup += string{name(map)} +"'" ;
delet += string{name(map)} + "'" ;
for ( auto size : sizes){
vector<float> results = int_test(map, size);
insert += ", " + std::to_string(results[0]);
succ_lookup += ", " + std::to_string(results[1]);
nosucc_lookup +=", " + std::to_string(results[2]);
delet += ", " + std::to_string(results[3]);
}
output << resultline << "\n";
}
std::cout << "lookup int test done\n";
// unsuccesful lookup
for (int i = 0; i < repeats; ++i){
std::string resultline = "nolookup_int, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = nolookup_int_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
}
output << resultline << "\n";
}
std::cout << "unsuccesful lookup int test done\n";
// deletion
for (int i = 0; i < repeats; ++i){
std::string resultline = "delete_int, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = delete_int_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
}
output << resultline << "\n";
}
std::cout << "deletion int test done\n";
return 0;
output << insert << succ_lookup << nosucc_lookup << delet;
cout << insert << succ_lookup << nosucc_lookup << delet;
}
template<class T>
int string_test(T map, int repeats=30){
std::cout <<"\t\t" << name(map) << " stringtest \n";
void string_test_aggregate(T map, int runs){
std::ofstream output{"results.csv", std::ios_base::app};
// insert int tests
for (int i = 0; i < repeats; ++i){
std::string resultline = "insert_string, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = insert_string_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
}
output << resultline << "\n";
}
std::cout << "insert string test done\n";
for (int i = 0; i < runs; ++i);
string insert = "\nint_insert, '";
string succ_lookup = "\nint_succ_lookup, '";
string nosucc_lookup = "\nint_nosucc_lookup, '";
string delet = "\nint_delete, '";
// lookup int
for (int i = 0; i < repeats; ++i){
std::string resultline = "lookup_string, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = lookup_string_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
insert += string{name(map)} + "'";
succ_lookup += string{name(map)} + "'" ;
nosucc_lookup += string{name(map)} +"'" ;
delet += string{name(map)} + "'" ;
for ( auto size : sizes){
vector<float> results = string_test(map, size);
insert += ", " + std::to_string(results[0]);
succ_lookup += ", " + std::to_string(results[1]);
nosucc_lookup +=", " + std::to_string(results[2]);
delet += ", " + std::to_string(results[3]);
}
output << resultline << "\n";
}
std::cout << "lookup string test done\n";
// unsuccesful lookup
for (int i = 0; i < repeats; ++i){
std::string resultline = "nolookup_string, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = nolookup_string_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
}
output << resultline << "\n";
}
std::cout << "unsuccesful lookup string test done\n";
// deletion
for (int i = 0; i < repeats; ++i){
std::string resultline = "delete_string, '" + std::string{name(map)} + "', ";
for (auto size: sizes){
float result = delete_string_test(size, T{}).count();
resultline += std::to_string(result);
resultline += ", ";
}
output << resultline << "\n";
}
std::cout << "deletion string test done\n";
return 0;
output << insert << succ_lookup << nosucc_lookup << delet;
cout << insert << succ_lookup << nosucc_lookup << delet;
}
#endif

View File

@@ -8,9 +8,9 @@
#include <random>
#include <string>
// Shared Mersenne Twister engine. The seed is a fixed constant, so every run
// replays the same pseudo-random sequence.
static std::mt19937 generator(INT32_MAX - 2020);
// Key distributions. uniform_int_distribution draws from the closed range
// [a, b], so each "noninsert" range starts one past the last insertable key;
// sharing the boundary value would let a supposedly absent key be present.
// The double product is truncated to int explicitly rather than implicitly.
static std::uniform_int_distribution<int> gen_insert_int(1, static_cast<int>(INT32_MAX * 0.875));
static std::uniform_int_distribution<int> gen_noninsert_int(static_cast<int>(INT32_MAX * 0.875) + 1, INT32_MAX);
// Character codes 33..123, both ends included.
static std::uniform_int_distribution<int> gen_char(33, 123);
static std::uniform_int_distribution<int> insert_int(1, static_cast<int>(INT32_MAX * 0.875));
static std::uniform_int_distribution<int> noninsert_int(static_cast<int>(INT32_MAX * 0.875) + 1, INT32_MAX);
static std::uniform_int_distribution<int> singlechar(33, 123);
int gen_int();

View File

@@ -7,311 +7,201 @@
#include <iterator>
#include <chrono>
// maps
#include <sparsehash/sparse_hash_map>
// own
#include "./generator.h"
using namespace std::chrono;
using std::vector;
using std::string;
using std::cout;
// Prepares a std::unordered_map<int, int> for a benchmark run of `size` elements.
// This map type needs no special setup, so the only step is reserving capacity
// up front. A non-positive `size` is ignored rather than being converted to a
// huge unsigned bucket request.
// Declared inline because the definition lives in a header; without it, two
// translation units including this file would each emit a definition and the
// link would fail.
inline void prepare(std::unordered_map<int, int>& map, int size){
    if (size <= 0) {
        return;
    }
    map.reserve(size);
}
// Prepares a string-keyed std::unordered_map for a benchmark run of `size`
// elements by reserving capacity up front. A non-positive `size` is ignored
// rather than being converted to a huge unsigned bucket request.
// Declared inline because the definition lives in a header; without it, two
// translation units including this file would each emit a definition and the
// link would fail.
inline void prepare(std::unordered_map<std::string, std::string>& map, int size){
    if (size <= 0) {
        return;
    }
    map.reserve(size);
}
void prepare(google::sparse_hash_map<int, int>& map, int size){
map.set_deleted_key(0);
return;
}
// writing out type info
// https://stackoverflow.com/questions/81870/is-it-possible-to-print-a-variables-type-in-standard-c/56766138#56766138
template <class T>
nanoseconds insert_int_test(int size, T testmap){
// init hashmap, insert size - 10k items
testmap.reserve(size);
for (int i = 0; i < size - 10000; ++i){
int a = gen_int();
testmap.insert({a,a});
}
// generate 10k keys
std::vector<int> keys(10000);
std::generate(keys.begin(), keys.end(), gen_int);
// benchmark vector access time
time_point<steady_clock> start_v_access = steady_clock::now();
for (auto i : keys){
if (i == -1) { // it'll never be, this is just anti optimisation
std::cout << "Something is very wrong!";
}
vector<float> int_test(T testmap, int size){
vector<float> results; // insert, lookup, unsuccesful lookup, delete times
vector<int> sample_keys; // get a sample of keys to lookup and later delete
}
time_point<steady_clock> end_v_access = steady_clock::now();
auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);
// unsuccesful lookup keys
vector<int> nonkeys(10000);
std::generate(nonkeys.begin(), nonkeys.end(), gen_unsuccesfull_int);
// measure insert time
time_point<steady_clock> start_insert_test = steady_clock::now();
for (auto i : keys){
// keys for insert test
vector<int> insert_keys(10000);
std::generate(insert_keys.begin(), insert_keys.end(), gen_int);
// T testmap {};
prepare(testmap, size); // do special actions, such as setting the tombstone marker for other, more exotic hashmaps
{ // seperate scope, so all_keys gets destroyed. for good measure, empty it too
vector<int> all_keys(size - 10000);
std::generate(all_keys.begin(), all_keys.end(), gen_int);
std::sample(all_keys.begin(), all_keys.end(), std::back_inserter(sample_keys), 10000, generator);
for (auto i : all_keys){
testmap.insert({i,i});
}
time_point<steady_clock> end_insert_test = steady_clock::now();
testmap.clear();
// time per insert
auto duration = duration_cast<nanoseconds>(end_insert_test - start_insert_test) - vector_time ;
return duration / 10000;
all_keys.clear();
}
// testing vector access times to subtract later
time_point<steady_clock> vector_start = steady_clock::now();
for (auto i : sample_keys){
if (i == -1 ) cout << "WTF"; // should never run, is here so loop doesnt get optimized away
}
time_point<steady_clock> vector_end = steady_clock::now();
auto vector_acces_time = duration_cast<nanoseconds>(vector_end - vector_start);
// insertion test
time_point<steady_clock> insert_start = steady_clock::now();
for (auto key : insert_keys){
testmap.insert({key,key});
}
time_point<steady_clock> insert_end = steady_clock::now();
auto insert_time = (duration_cast<nanoseconds>(insert_end - insert_start) - vector_acces_time) / 10000 ;
results.push_back(insert_time.count());
// remove some memory
insert_keys.clear();
// lookup test
time_point<steady_clock> lookup_start = steady_clock::now();
for (auto key : sample_keys){
if (testmap[key] == 0) cout << "WTF";
}
time_point<steady_clock> lookup_end = steady_clock::now();
auto lookup_time = (duration_cast<nanoseconds>(lookup_end - lookup_start) - vector_acces_time)/10000;
results.push_back(lookup_time.count());
// unsuccesful lookup test
time_point<steady_clock> unlookup_start = steady_clock::now();
for (auto key : nonkeys){
if (testmap[key] == -1) cout << "WTF";
}
time_point<steady_clock> unlookup_end = steady_clock::now();
auto unlookup_time = (duration_cast<nanoseconds>(unlookup_end - unlookup_start) - vector_acces_time) / 10000 ;
results.push_back(unlookup_time.count());
//free some memoru
nonkeys.clear();
// delete test
time_point<steady_clock> delete_start = steady_clock::now();
for (auto key : sample_keys){
testmap.erase(key);
}
time_point<steady_clock> delete_end = steady_clock::now();
auto delete_time = (duration_cast<nanoseconds>(delete_end - delete_start) - vector_acces_time) / 10000;
results.push_back(delete_time.count());
return results;
}
template <class T>
nanoseconds insert_string_test(int size, T testmap){
// init hashmap, insert size - 10k items
testmap.reserve(size);
for (int i = 0; i < size - 10000; ++i){
std::string temp = gen_string();
testmap.insert({temp,temp});
}
// generate 10k keys
std::vector<std::string> keys(10000);
std::generate(keys.begin(), keys.end(), gen_string);
// benchmark vector access time
time_point<steady_clock> start_v_access = steady_clock::now();
for (auto i : keys){
if (i == "a") { // it'll never be, this is just anti optimisation
std::cout << "Something is very wrong!";
}
vector<float> string_test(T map, int size){
vector<float> results; // insert, lookup, unsuccesful lookup, delete times
vector<string> sample_keys; // get a sample of keys to lookup and later delete
}
time_point<steady_clock> end_v_access = steady_clock::now();
auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);
// unsuccesful lookup keys
vector<string> nonkeys(10000);
std::generate(nonkeys.begin(), nonkeys.end(), gen_unsuccesfull_string);
// measure insert time
time_point<steady_clock> start_insert_test = steady_clock::now();
for (auto i : keys){
// keys for insert test
vector<string> insert_keys(10000);
std::generate(insert_keys.begin(), insert_keys.end(), gen_string);
T testmap {};
prepare(testmap, size); // do special actions, such as setting the tombstone marker for other, more exotic hashmaps
{ // seperate scope, so all_keys gets destroyed. for good measure, empty it too
vector<string> all_keys(size - 10000);
std::generate(all_keys.begin(), all_keys.end(), gen_string);
std::sample(all_keys.begin(), all_keys.end(), std::back_inserter(sample_keys), 10000, generator);
for (auto i : all_keys){
testmap.insert({i,i});
}
time_point<steady_clock> end_insert_test = steady_clock::now();
testmap.clear();
// time per insert
auto duration = duration_cast<nanoseconds>(end_insert_test - start_insert_test) - vector_time ;
return duration / 10000;
all_keys.clear();
}
}
template<class T>
nanoseconds lookup_int_test(int size, T testmap){
// reserve, get random 10k keys that are inserted, insert keys
testmap.reserve(size);
std::vector<int> sample_inserted;
sample_inserted.reserve(10000);
// testing vector access times to subtract later
time_point<steady_clock> vector_start = steady_clock::now();
for (auto i : sample_keys){
if (i == "" ) cout << "WTF"; // should never run, is here so loop doesnt get optimized away
}
time_point<steady_clock> vector_end = steady_clock::now();
auto vector_acces_time = duration_cast<nanoseconds>(vector_end - vector_start);
// insertion test
time_point<steady_clock> insert_start = steady_clock::now();
for (auto key : insert_keys){
testmap.insert({key,key});
}
time_point<steady_clock> insert_end = steady_clock::now();
auto insert_time = (duration_cast<nanoseconds>(insert_end - insert_start) - vector_acces_time) / 10000 ;
results.push_back(insert_time.count());
// remove some memory
insert_keys.clear();
// lookup test
time_point<steady_clock> lookup_start = steady_clock::now();
for (auto key : sample_keys){
if (testmap[key] == "") cout << "WTF";
}
time_point<steady_clock> lookup_end = steady_clock::now();
auto lookup_time = (duration_cast<nanoseconds>(lookup_end - lookup_start) - vector_acces_time)/10000;
results.push_back(lookup_time.count());
// unsuccesful lookup test
time_point<steady_clock> unlookup_start = steady_clock::now();
for (auto key : nonkeys){
if (testmap[key] == "") cout << "WTF";
}
time_point<steady_clock> unlookup_end = steady_clock::now();
auto unlookup_time = (duration_cast<nanoseconds>(unlookup_end - unlookup_start) - vector_acces_time) / 10000 ;
results.push_back(unlookup_time.count());
//free some memoru
nonkeys.clear();
// delete test
time_point<steady_clock> delete_start = steady_clock::now();
for (auto key : sample_keys){
testmap.erase(key);
}
{
std::vector<int> keys(size);
std::generate(keys.begin(), keys.end(), gen_int);
std::sample(keys.begin(), keys.end(), std::back_inserter(sample_inserted) , 10000, generator);
for (auto i : keys){
testmap.insert({i, i});
}
}
// benchmark vector access time
time_point<steady_clock> start_v_access = steady_clock::now();
for (auto i : sample_inserted){
if (i == -1) { // it'll never be, this is just anti optimisation
std::cout << "Something is very wrong!";
}
}
time_point<steady_clock> end_v_access = steady_clock::now();
auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);
time_point<steady_clock> delete_end = steady_clock::now();
auto delete_time = (duration_cast<nanoseconds>(delete_end - delete_start) - vector_acces_time) / 10000;
results.push_back(delete_time.count());
return results;
// benchmark access time of hashmap
time_point<steady_clock> start_benchmark = steady_clock::now();
for (auto i : sample_inserted){
if (testmap[i] == -1){
std::cout << "SOMETHUNG IS WRONG!";
}
}
testmap.clear();
time_point<steady_clock> end_benchmark = steady_clock::now();
auto duration = duration_cast<nanoseconds>(end_benchmark - start_benchmark) - vector_time;
return duration / 10000;
}
// Benchmarks successful lookups on a string-keyed map.
//
// Fills `testmap` with `size` strings from gen_string(), keeps a random sample
// of up to 10000 of them, and times one `testmap[key]` access per sampled key.
// The cost of merely walking the sample vector is timed separately and
// subtracted from the result.
//
// size:    number of keys generated and inserted (duplicates collapse in the map).
// testmap: map under test, taken by value so each call works on its own copy.
// returns: (lookup loop time - vector walk time) / 10000, in nanoseconds. The
//          divisor is fixed, so this is a per-lookup average only when
//          size >= 10000.
template<class T>
nanoseconds lookup_string_test(int size, T testmap){
    constexpr int kSampleCount = 10000;  // timed lookups; also the per-operation divisor

    testmap.reserve(size);

    // Start with an empty vector: std::sample appends through back_inserter, so
    // constructing it with 1000 elements left 1000 empty strings at the front
    // that were never inserted and were then looked up (and inserted) as well.
    std::vector<std::string> sample_inserted;
    sample_inserted.reserve(kSampleCount);
    {
        // Scoped so the full key list is released before any timing starts.
        std::vector<std::string> keys(size);
        std::generate(keys.begin(), keys.end(), gen_string);
        std::sample(keys.begin(), keys.end(), std::back_inserter(sample_inserted), kSampleCount, generator);
        for (const auto& key : keys){
            testmap.insert({key, key});
        }
    }

    // Baseline: walking the sample without touching the map.
    const auto start_v_access = steady_clock::now();
    for (const auto& key : sample_inserted){
        if (key == "a") { // never true; keeps the loop from being optimised away
            std::cout << "Something is very wrong!";
        }
    }
    const auto end_v_access = steady_clock::now();
    const auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);

    // Timed region: one lookup per sampled key.
    const auto start_benchmark = steady_clock::now();
    for (const auto& key : sample_inserted){
        if (testmap[key] == "a"){
            std::cout << "SOMETHUNG IS WRONG!";
        }
    }
    const auto end_benchmark = steady_clock::now();

    // Clearing happens after the end timestamp so its cost is not counted as lookup time.
    testmap.clear();

    const auto duration = duration_cast<nanoseconds>(end_benchmark - start_benchmark) - vector_time;
    return duration / kSampleCount;
}
// Benchmarks unsuccessful lookups on an int-keyed map.
//
// Draws 10000 probe keys from gen_unsuccesfull_int(), fills `testmap` with
// `size` keys from gen_int(), and times one find() per probe key. The cost of
// merely walking the probe vector is timed separately and subtracted.
//
// size:    number of keys generated and inserted (duplicates collapse in the map).
// testmap: map under test, taken by value so each call works on its own copy.
// returns: (probe loop time - vector walk time) / 10000, in nanoseconds.
template<class T>
nanoseconds nolookup_int_test(int size, T testmap){
    constexpr int kLookupCount = 10000;  // timed probes; also the per-operation divisor

    testmap.reserve(size);
    // Probe keys are generated before the map is filled, which fixes the order
    // in which values are pulled from the shared generator.
    std::vector<int> lookup_keys(kLookupCount);
    std::generate(lookup_keys.begin(), lookup_keys.end(), gen_unsuccesfull_int);
    for (int i = 0; i < size; ++i){
        const int key = gen_int();
        testmap.insert({key, key});
    }

    // Baseline: walking the probe keys without touching the map.
    const auto start_v_access = steady_clock::now();
    for (const auto key : lookup_keys){
        if (key == -1) { // never true; keeps the loop from being optimised away
            std::cout << "Something is very wrong!";
        }
    }
    const auto end_v_access = steady_clock::now();
    const auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);

    // Timed region. find() is used instead of operator[] because operator[]
    // inserts a default value for every missing key, which would time
    // insertion (and possible rehashing) instead of a failed lookup.
    const auto start_benchmark = steady_clock::now();
    for (const auto key : lookup_keys){
        const auto hit = testmap.find(key);
        if (hit != testmap.end() && hit->second == -1){
            std::cout << "SOMETHUNG IS WRONG!";
        }
    }
    const auto end_benchmark = steady_clock::now();

    // Clearing happens after the end timestamp so its cost is not counted as lookup time.
    testmap.clear();

    const auto duration = duration_cast<nanoseconds>(end_benchmark - start_benchmark) - vector_time;
    return duration / kLookupCount;
}
// Benchmarks unsuccessful lookups on a string-keyed map.
//
// Draws 10000 probe keys from gen_unsuccesfull_string(), fills `testmap` with
// `size` keys from gen_string(), and times one find() per probe key. The cost
// of merely walking the probe vector is timed separately and subtracted.
//
// size:    number of keys generated and inserted (duplicates collapse in the map).
// testmap: map under test, taken by value so each call works on its own copy.
// returns: (probe loop time - vector walk time) / 10000, in nanoseconds.
template<class T>
nanoseconds nolookup_string_test(int size, T testmap){
    constexpr int kLookupCount = 10000;  // timed probes; also the per-operation divisor

    testmap.reserve(size);
    // Probe keys are generated before the map is filled, which fixes the order
    // in which values are pulled from the shared generator.
    std::vector<std::string> lookup_keys(kLookupCount);
    std::generate(lookup_keys.begin(), lookup_keys.end(), gen_unsuccesfull_string);
    for (int i = 0; i < size; ++i){
        const std::string key = gen_string();
        testmap.insert({key, key});
    }

    // Baseline: walking the probe keys without touching the map.
    const auto start_v_access = steady_clock::now();
    for (const auto& key : lookup_keys){
        if (key == "a") { // never true; keeps the loop from being optimised away
            std::cout << "Something is very wrong!";
        }
    }
    const auto end_v_access = steady_clock::now();
    const auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);

    // Timed region. find() is used instead of operator[] because operator[]
    // inserts a default value for every missing key, which would time
    // insertion (and possible rehashing) instead of a failed lookup.
    const auto start_benchmark = steady_clock::now();
    for (const auto& key : lookup_keys){
        const auto hit = testmap.find(key);
        if (hit != testmap.end() && hit->second == "a"){
            std::cout << "SOMETHUNG IS WRONG!";
        }
    }
    const auto end_benchmark = steady_clock::now();

    // Clearing happens after the end timestamp so its cost is not counted as lookup time.
    testmap.clear();

    const auto duration = duration_cast<nanoseconds>(end_benchmark - start_benchmark) - vector_time;
    return duration / kLookupCount;
}
// Benchmarks erasing existing keys from an int-keyed map.
//
// Fills `testmap` with `size` keys from gen_int(), keeps a random sample of up
// to 10000 of them, and times one erase() per sampled key. The cost of merely
// walking the sample vector is timed separately and subtracted.
//
// size:    number of keys generated and inserted (duplicates collapse in the map).
// testmap: map under test, taken by value so each call works on its own copy.
// returns: (erase loop time - vector walk time) / 10000, in nanoseconds. The
//          divisor is fixed, so this is a per-erase average only when
//          size >= 10000.
template<class T>
nanoseconds delete_int_test(int size, T testmap){
    constexpr int kSampleCount = 10000;  // timed erases; also the per-operation divisor

    testmap.reserve(size);
    std::vector<int> sample_inserted;
    sample_inserted.reserve(kSampleCount);
    {
        // Scoped so the full key list is released before any timing starts.
        std::vector<int> keys(size);
        std::generate(keys.begin(), keys.end(), gen_int);
        std::sample(keys.begin(), keys.end(), std::back_inserter(sample_inserted), kSampleCount, generator);
        for (const auto key : keys){
            testmap.insert({key, key});
        }
    }

    // Baseline: walking the sample without touching the map.
    const auto start_v_access = steady_clock::now();
    for (const auto key : sample_inserted){
        if (key == -1) { // never true; keeps the loop from being optimised away
            std::cout << "Something is very wrong!";
        }
    }
    const auto end_v_access = steady_clock::now();
    const auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);

    // Timed region: one erase per sampled key.
    const auto start_benchmark = steady_clock::now();
    for (const auto key : sample_inserted){
        testmap.erase(key);
    }
    const auto end_benchmark = steady_clock::now();

    // Clearing the remaining entries happens after the end timestamp;
    // otherwise tearing down the whole map would be counted as erase time.
    testmap.clear();

    const auto duration = duration_cast<nanoseconds>(end_benchmark - start_benchmark) - vector_time;
    return duration / kSampleCount;
}
// Benchmarks erasing existing keys from a string-keyed map.
//
// Fills `testmap` with `size` keys from gen_string(), keeps a random sample of
// up to 10000 of them, and times one erase() per sampled key. The cost of
// merely walking the sample vector is timed separately and subtracted.
//
// size:    number of keys generated and inserted (duplicates collapse in the map).
// testmap: map under test, taken by value so each call works on its own copy.
// returns: (erase loop time - vector walk time) / 10000, in nanoseconds. The
//          divisor is fixed, so this is a per-erase average only when
//          size >= 10000.
template<class T>
nanoseconds delete_string_test(int size, T testmap){
    constexpr int kSampleCount = 10000;  // timed erases; also the per-operation divisor

    testmap.reserve(size);
    std::vector<std::string> sample_inserted;
    sample_inserted.reserve(kSampleCount);
    {
        // Scoped so the full key list is released before any timing starts.
        std::vector<std::string> keys(size);
        std::generate(keys.begin(), keys.end(), gen_string);
        std::sample(keys.begin(), keys.end(), std::back_inserter(sample_inserted), kSampleCount, generator);
        for (const auto& key : keys){
            testmap.insert({key, key});
        }
    }

    // Baseline: walking the sample without touching the map.
    const auto start_v_access = steady_clock::now();
    for (const auto& key : sample_inserted){
        if (key == "a") { // never true; keeps the loop from being optimised away
            std::cout << "Something is very wrong!";
        }
    }
    const auto end_v_access = steady_clock::now();
    const auto vector_time = duration_cast<nanoseconds>(end_v_access - start_v_access);

    // Timed region: one erase per sampled key.
    const auto start_benchmark = steady_clock::now();
    for (const auto& key : sample_inserted){
        testmap.erase(key);
    }
    const auto end_benchmark = steady_clock::now();

    // Clearing the remaining entries happens after the end timestamp;
    // otherwise tearing down the whole map would be counted as erase time.
    testmap.clear();

    const auto duration = duration_cast<nanoseconds>(end_benchmark - start_benchmark) - vector_time;
    return duration / kSampleCount;
}
#endif /* TESTS_H */