updated plots to use a 95% CI instead of 1-std error bars. As if the difference is even noticeable
This commit is contained in:
parent 607cd2e1dd
commit 529fd8841a
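For context, a minimal sketch of the two error representations the commit message contrasts, run on synthetic data rather than the repo's CSVs. seaborn's lineplot aggregates repeated measurements itself and draws a bootstrapped 95% confidence band (spelled ci=95 in seaborn releases of this era; newer versions use errorbar=('ci', 95)), while 1-std error bars have to be computed by hand and handed to plt.errorbar:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# synthetic stand-in for the benchmark data: 30 repeats per size
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "SIZE": np.repeat([50_000, 100_000, 150_000], 30),
    "TIME": rng.normal(loc=100, scale=10, size=90),
})

# old style: mean with 1-std error bars, aggregated manually
agg = df.groupby("SIZE")["TIME"].agg(["mean", "std"])
plt.errorbar(agg.index, agg["mean"], yerr=agg["std"], label="mean ± 1 std")

# new style: seaborn draws a bootstrapped 95% CI band around the mean
sns.lineplot(x="SIZE", y="TIME", data=df, ci=95, label="mean, 95% CI")
plt.legend()
plt.show()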
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
3
.vscode/settings.json
vendored
@@ -1,3 +0,0 @@
{
    "python.pythonPath": "C:\\python3.8\\python.exe"
}
File diff suppressed because one or more lines are too long
14038
last_attempt.html
Normal file
File diff suppressed because one or more lines are too long
223
last_attempt.py
@@ -1,223 +0,0 @@
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
from pandas import DataFrame, Series
from numpy import nan
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (24, 5)
import seaborn as sns
sns.set()
sns.set(font_scale=1.75)


# In[2]:


cols = [
    "TEST", "MAP", "SIZE", "TIME"
]
onmodded = pd.read_csv("new_repr.csv", quotechar="'", header=None)

# drop the leftover index column and header row from the CSV export,
# then make the SIZE and TIME columns numeric
onmodded = onmodded.drop([0], axis=1)
onmodded.columns = cols
onmodded = onmodded.drop([0], axis=0)
onmodded.iloc[:, 2:] = onmodded.iloc[:, 2:].astype("int32")

styles = {'absl::flat_hash_map': ["#0000cc"],  # blues
          "absl::node_hash_map'": ["#3366ff"],  # stray quote matches a malformed name in the raw data
          'absl::node_hash_map': ["#99ccff"],

          'google::dense_hash_map': ["#ff0000"],  # reds
          'google::sparse_hash_map': ["#ff6666"],
          'phmap::parallel_flat_hash_map': ["#ff0066"],

          'ska::bytell_hash_map': ["#009933"],  # greens
          'ska::flat_hash_map': ["#33cc33"],
          'ska::unordered_map': ["#99ff66"],

          'tsl::hopscotch_map': ["#9900cc"],  # purples
          'tsl::robin_map': ["#cc33ff"],
          'tsl::sparse_map': ["#cc99ff"],

          'robin_hood::unordered_flat_map': ["#ffcc99"],
          'robin_hood::unordered_node_map': ["#ccff66"],

          'boost::unordered::unordered_map': ["#663300"],  # brown

          'emilib::HashMap': ["#9900cc"],  # purple

          # weird orange
          'phmap::parallel_node_hash_map': ["#ffcc66", "solid"],

          'std::unordered_map': ["#000000", "solid"],  # black
          }
ticks = [50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000,
         600000, 700000, 800000, 900000, 1000000,
         2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000,
         15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000]
ticklabels = ['50 K', '100 K',
              '150 K', '200 K', '250 K', '300 K',
              '350 K', '400 K', '0.5 M', '0.6 M',
              '0.7 M', '0.8 M', '0.9 M', '1 M',
              '2 M', '3 M', '4 M', '5 M',
              '6 M', '7 M', '8 M', '9 M',
              '10 M', '15 M', '20 M', '25 M',
              '30 M', '35 M', '40 M', '45 M', '50 M']
labels = {
    'int_delete': ["mean int deletion time", "deletion time (ns)"],
    'int_insert': ["mean int insertion time", "insertion time (ns)"],
    'int_nosucc_lookup': ["mean int unsuccessful lookup time", "unsuccessful lookup time (ns)"],
    'int_succ_lookup': ["mean int successful lookup time", "successful lookup time (ns)"],
    'string_delete': ["mean string deletion time", "deletion time (ns)"],
    'string_insert': ["mean string insertion time", "insertion time (ns)"],
    'string_nosucc_lookup': ["mean string unsuccessful lookup time", "unsuccessful lookup time (ns)"],
    'string_succ_lookup': ["mean string successful lookup time", "successful lookup time (ns)"]
}


# In[3]:


# outlier testing functions
def remove_with_modified_z_score(data, threshold=3.5):
    # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
    data = data.astype(int)
    median_absolute_deviation = abs(data - data.median()).median()
    if not median_absolute_deviation:
        return data
    modified_z_scores = abs(0.6745 * (data - data.median()) / median_absolute_deviation)
    cutoff = modified_z_scores <= threshold
    # zero out the outliers, then turn the zeros into NaN so they
    # drop out of the later means
    data = data * cutoff
    data = data.replace(0, nan)
    return data


# function that takes one of the outlier testers and data, and removes outliers
def remove_outlier(data, function):
    new_data = data.copy(True)
    new_data["TIME"] = new_data["TIME"].astype(int)
    new_data["SIZE"] = new_data["SIZE"].astype(int)
    # the CSV holds 30 repeats per (TEST, MAP, SIZE) combination, so the
    # outlier test runs per block of 30 rows; note .loc slicing is
    # label-inclusive, so each block overlaps the next by one row
    for i in range(4216):
        start = i * 30
        end = start + 30
        new_data.loc[start:end, "TIME"] = function(data.loc[start:end, "TIME"])
        if not i % 420:
            print(i / 42, "% done")
    return new_data


# helpers for plot functions
def sort_maps(test):
    # order the maps by their worst mean time so the plot panels group
    # maps of comparable speed
    maps = data[data["TEST"] == test]["MAP"].unique()
    new = [(gr_max.loc[test, i]["TIME"], i) for i in maps]
    new.sort()
    new = [i[1] for i in new]
    return new


def divider(df, maplist):
    filters = df['MAP'].isin(maplist)
    return df[filters]


def plotter2(test, data):
    mydata = data[data["TEST"] == test]
    maps = sort_maps(test)
    set1 = divider(mydata, maps[:5])
    set2 = divider(mydata, maps[5:11])
    set3 = divider(mydata, maps[11:])

    # one figure per group of maps
    for n, subset in enumerate((set1, set2, set3), start=1):
        plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=subset)
        plt.xscale("log")
        plt.xticks(ticks, ticklabels)
        plot.set_xticklabels(
            plot.get_xticklabels(),
            rotation=55,
            horizontalalignment='center',
            fontweight='light',
        )
        plt.ylabel(labels[test][1])
        plt.legend()
        plt.title(labels[test][0])
        plt.savefig("./plots/{}/{}.png".format(test, n))
        plt.clf()


# In[4]:


data = remove_outlier(onmodded, remove_with_modified_z_score)
groups = data.groupby(["TEST", "MAP"])
gr_max = groups.max()
gr_mean = groups.mean()


# In[5]:


tests = data["TEST"].unique()
for i in tests:
    plotter2(i, data)


# In[6]:


# In[ ]:
@@ -1,90 +0,0 @@
import seaborn as sns
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt

import pathlib
import os
plt.rcParams["figure.figsize"] = (40, 5)
sns.set()

## new cell
# wide layout: one timing column per container size
cols = [
    "TEST", "MAP", '50000', '100000', '150000', '200000', '250000', '300000', '350000', '400000',
    '500000', '600000', '700000', '800000', '900000', '1000000', '2000000', '3000000',
    '4000000', '5000000', '6000000', '7000000', '8000000', '9000000', '10000000', '15000000',
    '20000000', '25000000', '30000000', '35000000', '40000000', '45000000', '50000000'
]
data = pd.read_csv("results.csv", quotechar="'", header=None)
data.columns = cols
# data.head()

## new cell
groups = data.groupby(["TEST", "MAP"])
groups_mean = groups.mean()
groups_std = groups.std()

## new cell
def max_val(hmap, test):
    return groups_mean.loc[test, hmap].max()


def sort_maps(test):
    # order the maps by their worst mean time
    maps = list(groups_mean.loc[test].index)
    new = [(max_val(i, test), i) for i in maps]
    new.sort()
    new = [i[1] for i in new]
    return new


def plot_test(test, include_error=True, log=False):
    maps = sort_maps(test)
    # print(maps)
    # walk the maps from slowest (index 16) down to fastest (index 0),
    # starting a new figure at each index in `repeats`; the map at a
    # boundary index is drawn into both the figure it closes and the next
    count = 16
    repeats = [0, 5, 11]
    while count > -1:
        if not count and count not in repeats:
            break
        mp = maps[count]
        if include_error:
            plt.errorbar(groups_mean.columns, groups_mean.loc[test, mp],
                         yerr=groups_std.loc[test, mp], label=mp)
        else:
            plt.plot(groups_mean.columns, groups_mean.loc[test, mp], label=mp)

        if count in repeats:
            if log:
                plt.xscale("log")
            plt.ylabel("{} time (ns)".format(test))

            plt.legend()
            plt.title(test)
            plt.savefig("./plots/{}/{}.png".format(test, count))
            plt.clf()
            # plt.show()

            repeats.pop(repeats.index(count))
        else:
            count -= 1

## new cell
tests = data["TEST"].unique()
p = pathlib.Path("./plots")
if not p.is_dir():
    p.mkdir()
for i in tests:
    path = pathlib.Path("./plots/{}/".format(i))
    if not path.is_dir():
        path.mkdir()
    print(i)
    plot_test(i, False)
    # break
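Side note: results.csv (plotted by the script above) is wide, with one timing column per size, while new_repr.csv (consumed by last_attempt.py) is long, with SIZE and TIME columns. The conversion step isn't shown in this commit; a hypothetical sketch of that reshape with DataFrame.melt, on made-up rows:

import pandas as pd

# wide layout, as in results.csv: one timing column per container size
wide = pd.DataFrame({
    "TEST": ["int_insert", "int_insert"],
    "MAP": ["std::unordered_map", "tsl::robin_map"],
    "50000": [120, 95],
    "100000": [130, 101],
})

# long layout, as in new_repr.csv: one (SIZE, TIME) pair per row
long = wide.melt(id_vars=["TEST", "MAP"], var_name="SIZE", value_name="TIME")
long["SIZE"] = long["SIZE"].astype(int)
print(long)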
12
readme.md
@@ -1,5 +1,8 @@
 # Files and what they contain
 
+# HTML export for those who don't have a data science stack + python installed
+[last_attempt.html](./last_attempt.html)
+
 ## results.csv
 
 [Results.csv](./results.csv) has the raw data as output by the benchmark code
@@ -17,12 +20,13 @@ to
 
 ## new_repr_no_outlier.csv
 
-[New_repr.csv](./new_repr_no_outlier.csv) has the data from no_repr but with outliers removed with the modified z score test
+[new_repr_no_outlier.csv](./new_repr_no_outlier.csv) has the data from new_repr but with outliers removed with the modified z-score test
 
 # Notebooks
 
+## Better_plotting
+Better_plotting is an incomplete and now broken notebook where we attempted to plot the raw results directly
+
 ## Last_attempt
 Last_attempt has our last attempt at plotting, plus some random stuff at the end
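The modified z-score test the readme refers to (Iglewicz & Hoaglin, per the NIST handbook linked in last_attempt.py) scores each point as 0.6745 * (x - median) / MAD and drops points whose absolute score exceeds 3.5. A toy demonstration on made-up numbers, independent of the repo's data:

import pandas as pd

s = pd.Series([100, 102, 98, 101, 99, 500])  # 500 is the obvious outlier
mad = (s - s.median()).abs().median()        # median absolute deviation
scores = (0.6745 * (s - s.median()) / mad).abs()
print(s[scores <= 3.5])                      # keeps everything except 500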
File diff suppressed because it is too large
28
to.py
@@ -1,28 +0,0 @@
# color + linestyle per map; the original had a few tokens matplotlib
# doesn't accept ("-,", "- ", ".."), normalized here to valid linestyles
styles = {'absl::flat_hash_map': ["#0000cc", "--"],  # blues
          "absl::node_hash_map'": ["#3366ff", "--"],  # stray quote matches a malformed name in the raw data
          'absl::node_hash_map': ["#99ccff", "--"],

          'google::dense_hash_map': ["#ff0000", "-."],  # reds
          'google::sparse_hash_map': ["#ff6666", "-."],
          'phmap::parallel_flat_hash_map': ["#ff0066", "-."],

          'ska::bytell_hash_map': ["#009933", "-"],  # greens
          'ska::flat_hash_map': ["#33cc33", "-"],
          'ska::unordered_map': ["#99ff66", "-"],

          'tsl::hopscotch_map': ["#9900cc", ":"],  # purples
          'tsl::robin_map': ["#cc33ff", ":"],
          'tsl::sparse_map': ["#cc99ff", ":"],

          'robin_hood::unordered_flat_map': ["#ffcc99", ":"],
          'robin_hood::unordered_node_map': ["#ccff66", ":"],

          'boost::unordered::unordered_map': ["#663300", "solid"],  # brown

          'emilib::HashMap': ["#9900cc", "solid"],  # purple

          # weird orange
          'phmap::parallel_node_hash_map': ["#ffcc66", "solid"],

          'std::unordered_map': ["#000000", "solid"],  # black
          }
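to.py is unused in this commit but reads like a scratch pad for a richer version of the styles dict in last_attempt.py, pairing each map with a color and a matplotlib linestyle. A hypothetical sketch of how such a mapping could drive the plots (the loop and the sample timings are illustration, not repo code):

import matplotlib.pyplot as plt

styles = {
    'tsl::robin_map': ["#cc33ff", ":"],
    'std::unordered_map': ["#000000", "solid"],
}

# made-up timings keyed by map name, one value per size
sizes = [50000, 100000, 150000]
times = {'tsl::robin_map': [95, 101, 108], 'std::unordered_map': [120, 130, 142]}

for name, (color, linestyle) in styles.items():
    plt.plot(sizes, times[name], color=color, linestyle=linestyle, label=name)
plt.legend()
plt.show()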