benchmark-results/last_attempt.py

224 lines
5.8 KiB
Python
Raw Normal View History

2020-04-20 11:37:08 +00:00
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
from pandas import DataFrame, Series
from numpy import nan
import matplotlib.pyplot as plt
# Default figure size (width, height) used by every plot produced below.
plt.rcParams["figure.figsize"] = (24,5)
import seaborn as sns
# Activate seaborn's default theme ...
sns.set()
# ... and apply it again with the font scale raised to 1.75.
sns.set(font_scale=1.75)
# In[2]:
# Column names assigned to the benchmark table once it has been trimmed.
cols = [
    "TEST", "MAP", "SIZE", "TIME"
]
# Raw benchmark measurements; fields in the file are quoted with single quotes.
onmodded = pd.read_csv("new_repr.csv", quotechar="'", header=None)
# Discard the first column and (below) the first row of the file.
# NOTE(review): presumably these are a saved index column and the header
# row -- confirm against the CSV writer.
# ``axis`` is selected through keywords because pandas >= 2.0 no longer
# accepts it as a positional argument of ``DataFrame.drop``.
onmodded = onmodded.drop(columns=[0])
onmodded.columns = cols
onmodded = onmodded.drop(index=[0])
# Assign whole columns so that their dtype really becomes int32; writing
# through ``iloc`` into the existing object columns may leave the dtype as is.
onmodded[["SIZE", "TIME"]] = onmodded[["SIZE", "TIME"]].astype("int32")
# Line style per hash map implementation: [colour] or [colour, line style].
# NOTE(review): nothing in the visible part of this file reads ``styles``.
styles = {
    # blues
    "absl::flat_hash_map": ["#0000cc"],
    # NOTE(review): this key ends with a stray single quote -- confirm it is
    # intentional before relying on it.
    "absl::node_hash_map'": ["#3366ff"],
    "absl::node_hash_map": ["#99ccff"],
    # reds
    "google::dense_hash_map": ["#ff0000"],
    "google::sparse_hash_map": ["#ff6666"],
    "phmap::parallel_flat_hash_map": ["#ff0066"],
    # greens
    "ska::bytell_hash_map": ["#009933"],
    "ska::flat_hash_map": ["#33cc33"],
    "ska::unordered_map": ["#99ff66"],
    # purples
    "tsl::hopscotch_map": ["#9900cc"],
    "tsl::robin_map": ["#cc33ff"],
    "tsl::sparse_map": ["#cc99ff"],
    "robin_hood::unordered_flat_map": ["#ffcc99"],
    "robin_hood::unordered_node_map": ["#ccff66"],
    # brown
    "boost::unordered::unordered_map": ["#663300"],
    # purple (same colour as tsl::hopscotch_map)
    "emilib::HashMap": ["#9900cc"],
    # weird orange
    "phmap::parallel_node_hash_map": ["#ffcc66", "solid"],
    # black
    "std::unordered_map": ["#000000", "solid"],
}
# X-axis tick positions: 50 K..400 K in 50 K steps, 0.5 M..1 M in 0.1 M
# steps, 2 M..10 M in 1 M steps and 15 M..50 M in 5 M steps.
ticks = (
    list(range(50000, 400001, 50000))
    + list(range(500000, 1000001, 100000))
    + list(range(2000000, 10000001, 1000000))
    + list(range(15000000, 50000001, 5000000))
)


def _tick_label(size):
    """Return the axis label for *size*: 'N K' below 500000, 'X M' otherwise."""
    if size < 500000:
        return "{} K".format(size // 1000)
    return "{:g} M".format(size / 1000000)


# Human-readable label for every entry of ``ticks``, in the same order.
ticklabels = [_tick_label(size) for size in ticks]
# (test-name suffix, title fragment, y-axis label) for each benchmarked
# operation; the int and string variants of a test share these texts.
_operations = (
    ("delete", "deletion time", "deletion time (ns)"),
    ("insert", "insertion time", "insertion time(ns)"),
    ("nosucc_lookup", "unsucessful lookup time", "unsucessful lookup time (ns)"),
    ("succ_lookup", "succesful lookup time", "succesful lookup time (ns)"),
)
# Maps a test name to [plot title, y-axis label].
labels = {
    "{}_{}".format(key_type, suffix): ["mean {} {}".format(key_type, title), ylabel]
    for key_type in ("int", "string")
    for suffix, title, ylabel in _operations
}
# In[3]:
# outlier testing functions
def remove_with_modified_z_score(data, treshold=3.5):
    """Replace the outliers of *data* with NaN using the modified z-score.

    The modified z-score of a sample ``x`` is
    ``0.6745 * (x - median) / MAD``, where ``MAD`` is the median absolute
    deviation of the series.  Reference:
    https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm

    Args:
        data: pandas Series whose values can be converted to ``int``.
        treshold: largest absolute modified z-score that is still kept.
            (The spelling of the name is kept for keyword callers.)

    Returns:
        A Series with the index of *data*.  Samples whose absolute score
        exceeds *treshold* are NaN; all other samples keep their value.
        When the MAD is zero the score is undefined and the
        integer-converted series is returned without any masking.
    """
    data = data.astype(int)
    median = data.median()
    median_absolute_deviation = abs(data - median).median()
    if not median_absolute_deviation:
        return data
    modified_z_scores = abs(0.6745 * (data - median) / median_absolute_deviation)
    # Mask with ``where`` instead of multiplying by the boolean mask and then
    # replacing zeros: a genuine measurement of 0 must not become NaN.
    return data.where(modified_z_scores <= treshold)
#function that takes one of the outlier testers and data, and removes outliers
def remove_outlier(data, function, chunk_size=30):
    """Apply an outlier filter to the TIME column, one chunk of rows at a time.

    The rows of *data* are split by position into consecutive chunks of
    *chunk_size* rows (the last chunk may be shorter).  Each chunk of TIME
    values is passed to *function*, and the returned values replace the
    originals in a copy of *data*.

    Chunks are taken by position, not by index label, so they never overlap
    and do not depend on where the index of *data* starts.

    Args:
        data: DataFrame with "SIZE" and "TIME" columns convertible to int.
        function: callable taking a Series of integer TIME values and
            returning a Series of the same length (for example
            ``remove_with_modified_z_score``).
        chunk_size: number of consecutive rows filtered together.

    Returns:
        A copy of *data* with "SIZE" converted to int and "TIME" converted to
        float (so that removed samples can be stored as NaN).  *data* itself
        is not modified.

    Raises:
        ValueError: if *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    new_data = data.copy(True)
    new_data["SIZE"] = new_data["SIZE"].astype(int)
    times = new_data["TIME"].astype(int)
    n_chunks = -(-len(times) // chunk_size)  # ceiling division
    if not n_chunks:
        new_data["TIME"] = times.astype(float)
        return new_data
    # Report progress roughly every 10 % of the chunks.
    report_every = max(1, n_chunks // 10)
    cleaned = []
    for i in range(n_chunks):
        start = i * chunk_size
        cleaned.append(function(times.iloc[start:start + chunk_size]))
        if not i % report_every:
            print(100 * i / n_chunks, "% done")
    # Write back by position; float dtype lets NaN mark the removed samples.
    new_data["TIME"] = pd.concat(cleaned).to_numpy(dtype=float)
    return new_data
# helpers for plot functions
def sort_maps(test):
    """Return the map names benchmarked in *test*, ordered by maximum TIME.

    Reads the module-level ``data`` frame for the map names and the
    module-level ``gr_max`` frame (indexed by test and map) for each map's
    maximum TIME.  Names are sorted in ascending order of that maximum;
    equal maxima are ordered by name.
    """
    names = data[data["TEST"] == test]["MAP"].unique()
    ranked = sorted((gr_max.loc[test, name]["TIME"], name) for name in names)
    return [name for _, name in ranked]
def divider(df, maplist):
    """Return the rows of *df* whose "MAP" value is one of *maplist*."""
    return df[df["MAP"].isin(maplist)]
def plotter2(test, data):
mydata = data[data["TEST"] == test]
maps = sort_maps(test)
set1 = divider(mydata, maps[:5])
set2 = divider(mydata, maps[5:11])
set3 = divider(mydata, maps[11:])
plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set1)
plt.xscale("log")
plt.xticks(ticks, ticklabels)
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=55,
horizontalalignment='center',
fontweight='light',
)
plt.ylabel(labels[test][1])
plt.legend()
plt.title(labels[test][0])
plt.savefig("./plots/{}/{}.png".format(test,1))
plt.clf()
plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2)
plt.xscale("log")
plt.xticks(ticks, ticklabels)
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=55,
horizontalalignment='center',
fontweight='light',
)
plt.ylabel(labels[test][1])
plt.legend()
plt.title(labels[test][0])
plt.savefig("./plots/{}/{}.png".format(test,2))
plt.clf()
plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2)
plt.xscale("log")
plt.xticks(ticks, ticklabels)
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=55,
horizontalalignment='center',
fontweight='light',
)
plt.ylabel(labels[test][1])
plt.legend()
plt.title(labels[test][0])
plt.savefig("./plots/{}/{}.png".format(test,3))
plt.clf()
# In[4]:
# Blank out outliers in the raw measurements, then aggregate TIME/SIZE per
# (test, map) pair.  ``data`` and ``gr_max`` are read by ``sort_maps``.
data = remove_outlier(onmodded, remove_with_modified_z_score)
groups = data.groupby(["TEST", "MAP"])
gr_max, gr_mean = groups.max(), groups.mean()
# In[5]:
# Produce the plots for every test present in the cleaned data.
tests = data["TEST"].unique()
for test_name in tests:
    plotter2(test_name, data)
# In[6]:
# In[ ]:
# In[ ]:
# In[ ]: