work done

This commit is contained in:
MassiveAtoms 2020-04-20 08:37:08 -03:00
commit 3336fea3e5
63 changed files with 264607 additions and 0 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

3
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"python.pythonPath": "C:\\python3.8\\python.exe"
}

668
better-plotting.ipynb Normal file

File diff suppressed because one or more lines are too long

901
last_attempt.ipynb Normal file

File diff suppressed because one or more lines are too long

223
last_attempt.py Normal file
View File

@ -0,0 +1,223 @@
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
from pandas import DataFrame, Series
from numpy import nan
import matplotlib.pyplot as plt
# Default canvas for every figure in this script: 24 x 5 inches.
plt.rcParams["figure.figsize"] = (24, 5)
import seaborn as sns
# One call is enough: sns.set(font_scale=...) applies seaborn's default theme
# together with the font scaling, so the bare sns.set() that used to precede
# it was redundant.
sns.set(font_scale=1.75)
# In[2]:
# Column names of the long-format benchmark table.
cols = ["TEST", "MAP", "SIZE", "TIME"]

onmodded = pd.read_csv("new_repr.csv", quotechar="'", header=None)
# `axis` is passed by keyword (columns=/index=): the positional form
# drop([0], 1) / drop([0], 0) was removed in pandas 2.0.
# NOTE(review): column 0 is presumably a saved index and row 0 the header
# line read as data (header=None) -- confirm against new_repr.csv.
onmodded = onmodded.drop(columns=[0])
onmodded.columns = cols
onmodded = onmodded.drop(index=[0])
# Cast the numeric columns by name; assigning the cast result back through
# .iloc may leave the columns with their original dtype.
onmodded = onmodded.astype({"SIZE": "int32", "TIME": "int32"})
# Per-map plot styling, keyed by the map's type name. Each value is a list
# whose first element is a hex colour; only the last two entries also carry a
# second element ("solid").
styles = {
    # blues
    "absl::flat_hash_map": ["#0000cc"],
    # NOTE(review): this key ends with a stray apostrophe -- confirm whether
    # it mirrors a value that really occurs in the data or is a typo.
    "absl::node_hash_map'": ["#3366ff"],
    "absl::node_hash_map": ["#99ccff"],
    # reds
    "google::dense_hash_map": ["#ff0000"],
    "google::sparse_hash_map": ["#ff6666"],
    "phmap::parallel_flat_hash_map": ["#ff0066"],
    # greens
    "ska::bytell_hash_map": ["#009933"],
    "ska::flat_hash_map": ["#33cc33"],
    "ska::unordered_map": ["#99ff66"],
    # purples
    "tsl::hopscotch_map": ["#9900cc"],
    "tsl::robin_map": ["#cc33ff"],
    "tsl::sparse_map": ["#cc99ff"],
    "robin_hood::unordered_flat_map": ["#ffcc99"],
    "robin_hood::unordered_node_map": ["#ccff66"],
    # brown
    "boost::unordered::unordered_map": ["#663300"],
    # purple (same colour code as tsl::hopscotch_map)
    "emilib::HashMap": ["#9900cc"],
    # weird orange
    "phmap::parallel_node_hash_map": ["#ffcc66", "solid"],
    # black
    "std::unordered_map": ["#000000", "solid"],
}
# X-axis tick positions (benchmark sizes): 50 K steps up to 400 K, 100 K steps
# up to 1 M, 1 M steps up to 10 M, then 5 M steps up to 50 M.
ticks = (
    [50000 * k for k in range(1, 9)]
    + [100000 * k for k in range(5, 11)]
    + [1000000 * k for k in range(2, 11)]
    + [5000000 * k for k in range(3, 11)]
)
# Matching tick labels: thousands ("50 K") below half a million, millions
# ("0.5 M", "15 M") from there on.
ticklabels = [
    "{} K".format(size // 1000) if size < 500000 else "{:g} M".format(size / 1000000)
    for size in ticks
]
# Plot texts per benchmark name: labels[test] == [figure title, y-axis label].
# The wording (spelling and spacing included) is reproduced exactly as it has
# always appeared on the figures.
labels = {
    "{}_{}".format(key_type, operation): [
        "mean {} {} time".format(key_type, phrase),
        axis_label,
    ]
    for key_type in ("int", "string")
    for operation, phrase, axis_label in (
        ("delete", "deletion", "deletion time (ns)"),
        ("insert", "insertion", "insertion time(ns)"),
        ("nosucc_lookup", "unsucessful lookup", "unsucessful lookup time (ns)"),
        ("succ_lookup", "succesful lookup", "succesful lookup time (ns)"),
    )
}
# In[3]:
# outlier testing functions
def remove_with_modified_z_score(data, treshold=3.5):
    """Mask outliers in ``data`` using the modified z-score.

    Each value is scored as ``0.6745 * |x - median| / MAD`` where MAD is the
    median absolute deviation of the sample, see
    https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm

    Parameters
    ----------
    data : pandas.Series
        Values to screen; they are cast to ``int`` before scoring.
    treshold : float, default 3.5
        Largest score that is still accepted. (The parameter name is kept
        as-is so existing keyword callers keep working.)

    Returns
    -------
    pandas.Series
        The integer-cast values with every entry scoring above ``treshold``
        replaced by NaN. When the MAD is zero the score is undefined and the
        integer-cast values are returned untouched.
    """
    data = data.astype(int)
    median = data.median()
    median_absolute_deviation = (data - median).abs().median()
    if not median_absolute_deviation:
        # More than half of the values equal the median: nothing to score.
        return data
    modified_z_scores = (0.6745 * (data - median) / median_absolute_deviation).abs()
    # Mask with ``where`` instead of multiplying by the boolean mask and then
    # replacing zeros: that approach also erased genuine 0 measurements.
    return data.where(modified_z_scores <= treshold)
#function that takes one of the outlier testers and data, and removes outliers
def remove_outlier(data, function, group_size=30):
    """Run an outlier filter over every consecutive block of TIME samples.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns ``TIME`` and ``SIZE``; it is not
        modified.
    function : callable
        Receives the integer-cast ``TIME`` values of one block as a Series and
        must return the same number of values, with rejected samples replaced
        (e.g. ``remove_with_modified_z_score``).
    group_size : int, default 30
        Number of consecutive rows that form one block.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with ``SIZE`` cast to ``int`` and ``TIME`` replaced
        by the concatenated filter results.
    """
    new_data = data.copy(True)
    new_data["TIME"] = new_data["TIME"].astype(int)
    new_data["SIZE"] = new_data["SIZE"].astype(int)

    times = new_data["TIME"]
    n_rows = len(times)
    n_groups = -(-n_rows // group_size)  # ceiling division
    report_every = max(1, n_groups // 10)

    cleaned = []
    for group, start in enumerate(range(0, n_rows, group_size)):
        # Positional, half-open slices: the former label-based
        # .loc[start:end] is inclusive on both ends, so neighbouring windows
        # shared a row and depended on where the index labels start.
        block = times.iloc[start:start + group_size]
        cleaned.extend(function(block))
        if not group % report_every:
            print(round(100 * group / n_groups, 1), "% done")

    # Assigning a plain list is positional, so the original index is kept.
    new_data["TIME"] = cleaned
    return new_data
# helpers for plot functions
def sort_maps(test):
    """Return the map names that occur in ``test``, ordered by maximum TIME.

    Reads the module-level ``data`` table and the ``gr_max`` aggregate.
    Names are sorted by ascending ``gr_max`` TIME for ``(test, name)``; equal
    times are ordered by name.
    """
    in_test = data["TEST"] == test
    names = data[in_test]["MAP"].unique()
    ranked = sorted((gr_max.loc[test, name]["TIME"], name) for name in names)
    return [name for _, name in ranked]
def divider(df, maplist):
    """Return the rows of ``df`` whose ``MAP`` value is listed in ``maplist``."""
    return df.loc[df["MAP"].isin(maplist)]
def plotter2(test, data):
mydata = data[data["TEST"] == test]
maps = sort_maps(test)
set1 = divider(mydata, maps[:5])
set2 = divider(mydata, maps[5:11])
set3 = divider(mydata, maps[11:])
plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set1)
plt.xscale("log")
plt.xticks(ticks, ticklabels)
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=55,
horizontalalignment='center',
fontweight='light',
)
plt.ylabel(labels[test][1])
plt.legend()
plt.title(labels[test][0])
plt.savefig("./plots/{}/{}.png".format(test,1))
plt.clf()
plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2)
plt.xscale("log")
plt.xticks(ticks, ticklabels)
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=55,
horizontalalignment='center',
fontweight='light',
)
plt.ylabel(labels[test][1])
plt.legend()
plt.title(labels[test][0])
plt.savefig("./plots/{}/{}.png".format(test,2))
plt.clf()
plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2)
plt.xscale("log")
plt.xticks(ticks, ticklabels)
plot.set_xticklabels(
plot.get_xticklabels(),
rotation=55,
horizontalalignment='center',
fontweight='light',
)
plt.ylabel(labels[test][1])
plt.legend()
plt.title(labels[test][0])
plt.savefig("./plots/{}/{}.png".format(test,3))
plt.clf()
# In[4]:
# Filter the raw timings, then aggregate them per (TEST, MAP) pair.
data = remove_outlier(onmodded, remove_with_modified_z_score)
groups = data.groupby(["TEST", "MAP"])
gr_max, gr_mean = groups.max(), groups.mean()
# In[5]:
# Produce the figures for every benchmark found in the table.
tests = data["TEST"].unique()
for test_name in tests:
    plotter2(test_name, data)
# In[6]:
# In[ ]:
# In[ ]:
# In[ ]:

126481
new_repr Normal file

File diff suppressed because it is too large Load Diff

126481
new_repr.csv Normal file

File diff suppressed because it is too large Load Diff

90
plot_stuff.py Normal file
View File

@ -0,0 +1,90 @@
import seaborn as sns
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import pathlib
import os
# Default canvas for every figure in this script: 40 x 5 inches.
plt.rcParams["figure.figsize"] = (40, 5)
# Apply seaborn's default theme to all matplotlib figures.
# NOTE(review): this runs after the figsize assignment -- confirm the installed
# seaborn version leaves figure.figsize untouched.
sns.set()
## new cell
# Column layout applied to results.csv: test name, map name, then one column
# per benchmark size. The size columns are named with the size as a string
# ("50000" ... "50000000").
cols = ["TEST", "MAP"] + [
    str(step * k)
    for step, multiples in (
        (50000, range(1, 9)),      # 50 K ... 400 K
        (100000, range(5, 11)),    # 500 K ... 1 M
        (1000000, range(2, 11)),   # 2 M ... 10 M
        (5000000, range(3, 11)),   # 15 M ... 50 M
    )
    for k in multiples
]
data = pd.read_csv("results.csv", quotechar="'", header=None)
data.columns = cols
## new cell
# Mean and standard deviation of every size column per (TEST, MAP) pair.
groups = data.groupby(["TEST", "MAP"])
groups_mean, groups_std = groups.mean(), groups.std()
## new cell
def max_val(hmap, test):
    """Return the largest mean value of map ``hmap`` in ``test`` over all size columns.

    Looks the ``(test, hmap)`` row up in the module-level ``groups_mean``.
    """
    mean_row = groups_mean.loc[test, hmap]
    return mean_row.max()
def sort_maps(test):
    """Return the map names of ``test`` ordered by ascending ``max_val``.

    The names are taken from the index of ``groups_mean.loc[test]``; maps with
    equal maxima are ordered by name.
    """
    names = list(groups_mean.loc[test].index)
    ranked = sorted((max_val(name, test), name) for name in names)
    return [name for _, name in ranked]
def plot_test(test, include_error=True, log=False):
sizes = [50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000,
600000, 700000, 800000, 900000, 1000000,
2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000,
15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000]
maps = sort_maps(test)
# print(maps)
count = 16
repeats = [0, 5, 11]
while count > -1:
if not count and count not in repeats:
break
mp = maps[count]
if include_error:
plt.errorbar(groups_mean.columns,groups_mean.loc[test, mp], yerr=groups_std.loc[test, mp], label=mp)
else:
plt.plot(groups_mean.columns, groups_mean.loc[test, mp], label=mp)
if count in repeats:
if log:
plt.xscale("log")
plt.ylabel("{} time (ns)".format(test))
plt.legend()
plt.title(test)
plt.savefig("./plots/{}/{}.png".format(test,count))
plt.clf()
# plt.show()
repeats.pop(repeats.index(count))
else:
count -=1
## new cell
# Make sure ./plots and one sub-folder per benchmark exist, then draw the
# figures of every benchmark without error bars.
tests = data["TEST"].unique()
plots_root = pathlib.Path("./plots")
plots_root.mkdir(exist_ok=True)
for test_name in tests:
    test_dir = pathlib.Path("./plots/{}/".format(test_name))
    test_dir.mkdir(exist_ok=True)
    print(test_name)
    plot_test(test_name, False)

BIN
plots.7z Normal file

Binary file not shown.

BIN
plots/int_delete/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 258 KiB

BIN
plots/int_delete/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

BIN
plots/int_delete/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

BIN
plots/int_insert/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

BIN
plots/int_insert/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 222 KiB

BIN
plots/int_insert/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 222 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 244 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 244 KiB

BIN
plots/int_succ_lookup/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB

BIN
plots/int_succ_lookup/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 KiB

BIN
plots/int_succ_lookup/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 KiB

BIN
plots/string_delete/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 235 KiB

BIN
plots/string_delete/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 248 KiB

BIN
plots/string_delete/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 247 KiB

BIN
plots/string_insert/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 163 KiB

BIN
plots/string_insert/2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

BIN
plots/string_insert/3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 171 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 225 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 173 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 173 KiB

BIN
plt.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

4080
results.csv Normal file

File diff suppressed because it is too large Load Diff

BIN
sns.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

4081
sorted.csv Normal file

File diff suppressed because it is too large Load Diff

28
to.py Normal file
View File

@ -0,0 +1,28 @@
# Per-map plot styling, keyed by the map's type name. Each value is a
# two-element list: a hex colour followed by a second style string.
# NOTE(review): if the second element is meant to be a matplotlib line style,
# "-,", "- " and ".." are not among the documented values -- confirm the
# intended styles before using them.
styles = {
    # blues
    "absl::flat_hash_map": ["#0000cc", "--"],
    # NOTE(review): this key ends with a stray apostrophe -- confirm.
    "absl::node_hash_map'": ["#3366ff", "--"],
    "absl::node_hash_map": ["#99ccff", "--"],
    # reds
    "google::dense_hash_map": ["#ff0000", "-."],
    "google::sparse_hash_map": ["#ff6666", "-,"],
    "phmap::parallel_flat_hash_map": ["#ff0066", "-."],
    # greens
    "ska::bytell_hash_map": ["#009933", "- "],
    "ska::flat_hash_map": ["#33cc33", "- "],
    "ska::unordered_map": ["#99ff66", "- "],
    # purples
    "tsl::hopscotch_map": ["#9900cc", ":"],
    "tsl::robin_map": ["#cc33ff", ":"],
    "tsl::sparse_map": ["#cc99ff", ":"],
    "robin_hood::unordered_flat_map": ["#ffcc99", ".."],
    "robin_hood::unordered_node_map": ["#ccff66", ".."],
    # brown
    "boost::unordered::unordered_map": ["#663300", "solid"],
    # purple (same colour code as tsl::hopscotch_map)
    "emilib::HashMap": ["#9900cc", "solid"],
    # weird orange
    "phmap::parallel_node_hash_map": ["#ffcc66", "solid"],
    # black
    "std::unordered_map": ["#000000", "solid"],
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB