work done

2020-04-20 08:37:08 -03:00
commit 3336fea3e5
63 changed files with 264607 additions and 0 deletions
--- a/.ipynb_checkpoints/better-plotting-checkpoint.ipynb
+++ b/.ipynb_checkpoints/better-plotting-checkpoint.ipynb
--- a/.ipynb_checkpoints/last_attempt-checkpoint.ipynb
+++ b/.ipynb_checkpoints/last_attempt-checkpoint.ipynb
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -0,0 +1,3 @@
 {
    "python.pythonPath": "C:\\python3.8\\python.exe"
 }
--- a/better-plotting.ipynb
+++ b/better-plotting.ipynb
--- a/last_attempt.ipynb
+++ b/last_attempt.ipynb
--- a/last_attempt.py
+++ b/last_attempt.py
@ -0,0 +1,223 @@
 #!/usr/bin/env python
 # coding: utf-8
 # In[1]:
 import pandas as pd
 from pandas import DataFrame, Series
 from numpy import nan
 import matplotlib.pyplot as plt
 plt.rcParams["figure.figsize"] = (24,5)
 import seaborn as sns
 sns.set()
 sns.set(font_scale=1.75)
 # In[2]:
 # Column names applied to the benchmark table once the first column is dropped.
 cols = [
    "TEST","MAP","SIZE", "TIME"
 ]
 onmodded = pd.read_csv("new_repr.csv", quotechar="'", header=None)
 # Drop the column labelled 0 and, after naming the columns, the row labelled 0.
 # Keyword arguments are used because passing `axis` positionally to
 # DataFrame.drop is no longer accepted by current pandas releases.
 # NOTE(review): presumably these are a saved index column and a header row -
 # confirm against new_repr.csv.
 onmodded = onmodded.drop(columns=[0])
 onmodded.columns = cols
 onmodded = onmodded.drop(index=[0])
 # Convert everything from the third column onwards (SIZE, TIME) to integers.
 onmodded.iloc[:,2:] = onmodded.iloc[:,2:].astype("int32")
 # Hash-map name -> list whose first element is a hex colour string; the last
 # two entries additionally carry a line-style name.
 # NOTE(review): the key "absl::node_hash_map'" ends with a stray apostrophe,
 # so it is a different key from 'absl::node_hash_map' - confirm whether the
 # CSV really contains that spelling.
 styles = {'absl::flat_hash_map': ["#0000cc"],  # blue
          "absl::node_hash_map'": ["#3366ff"],
          'absl::node_hash_map': ["#99ccff"],
          'google::dense_hash_map': ["#ff0000"],  # reds
          'google::sparse_hash_map': ["#ff6666"],
          'phmap::parallel_flat_hash_map': ["#ff0066"],
          'ska::bytell_hash_map': ["#009933"],  # greens
          'ska::flat_hash_map': ["#33cc33"],
          'ska::unordered_map': ["#99ff66"],
          'tsl::hopscotch_map': ["#9900cc"],  # purples
          'tsl::robin_map': ["#cc33ff"],
          'tsl::sparse_map': ["#cc99ff"],
          'robin_hood::unordered_flat_map': ["#ffcc99"],
          'robin_hood::unordered_node_map': ["#ccff66"],
          'boost::unordered::unordered_map': ["#663300"],  # brown
          'emilib::HashMap': ["#9900cc"],  # purple
          # weird orange
          'phmap::parallel_node_hash_map': ["#ffcc66", "solid"],
          'std::unordered_map': ["#000000", "solid"],  # black
         }
 # X-axis tick positions passed to plt.xticks by plotter2.
 ticks = [50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000,
 600000, 700000, 800000, 900000, 1000000,
 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000,
 15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000]
 # Display text for each entry of `ticks`, in the same order.
 ticklabels = ['50 K', '100 K',
 '150 K', '200 K', '250 K', '300 K',
 '350 K', '400 K', '0.5 M', '0.6 M',
 '0.7 M', '0.8 M', '0.9 M', '1 M',
 '2 M', '3 M', '4 M', '5 M',
 '6 M', '7 M', '8 M', '9 M',
 '10 M', '15 M', '20 M', '25 M',
 '30 M', '35 M', '40 M', '45 M', '50 M']
 # Test name -> [plot title, y-axis label]; plotter2 reads index 0 as the
 # title and index 1 as the y label.
 labels = {
    'int_delete' : ["mean int deletion time", "deletion time (ns)"],
    'int_insert' : ["mean int insertion time", "insertion time(ns)"],
    'int_nosucc_lookup' : ["mean int unsucessful lookup time", "unsucessful lookup time (ns)"],
    'int_succ_lookup' : ["mean int succesful lookup time", "succesful lookup time (ns)"],
    'string_delete' : ["mean string deletion time", "deletion time (ns)"],
    'string_insert' : ["mean string insertion time", "insertion time(ns)"], 
    'string_nosucc_lookup' : ["mean string unsucessful lookup time", "unsucessful lookup time (ns)"],
    'string_succ_lookup' : ["mean string succesful lookup time", "succesful lookup time (ns)"]
 }
 # In[3]:
 # outlier testing functions
 def remove_with_modified_z_score(data, treshold=3.5):
    """Replace outliers in `data` with NaN using the modified z-score test.

    The modified z-score of a sample x is 0.6745 * (x - median) / MAD, where
    MAD is the median absolute deviation from the median.

    Parameters:
        data: pandas Series of values convertible to int.
        treshold: largest absolute modified z-score that is still kept
            (name kept as-is for backward compatibility).

    Returns:
        The integer-converted Series with every sample whose absolute
        modified z-score exceeds `treshold` replaced by NaN. When the MAD is
        zero the test is undefined and the converted data is returned as-is.
    """
    # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
    data = data.astype(int)
    median = data.median()
    median_absolute_deviation = (data - median).abs().median()
    if not median_absolute_deviation:
        return data
    modified_z_scores = (0.6745 * (data - median) / median_absolute_deviation).abs()
    # Mask with `where` rather than multiplying by the boolean mask and then
    # replacing 0 with NaN: that approach also wiped genuine zero samples.
    return data.where(modified_z_scores <= treshold)
 #function that takes one of the outlier testers and data, and removes outliers
 def remove_outlier(data, function, chunk_size=30):
    """Apply an outlier filter to TIME, one run of consecutive rows at a time.

    Parameters:
        data: DataFrame with at least the columns "TIME" and "SIZE", laid out
            so that each run of `chunk_size` consecutive rows belongs together
            (a final shorter run is filtered on its own).
        function: callable taking the TIME Series of one run and returning a
            Series of the same length with outliers replaced (e.g. by NaN).
        chunk_size: number of consecutive rows per run; defaults to 30.

    Returns:
        A copy of `data` with SIZE converted to int and TIME replaced by the
        concatenated filter results. `data` itself is not modified.

    Raises:
        ValueError: if `chunk_size` is smaller than 1, or if `function`
            returns a different number of values than it was given.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")
    new_data = data.copy(True)
    new_data["TIME"] = new_data["TIME"].astype(int)
    new_data["SIZE"] = new_data["SIZE"].astype(int)
    # Slice by position rather than by label: label slices are inclusive at
    # both ends, so consecutive windows used to share a row and were shifted
    # whenever the index did not start at 0.
    times = data["TIME"]
    cleaned = [
        function(times.iloc[start:start + chunk_size])
        for start in range(0, len(times), chunk_size)
    ]
    if cleaned:
        # Assign positionally so the result does not depend on index labels.
        new_data["TIME"] = pd.concat(cleaned).to_numpy()
    return new_data
 # helpers for plot functions
 def sort_maps(test):
    """Return the map names seen for `test`, ordered by their maximum TIME.

    Reads the module-level `data` frame for the map names and the
    module-level `gr_max` aggregate for each map's maximum; ties on the
    maximum fall back to ordering by map name.
    """
    names = data[data["TEST"] == test]["MAP"].unique()
    ranked = sorted((gr_max.loc[test, name]["TIME"], name) for name in names)
    return [name for _, name in ranked]
 def divider(df, maplist):
    """Return the rows of `df` whose MAP value is one of `maplist`."""
    return df[df["MAP"].isin(maplist)]
 def plotter2(test, data):
    mydata = data[data["TEST"] == test]
    maps = sort_maps(test)
    set1 = divider(mydata, maps[:5])
    set2 = divider(mydata, maps[5:11])
    set3 = divider(mydata, maps[11:])
    plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set1)
    plt.xscale("log")
    plt.xticks(ticks, ticklabels)
    plot.set_xticklabels(
        plot.get_xticklabels(), 
        rotation=55, 
        horizontalalignment='center',
        fontweight='light',
    )
    plt.ylabel(labels[test][1])
    plt.legend()
    plt.title(labels[test][0])
    plt.savefig("./plots/{}/{}.png".format(test,1))
    plt.clf()
    plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2)
    plt.xscale("log")
    plt.xticks(ticks, ticklabels)
    plot.set_xticklabels(
        plot.get_xticklabels(), 
        rotation=55, 
        horizontalalignment='center',
        fontweight='light',
        )
    plt.ylabel(labels[test][1])
    plt.legend()
    plt.title(labels[test][0])
    plt.savefig("./plots/{}/{}.png".format(test,2))
    plt.clf()
    plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2)
    plt.xscale("log")
    plt.xticks(ticks, ticklabels)
    plot.set_xticklabels(
        plot.get_xticklabels(), 
        rotation=55, 
        horizontalalignment='center',
        fontweight='light',
    )
    plt.ylabel(labels[test][1])
    plt.legend()
    plt.title(labels[test][0])
    plt.savefig("./plots/{}/{}.png".format(test,3))
    plt.clf()
 # In[4]:
 # Filter outliers out of TIME, then aggregate per (TEST, MAP) pair.
 data = remove_outlier(onmodded, remove_with_modified_z_score)
 groups = data.groupby(["TEST", "MAP"])
 gr_max, gr_mean = groups.max(), groups.mean()
 # In[5]:
 # Produce the figures for every test found in the data.
 tests = data["TEST"].unique()
 for i in tests:
    plotter2(i, data)
 # In[6]:
 # In[ ]:
 # In[ ]:
 # In[ ]:
--- a/126481
+++ b/126481
--- a/new_repr.csv
+++ b/new_repr.csv
--- a/plot_stuff.py
+++ b/plot_stuff.py
@ -0,0 +1,90 @@
 import seaborn as sns
 import pandas as pd
 from pandas import DataFrame, Series
 import matplotlib.pyplot as plt
 import pathlib
 import os
 plt.rcParams["figure.figsize"] = (40, 5)
 sns.set()
 ## new cell
 # Column names for results.csv: the test name, the map name, then one
 # column per element count (kept as strings).
 cols = [
    "TEST", "MAP", '50000', '100000', '150000', '200000', '250000', '300000', '350000', '400000',
    '500000', '600000', '700000', '800000', '900000', '1000000', '2000000', '3000000',
    '4000000', '5000000', '6000000', '7000000', '8000000', '9000000', '10000000', '15000000',
    '20000000', '25000000', '30000000', '35000000', '40000000', '45000000', '50000000'
 ]
 # The file has no header row; names are assigned from `cols` afterwards.
 data = pd.read_csv("results.csv", quotechar="'", header=None)
 data.columns = cols
 # data.head()
 ## new cell
 # Mean and standard deviation of every size column per (TEST, MAP) pair.
 groups = data.groupby(["TEST", "MAP"])
 groups_mean = groups.mean()
 groups_std = groups.std()
 ## new cell
 def max_val(hmap, test):
    """Return the largest per-size mean recorded for map `hmap` in `test`.

    Looks the (test, hmap) row up in the module-level `groups_mean` table.
    """
    row = groups_mean.loc[test, hmap]
    return row.max()
 def sort_maps(test):
    """Return the maps measured for `test`, ordered by their largest mean.

    Map names come from the module-level `groups_mean` table and are ranked
    with max_val; ties fall back to ordering by map name.
    """
    names = list(groups_mean.loc[test].index)
    ranked = sorted((max_val(name, test), name) for name in names)
    return [name for _, name in ranked]
 def plot_test(test, include_error=True, log=False):
    sizes = [50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000,
        600000, 700000, 800000, 900000, 1000000,
        2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000,
        15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000]
    maps = sort_maps(test)
    # print(maps)
    count = 16
    repeats = [0, 5, 11]
    while count > -1:
        if not count and count not in repeats:
            break
        mp = maps[count]
        if include_error:
            plt.errorbar(groups_mean.columns,groups_mean.loc[test, mp], yerr=groups_std.loc[test, mp], label=mp)
        else:
            plt.plot(groups_mean.columns, groups_mean.loc[test, mp], label=mp)
        if count in repeats:
            if log:
                plt.xscale("log")
            plt.ylabel("{} time (ns)".format(test))
            plt.legend()
            plt.title(test)
            plt.savefig("./plots/{}/{}.png".format(test,count))
            plt.clf()
            # plt.show()
            repeats.pop(repeats.index(count))
        else:
            count -=1
 ## new cell
 # Make sure ./plots and one sub-directory per test exist, then plot each
 # test without error bars.
 tests = data["TEST"].unique()
 p = pathlib.Path("./plots")
 p.mkdir(exist_ok=True)
 for i in tests:
    path = pathlib.Path("./plots/{}/".format(i))
    path.mkdir(exist_ok=True)
    print(i)
    plot_test(i, False)
    # break
--- a/plots.7z
+++ b/plots.7z
--- a/plots/int_delete/1.png
+++ b/plots/int_delete/1.png
--- a/plots/int_delete/2.png
+++ b/plots/int_delete/2.png
--- a/plots/int_delete/3.png
+++ b/plots/int_delete/3.png
--- a/plots/int_insert/1.png
+++ b/plots/int_insert/1.png
--- a/plots/int_insert/2.png
+++ b/plots/int_insert/2.png
--- a/plots/int_insert/3.png
+++ b/plots/int_insert/3.png
--- a/plots/int_nosucc_lookup/1.png
+++ b/plots/int_nosucc_lookup/1.png
--- a/plots/int_nosucc_lookup/2.png
+++ b/plots/int_nosucc_lookup/2.png
--- a/plots/int_nosucc_lookup/3.png
+++ b/plots/int_nosucc_lookup/3.png
--- a/plots/int_succ_lookup/1.png
+++ b/plots/int_succ_lookup/1.png
--- a/plots/int_succ_lookup/2.png
+++ b/plots/int_succ_lookup/2.png
--- a/plots/int_succ_lookup/3.png
+++ b/plots/int_succ_lookup/3.png
--- a/plots/string_delete/1.png
+++ b/plots/string_delete/1.png
--- a/plots/string_delete/2.png
+++ b/plots/string_delete/2.png
--- a/plots/string_delete/3.png
+++ b/plots/string_delete/3.png
--- a/plots/string_insert/1.png
+++ b/plots/string_insert/1.png
--- a/plots/string_insert/2.png
+++ b/plots/string_insert/2.png
--- a/plots/string_insert/3.png
+++ b/plots/string_insert/3.png
--- a/plots/string_nosucc_lookup/1.png
+++ b/plots/string_nosucc_lookup/1.png
--- a/plots/string_nosucc_lookup/2.png
+++ b/plots/string_nosucc_lookup/2.png
--- a/plots/string_nosucc_lookup/3.png
+++ b/plots/string_nosucc_lookup/3.png
--- a/plots/string_succ_lookup/1.png
+++ b/plots/string_succ_lookup/1.png
--- a/plots/string_succ_lookup/2.png
+++ b/plots/string_succ_lookup/2.png
--- a/plots/string_succ_lookup/3.png
+++ b/plots/string_succ_lookup/3.png
--- a/plt.png
+++ b/plt.png
--- a/results.csv
+++ b/results.csv
--- a/sns.png
+++ b/sns.png
--- a/sorted.csv
+++ b/sorted.csv
--- a/to.py
+++ b/to.py
@ -0,0 +1,28 @@
 # Hash-map name -> [hex colour string, line-style string].
 # NOTE(review): presumably the second element is meant as a matplotlib
 # linestyle; if so, "-,", "- " and ".." are not among the documented style
 # strings - confirm before using this table for plotting.
 # NOTE(review): the key "absl::node_hash_map'" ends with a stray apostrophe,
 # making it distinct from 'absl::node_hash_map' - confirm it is intended.
 styles = {'absl::flat_hash_map': ["#0000cc", "--"],  # blue
          "absl::node_hash_map'": ["#3366ff", "--"],
          'absl::node_hash_map': ["#99ccff", "--"],
          'google::dense_hash_map': ["#ff0000", "-."],  # reds
          'google::sparse_hash_map': ["#ff6666", "-,"],
          'phmap::parallel_flat_hash_map': ["#ff0066", "-."],
          'ska::bytell_hash_map': ["#009933", "- "],  # greens
          'ska::flat_hash_map': ["#33cc33", "- "],
          'ska::unordered_map': ["#99ff66", "- "],
          'tsl::hopscotch_map': ["#9900cc", ":"],  # purples
          'tsl::robin_map': ["#cc33ff", ":"],
          'tsl::sparse_map': ["#cc99ff", ":"],
          'robin_hood::unordered_flat_map': ["#ffcc99", ".."],
          'robin_hood::unordered_node_map': ["#ccff66", ".."],
          'boost::unordered::unordered_map': ["#663300", "solid"],  # brown
          'emilib::HashMap': ["#9900cc", "solid"],  # purple
          # weird orange
          'phmap::parallel_node_hash_map': ["#ffcc66", "solid"],
          'std::unordered_map': ["#000000", "solid"],  # black
          }
--- a/with_error_nologscale/int_delete/0.png
+++ b/with_error_nologscale/int_delete/0.png
--- a/with_error_nologscale/int_delete/11.png
+++ b/with_error_nologscale/int_delete/11.png
--- a/with_error_nologscale/int_delete/5.png
+++ b/with_error_nologscale/int_delete/5.png
--- a/with_error_nologscale/int_insert/0.png
+++ b/with_error_nologscale/int_insert/0.png
--- a/with_error_nologscale/int_insert/11.png
+++ b/with_error_nologscale/int_insert/11.png
--- a/with_error_nologscale/int_insert/5.png
+++ b/with_error_nologscale/int_insert/5.png
--- a/with_error_nologscale/int_nosucc_lookup/0.png
+++ b/with_error_nologscale/int_nosucc_lookup/0.png
--- a/with_error_nologscale/int_nosucc_lookup/11.png
+++ b/with_error_nologscale/int_nosucc_lookup/11.png
--- a/with_error_nologscale/int_nosucc_lookup/5.png
+++ b/with_error_nologscale/int_nosucc_lookup/5.png
--- a/with_error_nologscale/int_succ_lookup/0.png
+++ b/with_error_nologscale/int_succ_lookup/0.png
--- a/with_error_nologscale/int_succ_lookup/11.png
+++ b/with_error_nologscale/int_succ_lookup/11.png
--- a/with_error_nologscale/int_succ_lookup/5.png
+++ b/with_error_nologscale/int_succ_lookup/5.png
--- a/with_error_nologscale/string_delete/0.png
+++ b/with_error_nologscale/string_delete/0.png
--- a/with_error_nologscale/string_delete/11.png
+++ b/with_error_nologscale/string_delete/11.png
--- a/with_error_nologscale/string_delete/5.png
+++ b/with_error_nologscale/string_delete/5.png
--- a/with_error_nologscale/string_insert/0.png
+++ b/with_error_nologscale/string_insert/0.png
--- a/with_error_nologscale/string_insert/11.png
+++ b/with_error_nologscale/string_insert/11.png
--- a/with_error_nologscale/string_insert/5.png
+++ b/with_error_nologscale/string_insert/5.png
--- a/with_error_nologscale/string_nosucc_lookup/0.png
+++ b/with_error_nologscale/string_nosucc_lookup/0.png
--- a/with_error_nologscale/string_nosucc_lookup/11.png
+++ b/with_error_nologscale/string_nosucc_lookup/11.png
--- a/with_error_nologscale/string_nosucc_lookup/5.png
+++ b/with_error_nologscale/string_nosucc_lookup/5.png
--- a/with_error_nologscale/string_succ_lookup/0.png
+++ b/with_error_nologscale/string_succ_lookup/0.png
--- a/with_error_nologscale/string_succ_lookup/11.png
+++ b/with_error_nologscale/string_succ_lookup/11.png
--- a/with_error_nologscale/string_succ_lookup/5.png
+++ b/with_error_nologscale/string_succ_lookup/5.png