#!/usr/bin/env python # coding: utf-8 # In[1]: import pandas as pd from pandas import DataFrame, Series from numpy import nan import matplotlib.pyplot as plt plt.rcParams["figure.figsize"] = (24,5) import seaborn as sns sns.set() sns.set(font_scale=1.75) # In[2]: cols = [ "TEST","MAP","SIZE", "TIME" ] onmodded = pd.read_csv("new_repr.csv", quotechar="'", header=None) onmodded = onmodded.drop([0],1) onmodded.columns= cols onmodded = onmodded.drop([0],0) onmodded.iloc[:,2:] = onmodded.iloc[:,2:].astype("int32") styles = {'absl::flat_hash_map': ["#0000cc"], # blue "absl::node_hash_map'": ["#3366ff"], 'absl::node_hash_map': ["#99ccff"], 'google::dense_hash_map': ["#ff0000"], # reds 'google::sparse_hash_map': ["#ff6666"], 'phmap::parallel_flat_hash_map': ["#ff0066"], 'ska::bytell_hash_map': ["#009933"], # greens 'ska::flat_hash_map': ["#33cc33"], 'ska::unordered_map': ["#99ff66"], 'tsl::hopscotch_map': ["#9900cc"], # purples 'tsl::robin_map': ["#cc33ff"], 'tsl::sparse_map': ["#cc99ff"], 'robin_hood::unordered_flat_map': ["#ffcc99"], 'robin_hood::unordered_node_map': ["#ccff66"], 'boost::unordered::unordered_map': ["#663300"], # brown 'emilib::HashMap': ["#9900cc"], # purple # weird orange 'phmap::parallel_node_hash_map': ["#ffcc66", "solid"], 'std::unordered_map': ["#000000", "solid"], # black } ticks = [50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000, 600000, 700000, 800000, 900000, 1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000, 15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000] ticklabels = ['50 K', '100 K', '150 K', '200 K', '250 K', '300 K', '350 K', '400 K', '0.5 M', '0.6 M', '0.7 M', '0.8 M', '0.9 M', '1 M', '2 M', '3 M', '4 M', '5 M', '6 M', '7 M', '8 M', '9 M', '10 M', '15 M', '20 M', '25 M', '30 M', '35 M', '40 M', '45 M', '50 M'] labels = { 'int_delete' : ["mean int deletion time", "deletion time (ns)"], 'int_insert' : ["mean int insertion time", "insertion time(ns)"], 'int_nosucc_lookup' : ["mean int unsucessful lookup time", "unsucessful lookup time (ns)"], 'int_succ_lookup' : ["mean int succesful lookup time", "succesful lookup time (ns)"], 'string_delete' : ["mean string deletion time", "deletion time (ns)"], 'string_insert' : ["mean string insertion time", "insertion time(ns)"], 'string_nosucc_lookup' : ["mean string unsucessful lookup time", "unsucessful lookup time (ns)"], 'string_succ_lookup' : ["mean string succesful lookup time", "succesful lookup time (ns)"] } # In[3]: # outlier testing functions def remove_with_modified_z_score(data, treshold=3.5): # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm data = data.astype(int) stats = data.describe() median_absolute_deviation = abs(data - data.median()).median() if not median_absolute_deviation: return data modified_z_scores = abs(0.6745 * (data - data.median()) / median_absolute_deviation) cutoff = modified_z_scores <= treshold data = data * cutoff data = data.replace(0, nan) return data #function that takes one of the outlier testers and data, and removes outliers def remove_outlier(data, function): new_data = data.copy(True) new_data["TIME"] = new_data["TIME"].astype(int) new_data["SIZE"] = new_data["SIZE"].astype(int) new_data for i in range(4216): start = i * 30 end = start+30 new_data.loc[start:end, "TIME"] = function(data.loc[start:end, "TIME"]) if not i % 420: print(i / 42 , "% done") return new_data # helpers for plot functions def sort_maps(test): maps = data[data["TEST"]== test]["MAP"].unique() new = [(gr_max.loc[test, i]["TIME"], i) for i in maps] new.sort() new = [i[1] for i in new] return new def divider(df, maplist): filters = df['MAP'].isin(maplist) return df[filters] def plotter2(test, data): mydata = data[data["TEST"] == test] maps = sort_maps(test) set1 = divider(mydata, maps[:5]) set2 = divider(mydata, maps[5:11]) set3 = divider(mydata, maps[11:]) plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set1) plt.xscale("log") plt.xticks(ticks, ticklabels) plot.set_xticklabels( plot.get_xticklabels(), rotation=55, horizontalalignment='center', fontweight='light', ) plt.ylabel(labels[test][1]) plt.legend() plt.title(labels[test][0]) plt.savefig("./plots/{}/{}.png".format(test,1)) plt.clf() plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2) plt.xscale("log") plt.xticks(ticks, ticklabels) plot.set_xticklabels( plot.get_xticklabels(), rotation=55, horizontalalignment='center', fontweight='light', ) plt.ylabel(labels[test][1]) plt.legend() plt.title(labels[test][0]) plt.savefig("./plots/{}/{}.png".format(test,2)) plt.clf() plot = sns.lineplot(x="SIZE", y="TIME", hue="MAP", data=set2) plt.xscale("log") plt.xticks(ticks, ticklabels) plot.set_xticklabels( plot.get_xticklabels(), rotation=55, horizontalalignment='center', fontweight='light', ) plt.ylabel(labels[test][1]) plt.legend() plt.title(labels[test][0]) plt.savefig("./plots/{}/{}.png".format(test,3)) plt.clf() # In[4]: data = remove_outlier(onmodded, remove_with_modified_z_score) groups = data.groupby(["TEST", "MAP"]) gr_max = groups.max() gr_mean = groups.mean() # In[5]: tests = data["TEST"].unique() for i in tests: plotter2(i, data) # In[6]: # In[ ]: # In[ ]: # In[ ]: