benchmark-results/ignore_random_ops.ipynb

403 lines
225 KiB
Plaintext
Raw Normal View History

2020-04-20 16:57:40 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(\"r2vals.csv\").sort_values(\"r2 \")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>r2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>133.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.677843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.229390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.135144</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.474227</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.734562</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.878335</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>0.968964</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" r2 \n",
"count 133.000000\n",
"mean 0.677843\n",
"std 0.229390\n",
"min 0.135144\n",
"25% 0.474227\n",
"50% 0.734562\n",
"75% 0.878335\n",
"max 0.968964"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv(\"new_repr_no_outlier.csv\")\n",
"data[\"SQRSIZE\"] = data[\"SIZE\"].apply(np.sqrt)\n",
"data[\"LNSIZE\"] = data[\"SIZE\"].apply(np.log)\n",
"data[\"LOG2SIZE\"] = data[\"SIZE\"].apply(np.log2)\n",
"maps = data[\"MAP\"].unique()\n",
"tests = data[\"TEST\"].unique()\n",
"groups = data.groupby([\"MAP\", \"TEST\"])"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
"size = pd.Series(range(1, 50000000, 100000))\n",
"predictors = pd.DataFrame([size, np.sqrt(size), np.log(size), np.log2(size)]).T\n",
"predictors.columns = [\"SIZE\",\"SQRSIZE\",\"LNSIZE\",\"LOG2SIZE\" ]\n",
"regression_results = pd.DataFrame([[],[], [], []]).T\n",
"rvals = []\n",
"\n",
"for i, group in groups:\n",
" results = smf.ols('TIME~SIZE+SQRSIZE+LNSIZE+LOG2SIZE', data=group).fit()\n",
" pred = results.predict(predictors)\n",
" rvals.append(results.rsquared)\n",
" regression_results = regression_results.append([[i[1], i[0], size[a], pred[a]] for a in range(len(test))]) \n",
"regression_results.rename(columns={0: \"test\", 1: \"map\", 2:\"size\", 3: \"time\"})\n",
"regression_results.columns = [\"test\", \"map\", \"size\", \"time\"]"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [],
"source": [
"regroup = regression_results.groupby([\"test\"])"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABZ8AAAJHCAYAAAD/vijJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOzdeVxU9f4/8Nc5s8Gw77hvCJI74IJimpa7lZXpLy011zbbb9bt3mt7N695v6mlaVlqmttVwzRtsdQsXFFExVxwQWVfBgZmO+f3xzADwwwICgL6et6H98A5n885nzMH0F585v0RZFmWQURERERERERERERUi8T6HgARERERERERERER3X4YPhMRERERERERERFRrWP4TERERERERERERES1juEzEREREREREREREdU6hs9EREREREREREREVOsYPhMRERERERERERFRrWP4TERERERUzsCBAxEREVHfw6jS//73P0RERGDBggU3fa7HH38cERERuHz5ci2MrOFcq7ZFRERg4MCB9T0MIiIiokaF4TMRERERUR1pzGHrjWisAW1CQgIiIiIwe/bs+h4KERER0W1FWd8DICIiIiJqSL766iuYTKb6HsZt6d///jeKi4sREhJS30OpsW3btkGlUtX3MIiIiIgaFYbPRERERETltGzZsr6HcNtq2rRpfQ/hhrVr166+h0BERETU6LDsBhERERHh8uXLiIiIwOOPP47i4mLMnTsXAwcORJcuXTBy5Ehs2bLF3jYhIQETJ05ETEwMoqOjMWvWLFy7ds3leWVZxtatW/HEE0+gR48e6Ny5M4YNG4YFCxaguLjYqX1qaioWLFiAcePGIS4uDp06dUJcXByef/55JCcnu7xG+RrNGzZswAMPPIAuXbqgV69eePXVV5Genl6j18JVzefyr4/BYMD8+fMxcOBAdOrUCQMHDsT8+fNhNBqd2u/fvx8AMGjQIERERNj/VNehQ4cwadIkdO/eHTExMZgyZQqOHj1aZR+z2YzVq1dj7NixiIqKQpcuXfDAAw/gq6++gtlsrsErAeTl5WHevHkYPnw4unTpgujoaDzxxBPYtWuXQztbDWoASEtLc7jXxx9/3N6uqjIkZ8+exSuvvOLw3F955RWcPXvWqW35Mhn5+fl466230K9fP3Tq1AlDhgzBl19+CVmWq3WPs2fPxhNPPAEA2LRpk8PYy9fUdlVSpPw4cnNz8c9//hNxcXHo2rUrHnnkEezevdvedufOnRg7diy6d++OXr164Y033kBBQYHLMdXmMyQiIiKqT5z5TERERER2JpMJkydPxvnz59GrVy/odDocOHAAf/vb3yDLMtzc3PDyyy/bw8Hjx49jx44dSElJwXfffQeNRmM/lyRJePXVV7F161ZotVp06tQJPj4+OH78OBYuXIjdu3dj5cqVcHNzs/dZu3Ytli9fjvbt26Njx45wc3NDamoqfvjhB/zyyy/4/PPPERsb63Lsc+fOxddff42YmBi0atUKiYmJ+O6775CUlIQtW7Y4jO1mXp8nn3wSp0+fRo8ePRAWFoaDBw9i8eLFSE9Px4cffggA0Gq1GD16NPbs2YOsrCwMGTIEWq22RtfatWsXnn32WZjNZnTp0gUtWrRASkoKxo8fj4ceeshln5KSEkyfPh0JCQnw8fFBt27doFarcezYMXzwwQdISEjAokWLIIrXn4Ny/vx5TJ48GVevXkWzZs0QFxeHoqIiHD16FDNnzsTf/vY3TJkyBYB1tvjo0aOxadMmaLVaDBkyxH6etm3bXvdaf/zxB5566ikUFxfjrrvuQs+ePXHu3DnEx8fj559/xuLFi9GrVy+nfgUFBRg7dix0Oh2io6Oh0+lw8OBB/Pvf/0ZRURGee+656147OjoamZmZ2Lt3L1q2bIno6Gj7scjIyOv2B4D8/HyMHTsWJSUliImJQVZWFg4ePIinnnoKX3zxBU6dOoWPPvoI0dHR6NevHw4fPoyNGzfi0qVLWLFiBQRBsJ+rNp8hERERUb2TiYiIiOiOd+nSJTk8PFwODw+XJ0yYIOt0OvuxvXv3yuHh4XJcXJzcs2dP+ccff7QfMxgM8oQJE+Tw8HB548aNDudcunSpHB4eLo8fP15OT0936PPGG2/I4eHh8ty5cx36HDp0SL548aLT+H799Ve5Y8eO8uDBg2VJkhyO3XPPPXJ4eLjcu3dvOSUlxb6/sLBQHjNmjMuxVcV2vspen7Fjx8p5eXn2YxcuXJCjo6PliIgIp7HbXptLly5V+/qyLMs6nU7u3bu3HB4eLm/YsMG+X5Ikee7cufaxfPLJJw795syZI4eHh8vPPfecXFBQ4HC+adOmyeHh4fLq1auvO0az2SyPHDlSDg8Pl5csWSKbzWb7sdTUVHngwIFyZGSkw+sty7IcHh4u33PPPZXel6trFRUVyX369JHDw8PlVatWObRfvny5HB4eLvft21fW6/X2/X/++af9NXjmmWfkkpIS+7HExEQ5MjJS7tatm1xYWFjpWMqzne+1116rtI2reys/jhdffFE2GAz2Y2vXrpXDw8PlwYMHyzExMfLhw4ftx/Lz8+WhQ4fK4eHh8p9//ulwzht5hkREREQNFX9dTkRERER2oijirbfegqenp31f3759ERkZiYyMDPTr1w/33nuv/ZharcbEiRMBAAcOHLDvN5vNWLZsGdzd3fHxxx8jODjYoc8//vEPBAUFYd26dZAkyX4sKioKLVq0cBpX//79MWTIEKSmpuL06dMuxz5r1iyEh4fbP/fw8MCTTz7pNLabIYoi3n33Xfj4+Nj3tWzZEvfffz9kWcbBgwdr5To7duxATk4OevTogYcffti+XxAEPP/88wgNDXXqk52djfXr1yMkJAQffvghvLy87Mc8PT3x3nvvQaVSYc2aNde9/q5du3D69Gncd999mD59OhQKhf1Yq1atMHv2bFgsFqxbt+4m7xTYvn07srKy0L17d4wfP97h2KRJk9CpUydkZmbihx9+cOrr4eGBt99+22FWe9euXdGvXz/o9fpKS7XUNk9PT/zjH/+AWq2273v44Yfh5+eH1NRUPPbYY+jevbv9mLe3N8aNGwfA8WuzNp8hERERUUPA8JmIiIiI7Jo2beqyTIJtEb64uLhKj2VkZNj3nThxArm5uejevbtD8Gzj5uaGjh07Ij8/H6mpqQ7H9Ho9tm3bhv/85z948803MXv2bMyePRt//fUXAODChQsux3733Xc77WvTpo3T2G5GkyZNEBYWVufXsYXYw4cPdzqmUqkcylrYJCQkwGQy4e6773ZZ4iMoKAitW7fG6dOnUVJSUuX19+7dCwAYPHiwy+O20hRJSUlV30g12O511KhRLo8/+OCDDu3K69ixI/z9/Z321/bzuJ6OHTvCz8/PYZ9CobAvsFjd75vafIZEREREDQFrPhMRERGRnasZtQDsQVhISEilxyouuAcA+/btu+4Ce7m5ufaPExIS8NJLLyErK6vS9kVFRS73N2nSxGmfh4eH09huhqtr1MV1bIFks2bNXB53tT8tLQ0AsH79eqxfv77K8+fn5zvU2q7sXK+++ipeffXVStuVf3Y36nr32rx5cwBwuXDkrXoe11PZ941tHK6Ou/q+qc1nSERERNQQMHwmIiIiIrvrLWJW3UXOZFkGYC3REBUVVWVbX19fANYZz88//zxyc3Mxc+ZMjBw5Ek2bNoVWq4UgCPj444+xZMkS+7lvdGw3oyEv8mZ7XSIjI9GhQ4cq26pUqiqP20qh9OvXD4GBgZW2qzjb91ZrKM/jeuMov6BgVWrzGRIRERE1BAyfiYiIiKjW2WZIt23bFh9++GG1+hw4cAC5ubkYMmQIXnzxRafjlZXbuB3ZSpXYZsJWdOXKFad9ttc8Ojoa//jHP27q+raZumPGjHFZ4qM2Xe9ebftdzbq/3dTmMyQiIiJqCBrGVAEiIiIiuq106dIFXl5e2L9/P/Ly8qrVp6CgAIDrEgU5OTnYt29frY7xVrDNTrVYLDXqZ6upvH37dqdjZrMZO3fudNrfu3dvKBQK7Nq1CyaT6QZ
"text/plain": [
"<Figure size 1728x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams[\"figure.figsize\"] = (24,5)\n",
"sns.set()\n",
"sns.set(font_scale=1.75)\n",
"ticks = [50000, 100000, 150000, 200000, 250000, 300000, 350000, 400000, 500000,\n",
"600000, 700000, 800000, 900000, 1000000,\n",
"2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000,\n",
"15000000, 20000000, 25000000, 30000000, 35000000, 40000000, 45000000, 50000000]\n",
"ticklabels = ['50 K', '100 K',\n",
" '150 K', '200 K', '250 K', '300 K',\n",
" '350 K', '400 K', '0.5 M', '0.6 M',\n",
" '0.7 M', '0.8 M', '0.9 M', '1 M',\n",
" '2 M', '3 M', '4 M', '5 M',\n",
" '6 M', '7 M', '8 M', '9 M',\n",
" '10 M', '15 M', '20 M', '25 M',\n",
" '30 M', '35 M', '40 M', '45 M', '50 M']\n",
"labels = {\n",
" 'int_delete' : [\"mean int deletion time\", \"deletion time (ns)\"],\n",
" 'int_insert' : [\"mean int insertion time\", \"insertion time(ns)\"],\n",
" 'int_nosucc_lookup' : [\"mean int unsucessful lookup time\", \"unsucessful lookup time (ns)\"],\n",
" 'int_succ_lookup' : [\"mean int succesful lookup time\", \"succesful lookup time (ns)\"],\n",
" 'string_delete' : [\"mean string deletion time\", \"deletion time (ns)\"],\n",
" 'string_insert' : [\"mean string insertion time\", \"insertion time(ns)\"], \n",
" 'string_nosucc_lookup' : [\"mean string unsucessful lookup time\", \"unsucessful lookup time (ns)\"],\n",
" 'string_succ_lookup' : [\"mean string succesful lookup time\", \"succesful lookup time (ns)\"]\n",
"}\n",
"\n",
"for i, group in regroup:\n",
" plot = sns.lineplot(x=\"size\", y=\"time\", hue=\"map\", data=group)\n",
" plt.xscale(\"log\")\n",
" plt.xticks(ticks, ticklabels)\n",
" plot.set_xticklabels(\n",
" plot.get_xticklabels(), \n",
" rotation=55, \n",
" horizontalalignment='center',\n",
" fontweight='light',\n",
" )\n",
" plt.ylabel(labels[i][1])\n",
" plt.legend()\n",
" plt.title(labels[i][0])\n",
" plt.show()\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test</th>\n",
" <th>map</th>\n",
" <th>size</th>\n",
" <th>time</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>int_delete</td>\n",
" <td>absl::flat_hash_map</td>\n",
" <td>1.0</td>\n",
" <td>2.516393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>int_delete</td>\n",
" <td>absl::flat_hash_map</td>\n",
" <td>100001.0</td>\n",
" <td>11.352983</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>int_delete</td>\n",
" <td>absl::flat_hash_map</td>\n",
" <td>200001.0</td>\n",
" <td>12.227014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>int_delete</td>\n",
" <td>absl::flat_hash_map</td>\n",
" <td>300001.0</td>\n",
" <td>12.806459</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>int_delete</td>\n",
" <td>absl::flat_hash_map</td>\n",
" <td>400001.0</td>\n",
" <td>13.254607</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>495</th>\n",
" <td>string_succ_lookup</td>\n",
" <td>tsl::sparse_map</td>\n",
" <td>49500001.0</td>\n",
" <td>267.156427</td>\n",
" </tr>\n",
" <tr>\n",
" <th>496</th>\n",
" <td>string_succ_lookup</td>\n",
" <td>tsl::sparse_map</td>\n",
" <td>49600001.0</td>\n",
" <td>267.172009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>string_succ_lookup</td>\n",
" <td>tsl::sparse_map</td>\n",
" <td>49700001.0</td>\n",
" <td>267.187371</td>\n",
" </tr>\n",
" <tr>\n",
" <th>498</th>\n",
" <td>string_succ_lookup</td>\n",
" <td>tsl::sparse_map</td>\n",
" <td>49800001.0</td>\n",
" <td>267.202512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>499</th>\n",
" <td>string_succ_lookup</td>\n",
" <td>tsl::sparse_map</td>\n",
" <td>49900001.0</td>\n",
" <td>267.217433</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>68000 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" test map size time\n",
"0 int_delete absl::flat_hash_map 1.0 2.516393\n",
"1 int_delete absl::flat_hash_map 100001.0 11.352983\n",
"2 int_delete absl::flat_hash_map 200001.0 12.227014\n",
"3 int_delete absl::flat_hash_map 300001.0 12.806459\n",
"4 int_delete absl::flat_hash_map 400001.0 13.254607\n",
".. ... ... ... ...\n",
"495 string_succ_lookup tsl::sparse_map 49500001.0 267.156427\n",
"496 string_succ_lookup tsl::sparse_map 49600001.0 267.172009\n",
"497 string_succ_lookup tsl::sparse_map 49700001.0 267.187371\n",
"498 string_succ_lookup tsl::sparse_map 49800001.0 267.202512\n",
"499 string_succ_lookup tsl::sparse_map 49900001.0 267.217433\n",
"\n",
"[68000 rows x 4 columns]"
]
},
"execution_count": 155,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"regression_results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}