143 lines
76 KiB
Plaintext
143 lines
76 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Python and R in a Single Notebook\n",
|
||
|
"\n",
|
||
|
"Based on the instructions for the [all-spark-notebook](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook) image and this [Stack Overflow question](https://stackoverflow.com/questions/39008069/r-and-python-in-one-jupyter-notebook).\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Last run: 2019-12-19 09:09:24.610738 UTC\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import datetime\n",
|
||
|
"print('Last run:', datetime.datetime.utcnow(), 'UTC') # timezone can't be detected from browser"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Processing in Python\n",
|
||
|
"* Create a dataframe\n",
|
||
|
"* Load the rpy2 IPython extension"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"n = 100\n",
|
||
|
"df = pd.DataFrame({\n",
|
||
|
" 'cups_of_coffee': np.random.exponential(3, size=n),\n",
|
||
|
" 'productivity': np.random.normal(100, 15, n),\n",
|
||
|
" 'gender': np.random.choice([\"f\", \"m\"], n)\n",
|
||
|
"})"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%load_ext rpy2.ipython"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"## Processing in R\n",
|
||
|
"\n",
|
||
|
"* Load ggplot2 (uncomment the `install.packages` line first if it is not installed yet)\n",
|
||
|
"* Import the dataframe from Python and set the figure size and resolution\n",
|
||
|
"* Plot the dataframe"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%%R\n",
|
||
|
"#install.packages(\"ggplot2\", repos='http://cran.us.r-project.org', quiet=TRUE)\n",
|
||
|
"library(ggplot2)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"/opt/conda/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
|
||
|
" res = PandasDataFrame.from_items(items)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxMAAAMTCAIAAABVON0HAAAACXBIWXMAAB7CAAAewgFu0HU+AAAgAElEQVR4nOzdeXxU1d0/8O+5984+WcmeQBICCMgmIirggoCKirJYC4pSH+2u1v6qtVVrFa219nGptk/bp621LvRRBNe64YKgLCKbAmEPIWTfk9nn3nt+fwwZQjKZmWT2mc/71dereOfM3G9yk8xnzjn3HMY5JwAAAAAIghDrAgAAAAASBpITAAAAQLCQnAAAAACCheQEAAAAECwkJwAAAIBgITkBAAAABAvJCQAAACBYSE4AAAAAwUJyAgAAAAgWkhMAAABAsJCcAAAAAIKF5AQAAAAQLCQnAAAAgGAhOQEAAAAES4p1Acmgo6NjyM/V6XRarZaIHA6H2+0OX1HxS5Ikg8FgtVpVVY11LdGQspfYZrMpihLrWqLBe4mdTqfL5Yp1OdEgiqLRaEydS6zVanU6HfVc4szMzFhXBLGE5BQGsiwP+bk6nU4URSLinIfyOglEEARRFFVVTZGv13uJKbQflQTiucSKoqTI16vValPtt5gxhksMKQujdQAAAADBQnICAAAACBaSEwAAAECwkJwAAAAAgoXkBAAAABAsJCcAAACAYCE5AQAAAAQLyQkAAAAgWEhOAAAAAMFCcgIAAAAIFpITAAAAQLCQnAAAAACCheQEAAAAECwkJwAAAIBgITkBAAAABAvJCQAAACBYSE4AAAAAwUJyAgAAAAgWkhMAAABAsJCcAAAAAIKF5AQAAAAQLCQnAAAAgGAhOQEAxJhNVR2cx7oKAAiKFOsCAABSVK3b/UxL+9udXc2yQkTDtZpFGek/HpaVLYmxLg0ABoQ+JwCAGFjXbZ116Nhzre2e2ERENS73M82tFxw+tsPmiG1tAOAHkhMAQLTttDtuPl5rUdX+DzXJ8rLjJ2rccvSrAoBgIDkBAETbPXWNTp8TmzgRUZus/Lq+McolAUCQkJwAAKJqr8O50z7AeBw7+f/vdVvbekbxACCuIDkBAETVgLGpF5nzrx2Y7QQQj5CcAACiqlvxMb2pP4uKdQoA4hGSEwBAVBVqgloOpgBrEwDEJSQnAICousBslBjz3yZbEqcY9NGpBwAGBckJACCqhoni9VkZ/tv8cFhWwHQFADGB5AQAEG0P5ueO1esGevQCs/HHOdnRrAcAgofkBAAQbWmi8GbZ8EvTzH2OM6JlmemrSks06HACiFfYtw4AIAayJfHl0uJtNsd/urqPuFwCsbE67cLM9HE6baxLAwB/kJxSncL5AaerWZbTRfFMvU6bep90m2Xlna7u/U6XTVHKddpL08wTBh5GAQivc4z6c4yYCQ6QSJCcUpeL8/9paf9ra1tLz1LFZkFYlpX+87ycTDElbofmRH9obnuyucXea+Gc3za2XJme9mRRPvarBwCA/pCcwoCF0E/jfS5jLJTXGaxuRV1afWKr1db7oEVV/9basa7buqZ8eKk2UkMGvb/kCJ0iSD+rbXihraP/8f90dR90Ot+rKA17goz5lxwdni8zyj/SMdT7y0ypLzl1LrFXCn7J0B/jPnedhGS3ZO/+tc2tAz06wWTcfvYUrZDMfyBebWr59r4Dfhosy8tdNX5M1OoBgISgqqog4OaqlIbkFAZWq3XIz9VqtRqNhoicTqcsy+Eryp8vuq2X7T/kv81TpSXfzcuJxNlFUdTr9Xa7XVWD2oMiQs7be2CPze6nASP6etL48pCn63ovscvlcrvdIb5aQoiTSxw1uMRJT6PRaLVaInK5XC6Xy2zue1MkpBSM1oWB3e7vDdg/QRA8f3PdbrcjWht8rmpqDtjm382ty9NMkTi7VqvV6/XRTIr91bnlk7GJn9qdvg9O9E5zy63DskI8V+9LHMqPSgKJh0scTYyxVLvEGo0m1S6xJzl5/lAjOaU4dDmmov0OZ+A2zsBtElett2PA74BkrTsl3hUAACB4SE6pKHAc4O
RK6n3aTcFNUzBiNgMAAJwObwypqFSjCdCCUbk2UJtEVqHTBhOeJhmwsBMAAJwG85zigMOh7PpKPFBptFpUnV4tGeEeP4kbDJE74fx089rOrkBt0iJXQMzpGFuYkfZyeyfRgFOdCjXSxeaIzPQCAIDEheQUY+zAPtdba7jV4ukAEYnoYKX2i88cl1wmT5gcoZMuSDeP1+v2DTzbKVsSvx/yzOg494u8nA+6LS2y4jM2MaJHCvN0WLgFAABOh9G6WNIcrGT/9wK3WvocZ06H4b03Nbu3R+i8ImPPjyjOk3znZoPA/l5SlPQraBdopFWlJT6/CSJjjxTmXZ3UvW4AADA0SE4xw+x23ftv08Draek+fl/o9LHCdViUazUfVoyY12/dgbMM+nfKR1xgNkbovHHlLIP+s1Fl3x2WOaxnrXAtY5emmd8tH/G9ZO9yAwCAocFoXcxodm9nTn8LODFF0Wzf6rzksggVUKzRrCotOeJ0bbDammQ5SxSnGw2TDfqUGqDKkcRHC/MfLshrkhUn54UaCSN0AADgB5JTzEjVR4NoUxXpVZUqdNqKkJfJTnQiY4Ua/C4AAEBgGK2LGdZvepOPNpbuKFQCAAAAQUJyih1d4LWCeBBtAAAAIGqQnGJGLiwJ2EYpCtwGAAAAogbJKWbkSWdRoMnI7slTo1MMAAAABAPJKWaUnDzXOef7aSBPmKwML4tWOQAAABAYklMsOS+4hA8QntzjJjguvSrK9QAAAIB/uBM7pgSBX7lQe9Y05fP1ypGDzOnkGo1SPMI99Ry5YkysiwMAAIC+kJxiT6gYLVSMdlosTquFi5G9IkyWWUcbU1U1LSOimwoDAAAkJSSnOBLR2MQ6O/RffCYe3MfcbiIixpSiEueMC5WyisidFAAAIMlgnlNKEE9Um174m7R398nYRESci7U1xtdWab9YH8vKAAAAEgqSU/ITujoNr7/KHHYfj3Gu27RB882uqBcFAACQkJCckp/u8099xyZvg8/WneqLAgAAgIEhOSU5JrvFA5UB2tjtUtXh6NQDAACQ0JCckpzQ3s7kwP1JrKkhCsUAAAAkOiSnZHcyNnH/rZgsR6EWAACARIfklOTUjExijKjXBnm+QpSakRm1kgAAABIXklOS40aTUlB02qH+uwwLgoIlywEAAIKA5JT8XDMu8t9AzchS0zOiUwwAAEBCQ3JKfnJ5hWoyEw042Ulob5UO7o9mSQAAAAkKySn5idVVgtVC5Gucrofuq81RqwcAACBxITklP+l4VcA2Qt0J5nJFoRgAAICEhuSU/ASLJXAjzpk1iGYAAACpDckp+ak6bTDNuF4f6UoAAAASHZJT8lOLSgK3yczmBmMUigEAAEhoSE7JTx51Bjea/LdxTz4rOsUAAAAkNCSn5Mc1Wue8K4gNeGedkl/oPvvcaJYEAACQoJCcUoJ7zDjHpVeSKPZ/SC0qti9ZxkUp+lUBAAAkHLxfpgr3pKnK8FLt1k1S1WFm6eaiyAuKXBMmuydMIQEBGgAAIChITilEzRrmuHwBEZGi+Ox/AgAAAP+QnFJS6sUml8pXdXS+1dV9yOninMq1mqsz0m/MztAPPP0LAACgPyQnSH5HXa4bq2sPOk8tkt4oy1ts9r+2tr1YWjIuuPWuAAAACDPEIek1yfLiqhO9Y5NXtcu9uOp4jcsd/aoAACBBITlBklvZ2FLrHjAbtcjK/Q3N0awHAAASGpITJLMORVnb0eW/zXtd3XVuOTr1AABAokNygmS23e5wc+6/DSfaarNHpx4AAEh0SE6QzNplJZhmbTL6nAAAIChITpDMhgW3/kKuBjeZAgBAUJCcIJmdYzJoA63YJBCdbzRGpx4AAEh0SE6QzMyCcENWhv82izPTc6WUWxoUAACGBskJkty9+TkVA691WaLVrCzIi2Y9AACQ0JCcIMlliuLrZcOnGQ39H5qo171ZPhwdTgAAEDxMjIXkV6iR/lM+4j/dljc7uw46XZxolE57VZp5YU
aaiH3rAABgMJCcICUIjBakmxekm2NdCAAAJDaM1gEAAAAEC31OKUE8dlQ6ekjoaOcaDc/Nd4+boGZkxrooAACAxIP
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%%R -i df -w 10 -h 10 --units cm -r 200\n",
|
||
|
"ggplot(df, aes(x=cups_of_coffee, y=productivity, color=gender)) + geom_point() + \n",
|
||
|
"geom_smooth(method=\"lm\", se=TRUE) + ylab(\"Normalized Productivity\") + xlab(\"Cups of Coffee per day\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.7.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 4
|
||
|
}
|