diff --git a/extra/Getting_Started/ElasticsearchConnection.ipynb b/extra/Getting_Started/ElasticsearchConnection.ipynb
new file mode 100755
index 0000000..c860001
--- /dev/null
+++ b/extra/Getting_Started/ElasticsearchConnection.ipynb
@@ -0,0 +1,1243 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Elasticsearch Data Analytics\n",
+ "\n",
+ "This notebook provides sample code to fetch Elasticsearch data into a pandas DataFrame and analyze it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from IPython.display import Image\n",
+ "from IPython.core.display import HTML \n",
+ "Image(url= \"https://www.antaresnet.com/wp-content/uploads/2018/07/Elasticsearch-Logo-Color-V.png\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Loading modules and connect to the Elastic Stack"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "pycharm": {
+ "is_executing": false
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "import json\n",
+ "import requests\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "import pytz\n",
+ "from datetime import datetime, timedelta\n",
+ "from dateutil import tz\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "sns.set(style=\"darkgrid\")\n",
+ "plt.rcParams[\"figure.figsize\"] = (18,10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "pycharm": {
+ "is_executing": false
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Last run: 2019-12-20 08:12:58.766840 UTC, status: 7.0604683677036 %\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'name': 'c185f3ed577c',\n",
+ " 'cluster_name': 'il.es.cluster',\n",
+ " 'cluster_uuid': 'sBgbgyRXTvKta2cEJCczKQ',\n",
+ " 'version': {'number': '6.2.2',\n",
+ " 'build_hash': '10b1edd',\n",
+ " 'build_date': '2018-02-16T19:01:30.685723Z',\n",
+ " 'build_snapshot': False,\n",
+ " 'lucene_version': '7.2.1',\n",
+ " 'minimum_wire_compatibility_version': '5.6.0',\n",
+ " 'minimum_index_compatibility_version': '5.0.0'},\n",
+ " 'tagline': 'You Know, for Search'}"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# connect to our cluster\n",
+ "from elasticsearch import Elasticsearch\n",
+ "es = Elasticsearch([{'host': 'elasticsearch', 'port': 9200}])\n",
+ "print('Last run: {} UTC, status: {} %'.format(\n",
+ " datetime.utcnow(),\n",
+ " es.cluster.health()['active_shards_percent_as_number']))\n",
+ "es.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display our indices and document types saved in elasticsearch.\n",
+ "\n",
+ "Update the elasticsearch package."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: elasticsearch<8.0.0,>=7.0.0 in /opt/conda/lib/python3.7/site-packages (7.1.0)\n",
+ "Requirement already satisfied: urllib3>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from elasticsearch<8.0.0,>=7.0.0) (1.25.7)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!sudo pip install \"elasticsearch>=7.0.0,<8.0.0\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Defining useful functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# %load scroller.py\n",
+ "def scroller(index, quantity, timerange=timedelta(days=0), startdt=\"\", enddt=\"\"):\n",
+ "    \"\"\"Fetch all observations of one datastream from Elasticsearch via the scroll API.\n",
+ "\n",
+ "    Args:\n",
+ "        index: index name or pattern passed to es.search.\n",
+ "        quantity: value matched against \"Datastream.name.keyword\"; also the\n",
+ "            name of the single column of the returned DataFrame.\n",
+ "        timerange: if positive, the window [now - timerange, now] (UTC) is\n",
+ "            queried and startdt/enddt are overwritten.\n",
+ "        startdt, enddt: bounds of the \"phenomenonTime\" range filter, used\n",
+ "            as given when timerange is not positive.\n",
+ "\n",
+ "    Returns:\n",
+ "        DataFrame with one column named after quantity, indexed by the\n",
+ "        phenomenonTime values parsed with utc=True.\n",
+ "    \"\"\"\n",
+ "    print(\"Starting to scroll\", end='')\n",
+ "    # Retrieve the datetimes, note that timerange has a higher priority\n",
+ "    if timerange.total_seconds() > 0:\n",
+ "        now = datetime.utcnow().replace(tzinfo=pytz.UTC)\n",
+ "        startdt = (now - timerange).isoformat()\n",
+ "        enddt = now.isoformat()\n",
+ "\n",
+ "    # search the first page and write the result to data\n",
+ "    response = es.search(\n",
+ "        index=index,\n",
+ "        body={\n",
+ "            \"query\": {\n",
+ "                \"bool\": {\n",
+ "                    \"must\": [\n",
+ "                        {\"range\": {\n",
+ "                            \"phenomenonTime\": {\n",
+ "                                # bounds look like \"2018-02-20T09:08:34.230693+00:00\"\n",
+ "                                \"gte\": startdt,\n",
+ "                                \"lte\": enddt,\n",
+ "                                \"time_zone\": \"+01:00\"\n",
+ "                            }\n",
+ "                        }},\n",
+ "                        {\"match_phrase\": {\n",
+ "                            \"Datastream.name.keyword\": quantity\n",
+ "                        }}\n",
+ "                    ]\n",
+ "                }\n",
+ "            }\n",
+ "        },\n",
+ "        scroll='10m'\n",
+ "    )\n",
+ "    data = [[row[\"_source\"][\"phenomenonTime\"], row[\"_source\"][\"result\"]] for row in response['hits']['hits']]\n",
+ "\n",
+ "    try:\n",
+ "        # Append new pages until there aren't any left\n",
+ "        while len(response['hits']['hits']):\n",
+ "            print(\".\", end='')\n",
+ "            response = es.scroll(scroll_id=response['_scroll_id'], scroll='10m')\n",
+ "            data += [[row[\"_source\"][\"phenomenonTime\"], row[\"_source\"][\"result\"]] for row in response['hits']['hits']]\n",
+ "    finally:\n",
+ "        # Release the server-side scroll context right away instead of\n",
+ "        # leaving it open until the '10m' keep-alive expires\n",
+ "        es.clear_scroll(scroll_id=response['_scroll_id'])\n",
+ "\n",
+ "    # Convert data to a DataFrame and return it\n",
+ "    df = pd.DataFrame(data, columns=[\"phenomenonTime\", quantity])\n",
+ "    # roundto() keeps one fractional digit of each timestamp before parsing\n",
+ "    df.index = pd.to_datetime(df[\"phenomenonTime\"].map(lambda t: roundto(t, 1)), utc=True)\n",
+ "    df = df.drop([\"phenomenonTime\"], axis=1)\n",
+ "    print(\"\\nFetched {} tuples.\".format(df.shape[0]))\n",
+ "    return df\n",
+ "\n",
+ "def roundto(string, n):\n",
+ "    \"\"\"Cut a timestamp string down to n characters after its first \".\".\n",
+ "\n",
+ "    Despite the name, the fractional part is truncated, not rounded.\n",
+ "    Only the first n characters following the first \".\" are kept, so\n",
+ "    anything behind them (e.g. a trailing UTC offset) is dropped.\n",
+ "    A string without a \".\" is returned unchanged instead of raising\n",
+ "    an IndexError.\n",
+ "    \"\"\"\n",
+ "    parts = string.split(\".\")\n",
+ "    base = parts[0]\n",
+ "    if n > 0 and len(parts) > 1:\n",
+ "        base += \".\" + parts[1][:n]\n",
+ "    return base\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Gather data from Elasticsearch\n",
+ "\n",
+ "It is assumed that the Elasticsearch instance contains data with the Datastream.name \"at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature\" in indices matching the pattern \"at.srfg.iot-iot4cps-wp5.infraprov.internal-*\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Starting to scroll\n",
+ "Fetched 0 tuples.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature | \n",
+ "
\n",
+ " \n",
+ " phenomenonTime | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Get data for an index and a quantity between two static timestamps\n",
+ "startdt=\"2019-08-07T08:58:34+00:00\"\n",
+ "enddt=\"2019-08-07T11:58:34+00:00\"\n",
+ "df = scroller(\"at.srfg.iot-iot4cps-wp5.infraprov.internal-*\",\n",
+ " \"at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature\",\n",
+ " startdt=startdt, enddt=enddt)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Starting to scroll\n",
+ "Fetched 0 tuples.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature | \n",
+ "
\n",
+ " \n",
+ " phenomenonTime | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Get data for an index and a quantity of the latest timerange\n",
+ "df = scroller(\"at.srfg.iot-iot4cps-wp5.infraprov.internal-*\",\n",
+ " \"at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature\",\n",
+ " timerange=timedelta(days=10))\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Plot the extracted data using pandas and seaborn\n",
+ "df.plot()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get multiple quantities and (outer) join them to a single DataFrame.\n",
+ "# There can be a lot of missing values\n",
+ "used_quantities = [\"at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature\", \n",
+ " \"at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Starting to scroll.....\n",
+ "Fetched 48 tuples.\n",
+ "at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature\n",
+ "Starting to scroll\n",
+ "Fetched 0 tuples.\n"
+ ]
+ }
+ ],
+ "source": [
+ "df = scroller(\"at.srfg.iot-iot4cps-wp5.infraprov.internal-*\",\n",
+ " used_quantities[0],\n",
+ " timerange=timedelta(days=10))\n",
+ "for q in used_quantities[1:]:\n",
+ " print(q)\n",
+ " df = df.join(scroller(\"at.srfg.iot-iot4cps-wp5.infraprov.internal-*\", q,\n",
+ " timerange=timedelta(days=10)),\n",
+ " how=\"outer\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature | \n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature | \n",
+ "
\n",
+ " \n",
+ " phenomenonTime | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2019-08-07 09:32:50.600000+00:00 | \n",
+ " -1.396915 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:33:40.700000+00:00 | \n",
+ " -2.559881 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:34:10.700000+00:00 | \n",
+ " -3.360251 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:32:10.600000+00:00 | \n",
+ " -0.112741 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:32:40.600000+00:00 | \n",
+ " -0.956904 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature \\\n",
+ "phenomenonTime \n",
+ "2019-08-07 09:32:50.600000+00:00 -1.396915 \n",
+ "2019-08-07 09:33:40.700000+00:00 -2.559881 \n",
+ "2019-08-07 09:34:10.700000+00:00 -3.360251 \n",
+ "2019-08-07 09:32:10.600000+00:00 -0.112741 \n",
+ "2019-08-07 09:32:40.600000+00:00 -0.956904 \n",
+ "\n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature \n",
+ "phenomenonTime \n",
+ "2019-08-07 09:32:50.600000+00:00 NaN \n",
+ "2019-08-07 09:33:40.700000+00:00 NaN \n",
+ "2019-08-07 09:34:10.700000+00:00 NaN \n",
+ "2019-08-07 09:32:10.600000+00:00 NaN \n",
+ "2019-08-07 09:32:40.600000+00:00 NaN "
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Store and retrieve the DataFrame in a csv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.to_csv(\"elasticsearchdata.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature | \n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature | \n",
+ "
\n",
+ " \n",
+ " phenomenonTime | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2019-08-07 09:39:01.100000+00:00 | \n",
+ " -3.039692 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:39:51.100000+00:00 | \n",
+ " -1.599475 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:39:21.100000+00:00 | \n",
+ " -2.488179 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:39:31.100000+00:00 | \n",
+ " -2.259640 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:40:01.200000+00:00 | \n",
+ " -1.246612 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature \\\n",
+ "phenomenonTime \n",
+ "2019-08-07 09:39:01.100000+00:00 -3.039692 \n",
+ "2019-08-07 09:39:51.100000+00:00 -1.599475 \n",
+ "2019-08-07 09:39:21.100000+00:00 -2.488179 \n",
+ "2019-08-07 09:39:31.100000+00:00 -2.259640 \n",
+ "2019-08-07 09:40:01.200000+00:00 -1.246612 \n",
+ "\n",
+ " at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature \n",
+ "phenomenonTime \n",
+ "2019-08-07 09:39:01.100000+00:00 NaN \n",
+ "2019-08-07 09:39:51.100000+00:00 NaN \n",
+ "2019-08-07 09:39:21.100000+00:00 NaN \n",
+ "2019-08-07 09:39:31.100000+00:00 NaN \n",
+ "2019-08-07 09:40:01.200000+00:00 NaN "
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(\"elasticsearchdata.csv\", parse_dates=True, index_col='phenomenonTime')\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Pre-processing"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Reduce size and interpolate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"elasticsearchdata.csv\", parse_dates=True, index_col='phenomenonTime')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.index.names = [\"time\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "col_mapping = {\"at.srfg.iot-iot4cps-wp5.CarFleet1.car_1.Air Temperature\": \"car1_temp\",\n",
+ "               \"at.srfg.iot-iot4cps-wp5.CarFleet2.car_2.Air Temperature\": \"car2_temp\"}\n",
+ "# Rename the columns only: passing index=str would convert the parsed timestamps\n",
+ "# of the index into plain strings, and interpolate(method='zero') further down\n",
+ "# needs a numeric or datetime index.\n",
+ "df = df.rename(columns=col_mapping)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " car1_temp | \n",
+ " car2_temp | \n",
+ "
\n",
+ " \n",
+ " time | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2019-08-07 09:32:50.600000+00:00 | \n",
+ " -1.396915 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:33:40.700000+00:00 | \n",
+ " -2.559881 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:34:10.700000+00:00 | \n",
+ " -3.360251 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:32:10.600000+00:00 | \n",
+ " -0.112741 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2019-08-07 09:32:40.600000+00:00 | \n",
+ " -0.956904 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " car1_temp car2_temp\n",
+ "time \n",
+ "2019-08-07 09:32:50.600000+00:00 -1.396915 NaN\n",
+ "2019-08-07 09:33:40.700000+00:00 -2.559881 NaN\n",
+ "2019-08-07 09:34:10.700000+00:00 -3.360251 NaN\n",
+ "2019-08-07 09:32:10.600000+00:00 -0.112741 NaN\n",
+ "2019-08-07 09:32:40.600000+00:00 -0.956904 NaN"
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Interpolate linearly forwards and backwards, filling at most 10 consecutive NaNs per pass (two passes)\n",
+ "df = df.interpolate(method ='linear', limit_direction ='both', limit=10)\n",
+ "df = df.interpolate(method ='linear', limit_direction ='both', limit=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only the rows with at least 2 non-NA values.\n",
+ "df = df.dropna(thresh=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# make Timestamp unique\n",
+ "df = df.reset_index()\n",
+ "df = df.groupby(\"time\").agg({q: \"mean\" for q in col_mapping.values()})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Interpolate again (forward only) to close the remaining gaps.\n",
+ "# NOTE(review): the original note said this uses the smallest value - confirm that method='zero' does what is intended.\n",
+ "df = df.interpolate(method ='zero', limit_direction ='forward')\n",
+ "df = df.interpolate(method ='zero', limit_direction ='forward')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "nan"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.index.min()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0, 2)"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "car1_temp 0\n",
+ "car2_temp 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " car1_temp | \n",
+ " car2_temp | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " car1_temp car2_temp\n",
+ "count 0.0 0.0\n",
+ "mean NaN NaN\n",
+ "std NaN NaN\n",
+ "min NaN NaN\n",
+ "25% NaN NaN\n",
+ "50% NaN NaN\n",
+ "75% NaN NaN\n",
+ "max NaN NaN"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Keep only rows with all filled rows\n",
+ "df = df.dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.to_csv(\"elasticsearchdata.csv\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Basic Data Analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " car1_temp | \n",
+ " car2_temp | \n",
+ "
\n",
+ " \n",
+ " time | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [car1_temp, car2_temp]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(\"elasticsearchdata.csv\", parse_dates=True, index_col='time')\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df.hist()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pd.plotting.scatter_matrix(df, alpha=0.2)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# corr = df.corr() \n",
+ "cm = sns.light_palette(\"orange\", as_cmap=True) \n",
+ "cm = sns.diverging_palette(220, 20, sep=20, as_cmap=True) \n",
+ "# corr.style.background_gradient(cmap=cm).set_precision(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Feature Engineering\n",
+ "\n",
+ "This task is very domain-specific and must be done by an expert."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Data Analytics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "75cfeeea6204489e945d894895b2bc30",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "interactive(children=(IntSlider(value=45, description='pitch', max=90), IntSlider(value=45, description='yaw',…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# IPython.html.widgets was only a deprecated shim around ipywidgets; import the one name that is used\n",
+ "from ipywidgets import interact\n",
+ "# Imported for its side effect; presumably needed so that older matplotlib versions know projection='3d'\n",
+ "from mpl_toolkits.mplot3d import Axes3D\n",
+ "\n",
+ "plt.rcParams[\"figure.figsize\"] = (18,10)\n",
+ "sns.set(style=\"darkgrid\")\n",
+ "\n",
+ "def plot3D(pitch, yaw):\n",
+ "    \"\"\"Draw a 3D scatter plot of the global df, viewed from the given angles.\n",
+ "\n",
+ "    pitch and yaw are handed to ax.view_init() and are driven by the\n",
+ "    interact() sliders below.\n",
+ "    \"\"\"\n",
+ "    fig = plt.figure()\n",
+ "    ax = fig.add_subplot(111, projection='3d')\n",
+ "    # NOTE(review): x and y both show car1_temp - confirm whether a third quantity was intended\n",
+ "    plot = ax.scatter(df['car1_temp'], df['car1_temp'], df['car2_temp'], c=df[\"car1_temp\"], s=60)\n",
+ "    fig.colorbar(plot)\n",
+ "    ax.view_init(pitch, yaw)\n",
+ "    # NOTE(review): legend and axis labels talk about vibration/position although\n",
+ "    # temperatures are plotted - they look like leftovers, confirm and rename\n",
+ "    ax.legend(['Vibration for each 3D position'])\n",
+ "    ax.set_xlabel(\"x-Position\")\n",
+ "    ax.set_ylabel(\"y-Position\")\n",
+ "    ax.set_zlabel(\"z-Position\")\n",
+ "interact(plot3D, pitch=(0,90,1), yaw=(0,90,1))\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# df[col_mapping.values()].hist()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# pd.plotting.scatter_matrix(df[[\"vib\", \"distance\", \"projection\", \"v-radial\", \"v-tang\"]], alpha=0.5)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# bins = np.linspace(0, df['v-radial'].max(), 10)\n",
+ "# df[\"binned-v-radial\"] = pd.cut(df['v-radial'], bins)\n",
+ "# df.groupby(\"binned-v-radial\").agg({\"vib\": {\"min\", \"median\", \"mean\", \"max\", \"count\"}})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# corr = df[df.columns.sort_values()].corr()[[\"vib\", \"vib-x\", \"vib-y\"]]\n",
+ "# cm = sns.light_palette(\"orange\", as_cmap=True) \n",
+ "# cm = sns.diverging_palette(220, 20, sep=20, as_cmap=True) \n",
+ "# corr.style.background_gradient(cmap=cm).set_precision(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "\n",
+ "## Inline Description:\n",
+ "\n",
+ "It is very nice to describe results using Markdown with dynamic values like: **3.141592653589793**.\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from IPython.display import Markdown \n",
+ "Markdown(\"\"\"\n",
+ "## Inline Description:\n",
+ "\n",
+ "It is very nice to describe results using Markdown with dynamic values like: **{pi}**.\n",
+ "\n",
+ "\"\"\".format(pi=np.pi))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Machine Learning\n",
+ "\n",
+ "**Be careful when dealing with Artificial Intelligence:**\n",
+ "\n",
+ "![Be careful when dealing with Artificial Intelligence](https://imgs.xkcd.com/comics/twitter_bot.png)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/extra/Getting_Started/GPU-processing.ipynb b/extra/Getting_Started/GPU-processing.ipynb
new file mode 100644
index 0000000..fae5cde
--- /dev/null
+++ b/extra/Getting_Started/GPU-processing.ipynb
@@ -0,0 +1,417 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# GPU-Jupyter\n",
+ "\n",
+ "This Jupyterlab Instance is connected to the GPU via CUDA drivers. In this notebook, we test the installation and perform some basic operations on the GPU."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Test GPU connection\n",
+ "\n",
+ "#### Using the following command, your GPU type and its NVIDIA-SMI driver version should be listed:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fri Dec 20 09:42:29 2019 \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 440.26 Driver Version: 440.26 CUDA Version: 10.2 |\n",
+ "|-------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
+ "|===============================+======================+======================|\n",
+ "| 0 GeForce RTX 207... Off | 00000000:01:00.0 Off | N/A |\n",
+ "| 0% 54C P0 38W / 215W | 204MiB / 7974MiB | 0% Default |\n",
+ "+-------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| Processes: GPU Memory |\n",
+ "| GPU PID Type Process name Usage |\n",
+ "|=============================================================================|\n",
+ "+-----------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Now, test if PyTorch can access the GPU via CUDA:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import torch\n",
+ "torch.cuda.is_available()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[name: \"/device:CPU:0\"\n",
+ " device_type: \"CPU\"\n",
+ " memory_limit: 268435456\n",
+ " locality {\n",
+ " }\n",
+ " incarnation: 891330946073693377, name: \"/device:XLA_CPU:0\"\n",
+ " device_type: \"XLA_CPU\"\n",
+ " memory_limit: 17179869184\n",
+ " locality {\n",
+ " }\n",
+ " incarnation: 9415777875944419380\n",
+ " physical_device_desc: \"device: XLA_CPU device\"]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from tensorflow.python.client import device_lib\n",
+ "device_lib.list_local_devices()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([[0.8722, 0.5115, 0.9504],\n",
+ " [0.7723, 0.2860, 0.5793],\n",
+ " [0.5388, 0.5681, 0.4295],\n",
+ " [0.5269, 0.5165, 0.7475],\n",
+ " [0.4882, 0.8255, 0.6498]])"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from __future__ import print_function\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "a = torch.rand(5, 3)\n",
+ "a"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Performance test\n",
+ "\n",
+ "#### Now we want to know how much faster a typical operation is on the GPU. Therefore we run the same operation in NumPy, PyTorch and PyTorch with CUDA. The test operation is the calculation of the projection (hat) matrix $H = X (X^T X)^{-1} X^T$ that is used in linear regression."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 1) Numpy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = np.random.rand(10000, 256)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "590 ms ± 41.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "H = x.dot(np.linalg.inv(x.transpose().dot(x))).dot(x.transpose())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2) PyTorch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x = torch.rand(10000, 256)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "853 ms ± 16.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "# Calculate the projection matrix of x\n",
+ "H = x.mm( (x.t().mm(x)).inverse() ).mm(x.t())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3) PyTorch on GPU via CUDA"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([[0.1054, 0.3291, 0.7729, 0.6005, 0.2372],\n",
+ " [0.1022, 0.4534, 0.3964, 0.9174, 0.2610],\n",
+ " [0.3969, 0.5472, 0.3876, 0.1979, 0.4063],\n",
+ " [0.3630, 0.6374, 0.4176, 0.4804, 0.0396],\n",
+ " [0.8256, 0.2289, 0.2265, 0.4388, 0.6070]], device='cuda:0')\n",
+ "tensor([[0.1054, 0.3291, 0.7729, 0.6005, 0.2372],\n",
+ " [0.1022, 0.4534, 0.3964, 0.9174, 0.2610],\n",
+ " [0.3969, 0.5472, 0.3876, 0.1979, 0.4063],\n",
+ " [0.3630, 0.6374, 0.4176, 0.4804, 0.0396],\n",
+ " [0.8256, 0.2289, 0.2265, 0.4388, 0.6070]], dtype=torch.float64)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# let us run this cell only if CUDA is available\n",
+ "# We will use ``torch.device`` objects to move tensors in and out of GPU\n",
+ "if torch.cuda.is_available():\n",
+ " device = torch.device(\"cuda\") # a CUDA device object\n",
+ " x = torch.rand(10000, 256, device=device) # directly create a tensor on GPU\n",
+ " y = x.to(device) # or just use strings ``.to(\"cuda\")``\n",
+ " print(x[0:5, 0:5])\n",
+ " print(y.to(\"cpu\", torch.double)[0:5, 0:5])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "11.3 ms ± 60.3 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit\n",
+ "H = x.mm( (x.t().mm(x)).inverse() ).mm(x.t())\n",
+ "# CUDA kernels are launched asynchronously: wait for them to finish so that\n",
+ "# the whole computation is timed, not just the kernel launches\n",
+ "if x.is_cuda:\n",
+ "    torch.cuda.synchronize()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Exhaustive Testing on GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# let us run this cell only if CUDA is available\n",
+ "# We will use ``torch.device`` objects to move tensors in and out of GPU\n",
+ "import torch\n",
+ "if torch.cuda.is_available():\n",
+ " device = torch.device(\"cuda\") # a CUDA device object\n",
+ " x = torch.rand(10000, 10, device=device) # directly create a tensor on GPU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([[0.3112, 0.7480, 0.1882, 0.8453, 0.8198],\n",
+ " [0.5953, 0.8401, 0.3126, 0.6025, 0.5252],\n",
+ " [0.1902, 0.5610, 0.7968, 0.1463, 0.7154],\n",
+ " [0.7979, 0.2161, 0.6176, 0.2951, 0.1980],\n",
+ " [0.6451, 0.3837, 0.5305, 0.2740, 0.3330]], device='cuda:0')\n"
+ ]
+ }
+ ],
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " y = x.to(device) # or just use strings ``.to(\"cuda\")``\n",
+ " print(x[0:5, 0:5])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " # The GPU memory is the limiting factor here.\n",
+ " # A matrix with 100000 rows requires 37 GB, but only 8 GB are available.\n",
+ " H = x.mm( (x.t().mm(x)).inverse() ).mm(x.t())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([[ 1.2748e-03, 5.3656e-04, 1.7376e-04, 3.3888e-06, -1.7049e-04],\n",
+ " [ 5.3656e-04, 6.3624e-04, 2.5957e-05, 3.3281e-04, -1.6239e-05],\n",
+ " [ 1.7376e-04, 2.5957e-05, 7.6328e-04, 7.7603e-05, 1.8272e-04],\n",
+ " [ 3.3888e-06, 3.3281e-04, 7.7603e-05, 9.6281e-04, 1.2375e-04],\n",
+ " [-1.7049e-04, -1.6239e-05, 1.8272e-04, 1.2375e-04, 6.9231e-04]],\n",
+ " device='cuda:0')\n"
+ ]
+ }
+ ],
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " print(H[0:5, 0:5])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "tensor([[ 1.2748e-03, 5.3656e-04, 1.7376e-04, 3.3888e-06, -1.7049e-04],\n",
+ " [ 5.3656e-04, 6.3624e-04, 2.5957e-05, 3.3281e-04, -1.6239e-05],\n",
+ " [ 1.7376e-04, 2.5957e-05, 7.6328e-04, 7.7603e-05, 1.8272e-04],\n",
+ " [ 3.3888e-06, 3.3281e-04, 7.7603e-05, 9.6281e-04, 1.2375e-04],\n",
+ " [-1.7049e-04, -1.6239e-05, 1.8272e-04, 1.2375e-04, 6.9231e-04]],\n",
+ " dtype=torch.float64)\n"
+ ]
+ }
+ ],
+ "source": [
+ "if torch.cuda.is_available():\n",
+ " # This operation is demanding, because a symmetric matrix is transferred\n",
+ " # back to the CPU. It is possible for up to 30000 rows.\n",
+ " print(H.to(\"cpu\", torch.double)[0:5, 0:5])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/extra/Getting_Started/JuliaQuickstart.ipynb b/extra/Getting_Started/JuliaQuickstart.ipynb
new file mode 100755
index 0000000..6f9e005
--- /dev/null
+++ b/extra/Getting_Started/JuliaQuickstart.ipynb
@@ -0,0 +1,5742 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Demo script for Julia language\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Basic Interaction & Plotting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.024"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# In Julia, calculations can be written very comfortably\n",
+ "n = 10\n",
+ "(2^n - 10^(0.3*n)) / 10^(0.3*n)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "using IJulia\n",
+ "using Gadfly\n",
+ "using RDatasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This is how additional packages can be imported\n",
+ "# import Pkg\n",
+ "# Pkg.add(\"RDatasets\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "iris = dataset(\"datasets\", \"iris\");"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "┌ Warning: `getindex(df::DataFrame, col_ind::ColumnIndex)` is deprecated, use `df[!, col_ind]` instead.\n",
+ "│ caller = evalmapping(::DataFrame, ::Symbol) at dataframes.jl:96\n",
+ "└ @ Gadfly /opt/julia/packages/Gadfly/1wgcD/src/dataframes.jl:96\n"
+ ]
+ },
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n"
+ ],
+ "text/html": [
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "Plot(...)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "plot(iris, x=:SepalLength, y=:PetalWidth, color=:Species, Geom.point)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Python $\\leftrightarrow$ Julia interoperability\n",
+ "\n",
+ "## Julia Kernel in Python notebook\n",
+ "\n",
+ "To use the `%%julia` magic in a notebook with another kernel, add `julia` in the defaults in the file `/opt/conda/lib/python3.7/site-packages/IPython/core/magics/script.py` (using nano in a terminal).\n",
+ "\n",
+ "Like here:\n",
+ "```python\n",
+ "defaults = [\n",
+ " 'sh',\n",
+ " 'bash',\n",
+ " 'perl',\n",
+ " 'ruby',\n",
+ " 'python',\n",
+ " 'python2',\n",
+ " 'python3',\n",
+ " 'pypy',\n",
+ " 'julia', # add the julia interpreter\n",
+ "]\n",
+ "```\n",
+ "\n",
+ "Then restart the kernel and use the Julia kernel in a non-Julia notebook with:\n",
+ "\n",
+ "```python\n",
+ "%%julia\n",
+ "rfib(n) = n < 2 ? n : rfib(n-1) + rfib(n-2)\n",
+ "rfib(12)\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Python Modules in Julia\n",
+ "\n",
+ "**Important: This functionality is still in progress, see https://github.com/JuliaPy**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m registry at `/opt/julia/registries/General`\n",
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m git-repo `https://github.com/JuliaRegistries/General.git`\n",
+ "\u001b[?25l\u001b[2K\u001b[?25h\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions...\n",
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/opt/julia/environments/v1.1/Project.toml`\n",
+ "\u001b[90m [no changes]\u001b[39m\n",
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `/opt/julia/environments/v1.1/Manifest.toml`\n",
+ "\u001b[90m [no changes]\u001b[39m\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Install if not already done\n",
+ "import Pkg; Pkg.add(\"PyCall\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "using PyCall"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PyObject "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np = pyimport(\"numpy\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Did it work?\n",
+ " -> true\n"
+ ]
+ }
+ ],
+ "source": [
+ "println(\"Did it work?\\n -> \", np.sin(np.pi/2) == 1.0)"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Julia 1.1.0",
+ "language": "julia",
+ "name": "julia-1.1"
+ },
+ "language_info": {
+ "file_extension": ".jl",
+ "mimetype": "application/julia",
+ "name": "julia",
+ "version": "1.1.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/extra/Getting_Started/JupyterBasics.ipynb b/extra/Getting_Started/JupyterBasics.ipynb
new file mode 100755
index 0000000..52b7f35
--- /dev/null
+++ b/extra/Getting_Started/JupyterBasics.ipynb
@@ -0,0 +1,337 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Getting Started\n",
+ "\n",
+ "**Markdown** is a way to write documentation and even embed `HTML` and `LaTeX` formulas.\n",
+ "\n",
+ "### Jupyter tutorial\n",
+ "\n",
+ "[![Jupyter YouTube Tutorial](https://img.youtube.com/vi/CwFq3YDU6_Y/0.jpg)](https://www.youtube.com/watch?v=CwFq3YDU6_Y&rel=0&showinfo=0)\n",
+ "\n",
+ "\n",
+ "### Bayes' Rule\n",
+ "$$P(A \\mid B) = \\frac{P(B \\mid A)P(A)}{P(B)}$$"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Coding and magic commands"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[1, 8, 27, 64, 125, 216, 343]"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[x**3 for x in range(1,8)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/json": {
+ "cell": {
+ "!": "OSMagics",
+ "HTML": "Other",
+ "SVG": "Other",
+ "bash": "Other",
+ "capture": "ExecutionMagics",
+ "debug": "ExecutionMagics",
+ "file": "Other",
+ "html": "DisplayMagics",
+ "javascript": "DisplayMagics",
+ "js": "DisplayMagics",
+ "latex": "DisplayMagics",
+ "markdown": "DisplayMagics",
+ "perl": "Other",
+ "prun": "ExecutionMagics",
+ "pypy": "Other",
+ "python": "Other",
+ "python2": "Other",
+ "python3": "Other",
+ "ruby": "Other",
+ "script": "ScriptMagics",
+ "sh": "Other",
+ "svg": "DisplayMagics",
+ "sx": "OSMagics",
+ "system": "OSMagics",
+ "time": "ExecutionMagics",
+ "timeit": "ExecutionMagics",
+ "writefile": "OSMagics"
+ },
+ "line": {
+ "alias": "OSMagics",
+ "alias_magic": "BasicMagics",
+ "autoawait": "AsyncMagics",
+ "autocall": "AutoMagics",
+ "automagic": "AutoMagics",
+ "autosave": "KernelMagics",
+ "bookmark": "OSMagics",
+ "cat": "Other",
+ "cd": "OSMagics",
+ "clear": "KernelMagics",
+ "colors": "BasicMagics",
+ "conda": "PackagingMagics",
+ "config": "ConfigMagics",
+ "connect_info": "KernelMagics",
+ "cp": "Other",
+ "debug": "ExecutionMagics",
+ "dhist": "OSMagics",
+ "dirs": "OSMagics",
+ "doctest_mode": "BasicMagics",
+ "ed": "Other",
+ "edit": "KernelMagics",
+ "env": "OSMagics",
+ "gui": "BasicMagics",
+ "hist": "Other",
+ "history": "HistoryMagics",
+ "killbgscripts": "ScriptMagics",
+ "ldir": "Other",
+ "less": "KernelMagics",
+ "lf": "Other",
+ "lk": "Other",
+ "ll": "Other",
+ "load": "CodeMagics",
+ "load_ext": "ExtensionMagics",
+ "loadpy": "CodeMagics",
+ "logoff": "LoggingMagics",
+ "logon": "LoggingMagics",
+ "logstart": "LoggingMagics",
+ "logstate": "LoggingMagics",
+ "logstop": "LoggingMagics",
+ "ls": "Other",
+ "lsmagic": "BasicMagics",
+ "lx": "Other",
+ "macro": "ExecutionMagics",
+ "magic": "BasicMagics",
+ "man": "KernelMagics",
+ "matplotlib": "PylabMagics",
+ "mkdir": "Other",
+ "more": "KernelMagics",
+ "mv": "Other",
+ "notebook": "BasicMagics",
+ "page": "BasicMagics",
+ "pastebin": "CodeMagics",
+ "pdb": "ExecutionMagics",
+ "pdef": "NamespaceMagics",
+ "pdoc": "NamespaceMagics",
+ "pfile": "NamespaceMagics",
+ "pinfo": "NamespaceMagics",
+ "pinfo2": "NamespaceMagics",
+ "pip": "PackagingMagics",
+ "popd": "OSMagics",
+ "pprint": "BasicMagics",
+ "precision": "BasicMagics",
+ "prun": "ExecutionMagics",
+ "psearch": "NamespaceMagics",
+ "psource": "NamespaceMagics",
+ "pushd": "OSMagics",
+ "pwd": "OSMagics",
+ "pycat": "OSMagics",
+ "pylab": "PylabMagics",
+ "qtconsole": "KernelMagics",
+ "quickref": "BasicMagics",
+ "recall": "HistoryMagics",
+ "rehashx": "OSMagics",
+ "reload_ext": "ExtensionMagics",
+ "rep": "Other",
+ "rerun": "HistoryMagics",
+ "reset": "NamespaceMagics",
+ "reset_selective": "NamespaceMagics",
+ "rm": "Other",
+ "rmdir": "Other",
+ "run": "ExecutionMagics",
+ "save": "CodeMagics",
+ "sc": "OSMagics",
+ "set_env": "OSMagics",
+ "store": "StoreMagics",
+ "sx": "OSMagics",
+ "system": "OSMagics",
+ "tb": "ExecutionMagics",
+ "time": "ExecutionMagics",
+ "timeit": "ExecutionMagics",
+ "unalias": "OSMagics",
+ "unload_ext": "ExtensionMagics",
+ "who": "NamespaceMagics",
+ "who_ls": "NamespaceMagics",
+ "whos": "NamespaceMagics",
+ "xdel": "NamespaceMagics",
+ "xmode": "BasicMagics"
+ }
+ },
+ "text/plain": [
+ "Available line magics:\n",
+ "%alias %alias_magic %autoawait %autocall %automagic %autosave %bookmark %cat %cd %clear %colors %conda %config %connect_info %cp %debug %dhist %dirs %doctest_mode %ed %edit %env %gui %hist %history %killbgscripts %ldir %less %lf %lk %ll %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %lx %macro %magic %man %matplotlib %mkdir %more %mv %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pip %popd %pprint %precision %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %rep %rerun %reset %reset_selective %rm %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode\n",
+ "\n",
+ "Available cell magics:\n",
+ "%%! %%HTML %%SVG %%bash %%capture %%debug %%file %%html %%javascript %%js %%latex %%markdown %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile\n",
+ "\n",
+ "Automagic is ON, % prefix IS NOT needed for line magics."
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# List all magic commands\n",
+ "%lsmagic"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np \n",
+ "x = np.random.normal(0, 1, 1_000_000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1.03 ms ± 4.93 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%timeit \n",
+ "x.mean()**2 - (x**2).mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Send variables back and forth in bash\n",
+ "\n",
+ "The strings will be saved as files in the working directory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "string_0\n",
+ "string_1\n",
+ "string_2\n",
+ "string_3\n",
+ "string_4\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i in range(5):\n",
+ " string = \"string_{}\".format(i)\n",
+ " print(string)\n",
+ " !touch $string"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "-rw-r--r-- 1 jovyan users 0 Dec 19 08:44 string_0\n",
+ "-rw-r--r-- 1 jovyan users 0 Dec 19 08:44 string_1\n",
+ "-rw-r--r-- 1 jovyan users 0 Dec 19 08:44 string_2\n",
+ "-rw-r--r-- 1 jovyan users 0 Dec 19 08:44 string_3\n",
+ "-rw-r--r-- 1 jovyan users 0 Dec 19 08:44 string_4\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ls -l | grep string_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['string_0', 'string_1', 'string_2', 'string_3', 'string_4']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "files = !ls -1 string_*\n",
+ "!rm string_*\n",
+ "files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/extra/Getting_Started/MultiInterpreterNotebook.ipynb b/extra/Getting_Started/MultiInterpreterNotebook.ipynb
new file mode 100755
index 0000000..ed97de3
--- /dev/null
+++ b/extra/Getting_Started/MultiInterpreterNotebook.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Python and R in a single Notebook \n",
+ "\n",
+ "From the instructions of [all-spark-notebook](https://github.com/jupyter/docker-stacks/tree/master/all-spark-notebook) and [Stackoverflow](https://stackoverflow.com/questions/39008069/r-and-python-in-one-jupyter-notebook).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Last run: 2019-12-19 09:09:24.610738 UTC\n"
+ ]
+ }
+ ],
+ "source": [
+ "import datetime\n",
+ "print('Last run:', datetime.datetime.utcnow(), 'UTC') # timezone can't be detected from browser"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Processing in Python\n",
+ "* Create a dataframe\n",
+ "* load the rpy2 module"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "n = 100\n",
+ "df = pd.DataFrame({\n",
+ " 'cups_of_coffee': np.random.exponential(3, size=n),\n",
+ " 'productivity': np.random.normal(100, 15, n),\n",
+ " 'gender': np.random.choice([\"f\", \"m\"], n)\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext rpy2.ipython"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Processing in R\n",
+ "\n",
+ "* Install and load ggplot2\n",
+ "* Receive the dataframe and set the figure size including resolution\n",
+ "* Plot the dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%R\n",
+ "#install.packages(\"ggplot2\", repos='http://cran.us.r-project.org', quiet=TRUE)\n",
+ "library(ggplot2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:191: FutureWarning: from_items is deprecated. Please use DataFrame.from_dict(dict(items), ...) instead. DataFrame.from_dict(OrderedDict(items)) may be used to preserve the key order.\n",
+ " res = PandasDataFrame.from_items(items)\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%%R -i df -w 10 -h 10 --units cm -r 200\n",
+ "ggplot(df, aes(x=cups_of_coffee, y=productivity, color=gender)) + geom_point() + \n",
+ "geom_smooth(method=\"lm\", se=TRUE) + ylab(\"Normalized Productivity\") + xlab(\"Cups of Coffee per day\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/extra/Getting_Started/jupyterlab-overview.png b/extra/Getting_Started/jupyterlab-overview.png
new file mode 100755
index 0000000..6618e53
Binary files /dev/null and b/extra/Getting_Started/jupyterlab-overview.png differ
diff --git a/extra/Getting_Started/scroller.py b/extra/Getting_Started/scroller.py
new file mode 100755
index 0000000..82de8e8
--- /dev/null
+++ b/extra/Getting_Started/scroller.py
@@ -0,0 +1,57 @@
+def scroller(index, quantity, timerange=timedelta(days=0), startdt="", enddt=""):
+ print("Starting to scroll", end='')
+ # Retrieve the datetimes, note that timerange has a higher priority
+ if timerange.total_seconds() > 0:
+ now = datetime.utcnow().replace(tzinfo=pytz.UTC)
+ startdt = (now - timerange).isoformat()
+ enddt = now.isoformat()
+
+ # search the first page and write the result to data
+ response = es.search(
+ index=index,
+ body={
+ "query": {
+ "bool": {
+ "must": [
+ {"range" : {
+ "phenomenonTime" : {
+ #"gte": "2018-02-20T09:08:34.230693+00:00",
+ "gte": startdt,
+ "lte": enddt,
+ "time_zone": "+01:00"
+ }
+ }},
+ {
+ "match_phrase": {
+ "Datastream.name.keyword": quantity
+ }
+ }
+ ]
+ }
+ }
+ },
+ scroll='10m'
+ )
+ data = [[row["_source"]["phenomenonTime"], row["_source"]["result"]] for row in response['hits']['hits']]
+
+ # Append new pages until there aren't any left
+ while len(response['hits']['hits']):
+ print(".", end='')
+ # process results
+ # print([item["_id"] for item in response["hits"]["hits"]])
+ response = es.scroll(scroll_id=response['_scroll_id'], scroll='10m')
+ data += [[row["_source"]["phenomenonTime"], row["_source"]["result"]] for row in response['hits']['hits']]
+
+ # Convert data to a DataFrame and return it
+ df = pd.DataFrame(data, columns=["phenomenonTime", quantity])
+ # df.index = pd.to_datetime(df["phenomenonTime"].map(lambda t: t.split(".")[0]), utc=True)
+ df.index = pd.to_datetime(df["phenomenonTime"].map(lambda t: roundto(t, 1)), utc=True)
+ df = df.drop(["phenomenonTime"], axis=1)
+ print("\nFetched {} tuples.".format(df.shape[0]))
+ return df
+
+def roundto(string, n):
+ base = string.split(".")[0]
+ if n > 0:
+ base += "." + string.split(".")[1][:n]
+ return base
diff --git a/extra/performance-test.png b/extra/performance-test.png
new file mode 100644
index 0000000..050731f
Binary files /dev/null and b/extra/performance-test.png differ
diff --git a/src/Dockerfile b/src/Dockerfile
index b0acda4..2dd636c 100644
--- a/src/Dockerfile
+++ b/src/Dockerfile
@@ -417,6 +417,44 @@ RUN conda install -y -c pytorch \
############################ Useful packages ###############################
############################################################################
+# Update conda
+RUN conda update -n base conda -y
+
+# Install elasticsearch libs
+USER root
+# Download the elasticsearch-hadoop connector jar. curl needs -o to keep the
+# file (without it the jar is streamed to the build log and discarded) and -f
+# so that an HTTP error fails the build. Maven Central is served over HTTPS
+# from repo1.maven.org.
+# NOTE(review): /usr/share/java is an assumed destination - confirm where the
+# jar is expected (e.g. a Spark jars directory) before relying on it.
+RUN apt-get update \
+    && mkdir -p /usr/share/java \
+    && curl -fsSL -o /usr/share/java/elasticsearch-hadoop-6.8.1.jar \
+        https://repo1.maven.org/maven2/org/elasticsearch/elasticsearch-hadoop/6.8.1/elasticsearch-hadoop-6.8.1.jar
+# Python client for Elasticsearch
+RUN pip install --no-cache-dir elasticsearch==7.1.0
+
+# Install rpy2 to share data between Python and R, plus plotly and ipyleaflet.
+# -y answers the confirmation prompt explicitly, as the other conda calls in
+# this file do, because the build has no interactive terminal.
+RUN conda install -y rpy2=2.9.4 plotly=4.4.1
+RUN conda install -y -c conda-forge ipyleaflet
+
+# Install htop, apt-utils and Graphviz via apt, then the pinned graphviz
+# Python package via pip. set -ex aborts on the first failing command and
+# echoes each command into the build log.
+RUN set -ex \
+    && apt-get update \
+    && apt-get -y install htop apt-utils graphviz libgraphviz-dev \
+    && pip install --no-cache-dir graphviz==0.11
+
+# Install various extensions
+# JupyterLab extensions
+RUN jupyter labextension install @jupyterlab/github
+RUN jupyter labextension install jupyterlab-drawio
+RUN jupyter labextension install jupyter-leaflet
+RUN jupyter labextension install @jupyterlab/plotly-extension
+RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager
+# TabNine autocompletion: install the package, then register and enable its
+# notebook and server extensions
+RUN pip install --no-cache-dir jupyter-tabnine==1.0.2 && \
+    jupyter nbextension install --py jupyter_tabnine && \
+    jupyter nbextension enable --py jupyter_tabnine && \
+    jupyter serverextension enable --py jupyter_tabnine
+# Classic-notebook extensions and their configurator; -y answers the conda
+# confirmation prompt explicitly, as the other conda calls in this file do
+RUN conda install -y -c conda-forge jupyter_contrib_nbextensions && \
+    conda install -y -c conda-forge jupyter_nbextensions_configurator && \
+    jupyter nbextension enable codefolding/main
+RUN jupyter labextension install @ijmbarr/jupyterlab_spellchecker
+
+# Copying config and fix permissions
COPY jupyter_notebook_config.json /etc/jupyter/
RUN fix-permissions /home/$NB_USER