{
"cells": [
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"import pandas as pd\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"from sklearn import linear_model\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"from sklearn import metrics\n",
"\n",
"# Plot styling: one sns.set call is enough, because each call re-applies the\n",
"# whole seaborn theme (a bare sns.set() before it would simply be overridden).\n",
"sns.set(style=\"whitegrid\")\n",
"# Example dataset shipped with seaborn; load_dataset may need network access\n",
"# the first time it runs.\n",
"tips = sns.load_dataset(\"tips\")\n",
"# Default size (width, height) for every matplotlib figure in this notebook.\n",
"plt.rcParams[\"figure.figsize\"] = (8, 5)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"\n",
"def precision_stats(model, y, X):\n",
"    \"\"\"Print goodness-of-fit diagnostics for an already fitted linear model.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    model : fitted estimator exposing ``predict``, ``coef_`` and ``intercept_``.\n",
"    y : observed response values. ``y - model.predict(X)`` must yield an object\n",
"        with a ``describe()`` method (e.g. a pandas Series).\n",
"    X : feature matrix handed to ``model.predict``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    None. The residual summary, coefficients, intercept, mean squared error,\n",
"    explained variance and R^2 are written to stdout.\n",
"    \"\"\"\n",
"    pred_y = model.predict(X)\n",
"    coefficients = model.coef_\n",
"    intercept = model.intercept_\n",
"\n",
"    mse = metrics.mean_squared_error(y, pred_y)\n",
"    # Explained variance and R^2 coincide only when the residuals average to\n",
"    # zero (e.g. least squares with an intercept, scored on its training data),\n",
"    # so both are reported instead of assuming they are the same number.\n",
"    explained_var = metrics.explained_variance_score(y, pred_y)\n",
"    r2 = metrics.r2_score(y, pred_y)\n",
"\n",
"    residuals = y - pred_y\n",
"    print(\"Residuals info\", residuals.describe())\n",
"\n",
"    print(\"coefficients:\", coefficients)\n",
"    print(\"intercept\", intercept)\n",
"\n",
"    print(\"MSE\", mse)\n",
"    print(\"explained variance\", explained_var)\n",
"    print(\"R2\", r2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" crim | \n",
" zn | \n",
" indus | \n",
" chas | \n",
" nox | \n",
" rm | \n",
" age | \n",
" dis | \n",
" rad | \n",
" tax | \n",
" ptratio | \n",
" black | \n",
" lstat | \n",
" medv | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 0.00632 | \n",
" 18.0 | \n",
" 2.31 | \n",
" 0 | \n",
" 0.538 | \n",
" 6.575 | \n",
" 65.2 | \n",
" 4.0900 | \n",
" 1 | \n",
" 296 | \n",
" 15.3 | \n",
" 396.90 | \n",
" 4.98 | \n",
" 24.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 0.02731 | \n",
" 0.0 | \n",
" 7.07 | \n",
" 0 | \n",
" 0.469 | \n",
" 6.421 | \n",
" 78.9 | \n",
" 4.9671 | \n",
" 2 | \n",
" 242 | \n",
" 17.8 | \n",
" 396.90 | \n",
" 9.14 | \n",
" 21.6 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 0.02729 | \n",
" 0.0 | \n",
" 7.07 | \n",
" 0 | \n",
" 0.469 | \n",
" 7.185 | \n",
" 61.1 | \n",
" 4.9671 | \n",
" 2 | \n",
" 242 | \n",
" 17.8 | \n",
" 392.83 | \n",
" 4.03 | \n",
" 34.7 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 0.03237 | \n",
" 0.0 | \n",
" 2.18 | \n",
" 0 | \n",
" 0.458 | \n",
" 6.998 | \n",
" 45.8 | \n",
" 6.0622 | \n",
" 3 | \n",
" 222 | \n",
" 18.7 | \n",
" 394.63 | \n",
" 2.94 | \n",
" 33.4 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 0.06905 | \n",
" 0.0 | \n",
" 2.18 | \n",
" 0 | \n",
" 0.458 | \n",
" 7.147 | \n",
" 54.2 | \n",
" 6.0622 | \n",
" 3 | \n",
" 222 | \n",
" 18.7 | \n",
" 396.90 | \n",
" 5.33 | \n",
" 36.2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 crim zn indus chas nox rm age dis rad \\\n",
"0 1 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 \n",
"1 2 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 \n",
"2 3 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 \n",
"3 4 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 \n",
"4 5 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 \n",
"\n",
" tax ptratio black lstat medv \n",
"0 296 15.3 396.90 4.98 24.0 \n",
"1 242 17.8 396.90 9.14 21.6 \n",
"2 242 17.8 392.83 4.03 34.7 \n",
"3 222 18.7 394.63 2.94 33.4 \n",
"4 222 18.7 396.90 5.33 36.2 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the Boston data set from the shared datasets directory (the path is\n",
"# relative to this notebook's location).\n",
"# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like the\n",
"# CSV's saved row index being read as data - confirm, and consider index_col=0\n",
"# if no later cell relies on that column.\n",
"Boston = pd.read_csv(\n",
"    \"../../datasets/Boston.csv\",\n",
")\n",
"# Preview the first rows.\n",
"Boston.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Residuals info count 5.060000e+02\n",
"mean -4.437382e-15\n",
"std 6.209603e+00\n",
"min -1.516745e+01\n",
"25% -3.989612e+00\n",
"50% -1.318186e+00\n",
"75% 2.033701e+00\n",
"max 2.450013e+01\n",
"Name: medv, dtype: float64\n",
"coefficients: [-0.95004935]\n",
"intercept 34.5538408793831\n",
"MSE 38.48296722989415\n",
"explained variance 0.5441462975864798\n"
]
}
],
"source": [
"# "
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: medv R-squared: 0.544\n",
"Model: OLS Adj. R-squared: 0.543\n",
"Method: Least Squares F-statistic: 601.6\n",
"Date: Sat, 28 Mar 2020 Prob (F-statistic): 5.08e-88\n",
"Time: 09:39:54 Log-Likelihood: -1641.5\n",
"No. Observations: 506 AIC: 3287.\n",
"Df Residuals: 504 BIC: 3295.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 34.5538 0.563 61.415 0.000 33.448 35.659\n",
"lstat -0.9500 0.039 -24.528 0.000 -1.026 -0.874\n",
"==============================================================================\n",
"Omnibus: 137.043 Durbin-Watson: 0.892\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 291.373\n",
"Skew: 1.453 Prob(JB): 5.36e-64\n",
"Kurtosis: 5.319 Cond. No. 29.7\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"# Ordinary least squares via the statsmodels formula API: regress medv on\n",
"# lstat (the formula adds the intercept term shown in the summary).\n",
"results = smf.ols(\n",
"    'medv ~ lstat',\n",
"    data=Boston,\n",
").fit()\n",
"# Full regression report: coefficients, standard errors, R-squared, diagnostics.\n",
"print(results.summary())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" \n",
" \n",
" Intercept | \n",
" 33.448457 | \n",
" 35.659225 | \n",
"
\n",
" \n",
" lstat | \n",
" -1.026148 | \n",
" -0.873951 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1\n",
"Intercept 33.448457 35.659225\n",
"lstat -1.026148 -0.873951"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 95% confidence intervals for the fitted coefficients; alpha=0.05 is the\n",
"# statsmodels default, written out so the confidence level is visible.\n",
"results.conf_int(alpha=0.05)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"