MLstuff/ISLR/notebooks/3.6.2.R.ipynb
2020-03-27 22:06:31 -03:00

339 lines
10 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Attach MASS, the package that supplies the Boston data set used below.\n",
"library(\"MASS\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<table width=\"100%\" summary=\"page for Boston {MASS}\"><tr><td>Boston {MASS}</td><td style=\"text-align: right;\">R Documentation</td></tr></table>\n",
"\n",
"<h2>\n",
"Housing Values in Suburbs of Boston\n",
"</h2>\n",
"\n",
"<h3>Description</h3>\n",
"\n",
"<p>The <code>Boston</code> data frame has 506 rows and 14 columns.\n",
"</p>\n",
"\n",
"\n",
"<h3>Usage</h3>\n",
"\n",
"<pre>\n",
"Boston\n",
"</pre>\n",
"\n",
"\n",
"<h3>Format</h3>\n",
"\n",
"<p>This data frame contains the following columns:\n",
"</p>\n",
"\n",
"<dl>\n",
"<dt><code>crim</code></dt><dd>\n",
"<p>per capita crime rate by town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>zn</code></dt><dd>\n",
"<p>proportion of residential land zoned for lots over 25,000 sq.ft.\n",
"</p>\n",
"</dd>\n",
"<dt><code>indus</code></dt><dd>\n",
"<p>proportion of non-retail business acres per town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>chas</code></dt><dd>\n",
"<p>Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).\n",
"</p>\n",
"</dd>\n",
"<dt><code>nox</code></dt><dd>\n",
"<p>nitrogen oxides concentration (parts per 10 million).\n",
"</p>\n",
"</dd>\n",
"<dt><code>rm</code></dt><dd>\n",
"<p>average number of rooms per dwelling.\n",
"</p>\n",
"</dd>\n",
"<dt><code>age</code></dt><dd>\n",
"<p>proportion of owner-occupied units built prior to 1940.\n",
"</p>\n",
"</dd>\n",
"<dt><code>dis</code></dt><dd>\n",
"<p>weighted mean of distances to five Boston employment centres.\n",
"</p>\n",
"</dd>\n",
"<dt><code>rad</code></dt><dd>\n",
"<p>index of accessibility to radial highways.\n",
"</p>\n",
"</dd>\n",
"<dt><code>tax</code></dt><dd>\n",
"<p>full-value property-tax rate per \\$10,000.\n",
"</p>\n",
"</dd>\n",
"<dt><code>ptratio</code></dt><dd>\n",
"<p>pupil-teacher ratio by town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>black</code></dt><dd>\n",
"<p><i>1000(Bk - 0.63)^2</i> where <i>Bk</i> is the proportion of blacks\n",
"by town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>lstat</code></dt><dd>\n",
"<p>lower status of the population (percent).\n",
"</p>\n",
"</dd>\n",
"<dt><code>medv</code></dt><dd>\n",
"<p>median value of owner-occupied homes in \\$1000s.\n",
"</p>\n",
"</dd>\n",
"</dl>\n",
"\n",
"\n",
"\n",
"<h3>Source</h3>\n",
"\n",
"<p>Harrison, D. and Rubinfeld, D.L. (1978)\n",
"Hedonic prices and the demand for clean air.\n",
"<em>J. Environ. Economics and Management</em>\n",
"<b>5</b>, 81&ndash;102.\n",
"</p>\n",
"<p>Belsley D.A., Kuh, E. and Welsch, R.E. (1980)\n",
"<em>Regression Diagnostics. Identifying Influential Data and Sources\n",
"of Collinearity.</em>\n",
"New York: Wiley.\n",
"</p>\n",
"\n",
"<hr /><div style=\"text-align: center;\">[Package <em>MASS</em> version 7.3-51.5 ]</div>"
],
"text/latex": [
"\\inputencoding{utf8}\n",
"\\HeaderA{Boston}{Housing Values in Suburbs of Boston}{Boston}\n",
"\\keyword{datasets}{Boston}\n",
"%\n",
"\\begin{Description}\\relax\n",
"The \\code{Boston} data frame has 506 rows and 14 columns.\n",
"\\end{Description}\n",
"%\n",
"\\begin{Usage}\n",
"\\begin{verbatim}\n",
"Boston\n",
"\\end{verbatim}\n",
"\\end{Usage}\n",
"%\n",
"\\begin{Format}\n",
"This data frame contains the following columns:\n",
"\\begin{description}\n",
"\n",
"\\item[\\code{crim}] \n",
"per capita crime rate by town.\n",
"\n",
"\\item[\\code{zn}] \n",
"proportion of residential land zoned for lots over 25,000 sq.ft.\n",
"\n",
"\\item[\\code{indus}] \n",
"proportion of non-retail business acres per town.\n",
"\n",
"\\item[\\code{chas}] \n",
"Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).\n",
"\n",
"\\item[\\code{nox}] \n",
"nitrogen oxides concentration (parts per 10 million).\n",
"\n",
"\\item[\\code{rm}] \n",
"average number of rooms per dwelling.\n",
"\n",
"\\item[\\code{age}] \n",
"proportion of owner-occupied units built prior to 1940.\n",
"\n",
"\\item[\\code{dis}] \n",
"weighted mean of distances to five Boston employment centres.\n",
"\n",
"\\item[\\code{rad}] \n",
"index of accessibility to radial highways.\n",
"\n",
"\\item[\\code{tax}] \n",
"full-value property-tax rate per \\bsl{}\\$10,000.\n",
"\n",
"\\item[\\code{ptratio}] \n",
"pupil-teacher ratio by town.\n",
"\n",
"\\item[\\code{black}] \n",
"\\eqn{1000(Bk - 0.63)^2}{} where \\eqn{Bk}{} is the proportion of blacks\n",
"by town.\n",
"\n",
"\\item[\\code{lstat}] \n",
"lower status of the population (percent).\n",
"\n",
"\\item[\\code{medv}] \n",
"median value of owner-occupied homes in \\bsl{}\\$1000s.\n",
"\n",
"\n",
"\\end{description}\n",
"\n",
"\\end{Format}\n",
"%\n",
"\\begin{Source}\\relax\n",
"Harrison, D. and Rubinfeld, D.L. (1978)\n",
"Hedonic prices and the demand for clean air.\n",
"\\emph{J. Environ. Economics and Management}\n",
"\\bold{5}, 81--102.\n",
"\n",
"Belsley D.A., Kuh, E. and Welsch, R.E. (1980)\n",
"\\emph{Regression Diagnostics. Identifying Influential Data and Sources\n",
"of Collinearity.}\n",
"New York: Wiley.\n",
"\\end{Source}"
],
"text/plain": [
"Boston package:MASS R Documentation\n",
"\n",
"_\bH_\bo_\bu_\bs_\bi_\bn_\bg _\bV_\ba_\bl_\bu_\be_\bs _\bi_\bn _\bS_\bu_\bb_\bu_\br_\bb_\bs _\bo_\bf _\bB_\bo_\bs_\bt_\bo_\bn\n",
"\n",
"_\bD_\be_\bs_\bc_\br_\bi_\bp_\bt_\bi_\bo_\bn:\n",
"\n",
" The Boston data frame has 506 rows and 14 columns.\n",
"\n",
"_\bU_\bs_\ba_\bg_\be:\n",
"\n",
" Boston\n",
" \n",
"_\bF_\bo_\br_\bm_\ba_\bt:\n",
"\n",
" This data frame contains the following columns:\n",
"\n",
" crim per capita crime rate by town.\n",
"\n",
" zn proportion of residential land zoned for lots over 25,000\n",
" sq.ft.\n",
"\n",
" indus proportion of non-retail business acres per town.\n",
"\n",
" chas Charles River dummy variable (= 1 if tract bounds river; 0\n",
" otherwise).\n",
"\n",
" nox nitrogen oxides concentration (parts per 10 million).\n",
"\n",
" rm average number of rooms per dwelling.\n",
"\n",
" age proportion of owner-occupied units built prior to 1940.\n",
"\n",
" dis weighted mean of distances to five Boston employment\n",
" centres.\n",
"\n",
" rad index of accessibility to radial highways.\n",
"\n",
" tax full-value property-tax rate per \\$10,000.\n",
"\n",
" ptratio pupil-teacher ratio by town.\n",
"\n",
" black 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by\n",
" town.\n",
"\n",
" lstat lower status of the population (percent).\n",
"\n",
" medv median value of owner-occupied homes in \\$1000s.\n",
"\n",
"_\bS_\bo_\bu_\br_\bc_\be:\n",
"\n",
" Harrison, D. and Rubinfeld, D.L. (1978) Hedonic prices and the\n",
" demand for clean air. _J. Environ. Economics and Management_ *5*,\n",
" 81-102.\n",
"\n",
" Belsley D.A., Kuh, E. and Welsch, R.E. (1980) _Regression\n",
" Diagnostics. Identifying Influential Data and Sources of\n",
" Collinearity._ New York: Wiley.\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Show the help page for the Boston data set (long form of the `?` shortcut).\n",
"help(\"Boston\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"Call:\n",
"lm(formula = medv ~ lstat, data = Boston)\n",
"\n",
"Residuals:\n",
" Min 1Q Median 3Q Max \n",
"-15.168 -3.990 -1.318 2.034 24.500 \n",
"\n",
"Coefficients:\n",
" Estimate Std. Error t value Pr(>|t|) \n",
"(Intercept) 34.55384 0.56263 61.41 <2e-16 ***\n",
"lstat -0.95005 0.03873 -24.53 <2e-16 ***\n",
"---\n",
"Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1\n",
"\n",
"Residual standard error: 6.216 on 504 degrees of freedom\n",
"Multiple R-squared: 0.5441,\tAdjusted R-squared: 0.5432 \n",
"F-statistic: 601.6 on 1 and 504 DF, p-value: < 2.2e-16\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Simple linear regression of median home value (medv) on the\n",
"# percentage of lower-status population (lstat), then print the fit summary.\n",
"lm.fit <- lm(medv ~ lstat, data = Boston)\n",
"summary(lm.fit)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}