MLstuff/ISLR/notebooks/.ipynb_checkpoints/3.6.2.R-checkpoint.ipynb

339 lines
10 KiB
Plaintext
Raw Permalink Normal View History

2020-08-01 22:25:45 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"library(MASS)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<table width=\"100%\" summary=\"page for Boston {MASS}\"><tr><td>Boston {MASS}</td><td style=\"text-align: right;\">R Documentation</td></tr></table>\n",
"\n",
"<h2>\n",
"Housing Values in Suburbs of Boston\n",
"</h2>\n",
"\n",
"<h3>Description</h3>\n",
"\n",
"<p>The <code>Boston</code> data frame has 506 rows and 14 columns.\n",
"</p>\n",
"\n",
"\n",
"<h3>Usage</h3>\n",
"\n",
"<pre>\n",
"Boston\n",
"</pre>\n",
"\n",
"\n",
"<h3>Format</h3>\n",
"\n",
"<p>This data frame contains the following columns:\n",
"</p>\n",
"\n",
"<dl>\n",
"<dt><code>crim</code></dt><dd>\n",
"<p>per capita crime rate by town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>zn</code></dt><dd>\n",
"<p>proportion of residential land zoned for lots over 25,000 sq.ft.\n",
"</p>\n",
"</dd>\n",
"<dt><code>indus</code></dt><dd>\n",
"<p>proportion of non-retail business acres per town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>chas</code></dt><dd>\n",
"<p>Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).\n",
"</p>\n",
"</dd>\n",
"<dt><code>nox</code></dt><dd>\n",
"<p>nitrogen oxides concentration (parts per 10 million).\n",
"</p>\n",
"</dd>\n",
"<dt><code>rm</code></dt><dd>\n",
"<p>average number of rooms per dwelling.\n",
"</p>\n",
"</dd>\n",
"<dt><code>age</code></dt><dd>\n",
"<p>proportion of owner-occupied units built prior to 1940.\n",
"</p>\n",
"</dd>\n",
"<dt><code>dis</code></dt><dd>\n",
"<p>weighted mean of distances to five Boston employment centres.\n",
"</p>\n",
"</dd>\n",
"<dt><code>rad</code></dt><dd>\n",
"<p>index of accessibility to radial highways.\n",
"</p>\n",
"</dd>\n",
"<dt><code>tax</code></dt><dd>\n",
"<p>full-value property-tax rate per \\$10,000.\n",
"</p>\n",
"</dd>\n",
"<dt><code>ptratio</code></dt><dd>\n",
"<p>pupil-teacher ratio by town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>black</code></dt><dd>\n",
"<p><i>1000(Bk - 0.63)^2</i> where <i>Bk</i> is the proportion of blacks\n",
"by town.\n",
"</p>\n",
"</dd>\n",
"<dt><code>lstat</code></dt><dd>\n",
"<p>lower status of the population (percent).\n",
"</p>\n",
"</dd>\n",
"<dt><code>medv</code></dt><dd>\n",
"<p>median value of owner-occupied homes in \\$1000s.\n",
"</p>\n",
"</dd>\n",
"</dl>\n",
"\n",
"\n",
"\n",
"<h3>Source</h3>\n",
"\n",
"<p>Harrison, D. and Rubinfeld, D.L. (1978)\n",
"Hedonic prices and the demand for clean air.\n",
"<em>J. Environ. Economics and Management</em>\n",
"<b>5</b>, 81&ndash;102.\n",
"</p>\n",
"<p>Belsley D.A., Kuh, E. and Welsch, R.E. (1980)\n",
"<em>Regression Diagnostics. Identifying Influential Data and Sources\n",
"of Collinearity.</em>\n",
"New York: Wiley.\n",
"</p>\n",
"\n",
"<hr /><div style=\"text-align: center;\">[Package <em>MASS</em> version 7.3-51.5 ]</div>"
],
"text/latex": [
"\\inputencoding{utf8}\n",
"\\HeaderA{Boston}{Housing Values in Suburbs of Boston}{Boston}\n",
"\\keyword{datasets}{Boston}\n",
"%\n",
"\\begin{Description}\\relax\n",
"The \\code{Boston} data frame has 506 rows and 14 columns.\n",
"\\end{Description}\n",
"%\n",
"\\begin{Usage}\n",
"\\begin{verbatim}\n",
"Boston\n",
"\\end{verbatim}\n",
"\\end{Usage}\n",
"%\n",
"\\begin{Format}\n",
"This data frame contains the following columns:\n",
"\\begin{description}\n",
"\n",
"\\item[\\code{crim}] \n",
"per capita crime rate by town.\n",
"\n",
"\\item[\\code{zn}] \n",
"proportion of residential land zoned for lots over 25,000 sq.ft.\n",
"\n",
"\\item[\\code{indus}] \n",
"proportion of non-retail business acres per town.\n",
"\n",
"\\item[\\code{chas}] \n",
"Charles River dummy variable (= 1 if tract bounds river; 0 otherwise).\n",
"\n",
"\\item[\\code{nox}] \n",
"nitrogen oxides concentration (parts per 10 million).\n",
"\n",
"\\item[\\code{rm}] \n",
"average number of rooms per dwelling.\n",
"\n",
"\\item[\\code{age}] \n",
"proportion of owner-occupied units built prior to 1940.\n",
"\n",
"\\item[\\code{dis}] \n",
"weighted mean of distances to five Boston employment centres.\n",
"\n",
"\\item[\\code{rad}] \n",
"index of accessibility to radial highways.\n",
"\n",
"\\item[\\code{tax}] \n",
"full-value property-tax rate per \\bsl{}\\$10,000.\n",
"\n",
"\\item[\\code{ptratio}] \n",
"pupil-teacher ratio by town.\n",
"\n",
"\\item[\\code{black}] \n",
"\\eqn{1000(Bk - 0.63)^2}{} where \\eqn{Bk}{} is the proportion of blacks\n",
"by town.\n",
"\n",
"\\item[\\code{lstat}] \n",
"lower status of the population (percent).\n",
"\n",
"\\item[\\code{medv}] \n",
"median value of owner-occupied homes in \\bsl{}\\$1000s.\n",
"\n",
"\n",
"\\end{description}\n",
"\n",
"\\end{Format}\n",
"%\n",
"\\begin{Source}\\relax\n",
"Harrison, D. and Rubinfeld, D.L. (1978)\n",
"Hedonic prices and the demand for clean air.\n",
"\\emph{J. Environ. Economics and Management}\n",
"\\bold{5}, 81--102.\n",
"\n",
"Belsley D.A., Kuh, E. and Welsch, R.E. (1980)\n",
"\\emph{Regression Diagnostics. Identifying Influential Data and Sources\n",
"of Collinearity.}\n",
"New York: Wiley.\n",
"\\end{Source}"
],
"text/plain": [
"Boston package:MASS R Documentation\n",
"\n",
"_\bH_\bo_\bu_\bs_\bi_\bn_\bg _\bV_\ba_\bl_\bu_\be_\bs _\bi_\bn _\bS_\bu_\bb_\bu_\br_\bb_\bs _\bo_\bf _\bB_\bo_\bs_\bt_\bo_\bn\n",
"\n",
"_\bD_\be_\bs_\bc_\br_\bi_\bp_\bt_\bi_\bo_\bn:\n",
"\n",
" The Boston data frame has 506 rows and 14 columns.\n",
"\n",
"_\bU_\bs_\ba_\bg_\be:\n",
"\n",
" Boston\n",
" \n",
"_\bF_\bo_\br_\bm_\ba_\bt:\n",
"\n",
" This data frame contains the following columns:\n",
"\n",
" crim per capita crime rate by town.\n",
"\n",
" zn proportion of residential land zoned for lots over 25,000\n",
" sq.ft.\n",
"\n",
" indus proportion of non-retail business acres per town.\n",
"\n",
" chas Charles River dummy variable (= 1 if tract bounds river; 0\n",
" otherwise).\n",
"\n",
" nox nitrogen oxides concentration (parts per 10 million).\n",
"\n",
" rm average number of rooms per dwelling.\n",
"\n",
" age proportion of owner-occupied units built prior to 1940.\n",
"\n",
" dis weighted mean of distances to five Boston employment\n",
" centres.\n",
"\n",
" rad index of accessibility to radial highways.\n",
"\n",
" tax full-value property-tax rate per \\$10,000.\n",
"\n",
" ptratio pupil-teacher ratio by town.\n",
"\n",
" black 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by\n",
" town.\n",
"\n",
" lstat lower status of the population (percent).\n",
"\n",
" medv median value of owner-occupied homes in \\$1000s.\n",
"\n",
"_\bS_\bo_\bu_\br_\bc_\be:\n",
"\n",
" Harrison, D. and Rubinfeld, D.L. (1978) Hedonic prices and the\n",
" demand for clean air. _J. Environ. Economics and Management_ *5*,\n",
" 81-102.\n",
"\n",
" Belsley D.A., Kuh, E. and Welsch, R.E. (1980) _Regression\n",
" Diagnostics. Identifying Influential Data and Sources of\n",
" Collinearity._ New York: Wiley.\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"?Boston"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"Call:\n",
"lm(formula = medv ~ lstat, data = Boston)\n",
"\n",
"Residuals:\n",
" Min 1Q Median 3Q Max \n",
"-15.168 -3.990 -1.318 2.034 24.500 \n",
"\n",
"Coefficients:\n",
" Estimate Std. Error t value Pr(>|t|) \n",
"(Intercept) 34.55384 0.56263 61.41 <2e-16 ***\n",
"lstat -0.95005 0.03873 -24.53 <2e-16 ***\n",
"---\n",
"Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1\n",
"\n",
"Residual standard error: 6.216 on 504 degrees of freedom\n",
"Multiple R-squared: 0.5441,\tAdjusted R-squared: 0.5432 \n",
"F-statistic: 601.6 on 1 and 504 DF, p-value: < 2.2e-16\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"lm.fit = lm(medv~lstat, data=Boston)\n",
"summary(lm.fit)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}