Você está na página 1 de 20

9/19/2019 Replace with Main Title

Replace with Main Title


Your Name
2019-09-19

> names(Real_estate_valuation_data_set_1_) <-


+ make.names(names(Real_estate_valuation_data_set_1_))

> summary(Real_estate_valuation_data_set_1_)

No X2.house.age X3.distance.to.the.nearest.MRT.station
Min. : 1.0 Min. : 0.000 Min. : 23.38
1st Qu.:104.2 1st Qu.: 9.025 1st Qu.: 289.32
Median :207.5 Median :16.100 Median : 492.23
Mean :207.5 Mean :17.713 Mean :1083.89
3rd Qu.:310.8 3rd Qu.:28.150 3rd Qu.:1454.28
Max. :414.0 Max. :43.800 Max. :6488.02
X4.number.of.convenience.stores X5.latitude X6.longitude
Min. : 0.000 Min. :24.93 Min. :121.5
1st Qu.: 1.000 1st Qu.:24.96 1st Qu.:121.5
Median : 4.000 Median :24.97 Median :121.5
Mean : 4.094 Mean :24.97 Mean :121.5
3rd Qu.: 6.000 3rd Qu.:24.98 3rd Qu.:121.5
Max. :10.000 Max. :25.01 Max. :121.6
Y.house.price.of.unit.area
Min. : 7.60
1st Qu.: 27.70
Median : 38.45
Mean : 37.98
3rd Qu.: 46.60
Max. :117.50

> library(abind, pos=25)

> library(e1071, pos=26)

> numSummary(Real_estate_valuation_data_set_1_[,"Y.house.price.of.unit.area

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 1/20
9/19/2019 Replace with Main Title

mean sd IQR 0% 25% 50% 75% 100% n


37.98019 13.60649 18.9 7.6 27.7 38.45 46.6 117.5 414

> with(Real_estate_valuation_data_set_1_, (t.test(Y.house.price.of.unit.are

One Sample t-test

data: Y.house.price.of.unit.area
t = 0.0000048401, df = 413, p-value = 1
alternative hypothesis: true mean is not equal to 37.98019
95 percent confidence interval:
36.66567 39.29472
sample estimates:
mean of x
37.98019

> cor(Real_estate_valuation_data_set_1_[,c("X2.house.age","X3.distance.to.t

X2.house.age X3.distance.to.the.nearest.MRT.station
X2.house.age 1.00000000 0.02562205
X3.distance.to.the.nearest.MRT.station 0.02562205 1.00000000

> RegModel.12 <- lm(Y.house.price.of.unit.area~X2.house.age, data=Real_esta


> summary(RegModel.12)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 2/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age, data = Real_estate_

Residuals:
Min 1Q Median 3Q Max
-31.113 -10.738 1.626 8.199 77.781

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.43470 1.21098 35.042 < 2e-16 ***
X2.house.age -0.25149 0.05752 -4.372 0.0000156 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13.32 on 412 degrees of freedom


Multiple R-squared: 0.04434, Adjusted R-squared: 0.04202
F-statistic: 19.11 on 1 and 412 DF, p-value: 0.0000156

> RegModel.13 <- lm(Y.house.price.of.unit.area~X2.house.age+X3.distance.to.


> summary(RegModel.13)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 3/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age + X3.distance.to.the
data = Real_estate_valuation_data_set_1_)

Residuals:
Min 1Q Median 3Q Max
-36.032 -4.742 -1.037 4.533 71.930

Coefficients:
Estimate Std. Error t value
(Intercept) 49.8855858 0.9677644 51.547
X2.house.age -0.2310266 0.0420383 -5.496
X3.distance.to.the.nearest.MRT.station -0.0072086 0.0003795 -18.997
Pr(>|t|)
(Intercept) < 2e-16 ***
X2.house.age 0.0000000684 ***
X3.distance.to.the.nearest.MRT.station < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.73 on 411 degrees of freedom


Multiple R-squared: 0.4911, Adjusted R-squared: 0.4887
F-statistic: 198.3 on 2 and 411 DF, p-value: < 2.2e-16

> Real <- readXL("E:/Term 2/BA/Real estate valuation data set (1).xlsx", ro

Error: Sheet '???1' not found

> set.seed(123)

> set.seed(123)

> sampleset <- sample(c(TRUE, FALSE), nrow(Real_estate_valuation_data_set_1

> train <- Real_estate_valuation_data_set_1_[sampleset, ]

> test <- Real_estate_valuation_data_set_1_[!sampleset, ]

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 4/20
9/19/2019 Replace with Main Title

names(Real_estate_valuation_data_set_1_) <-
make.names(names(Real_estate_valuation_data_set_1_))
summary(Real_estate_valuation_data_set_1_)

> numSummary(Real_estate_valuation_data_set_1_[,"Y.house.price.of.unit.area

mean sd IQR 0% 25% 50% 75% 100% n


37.98019 13.60649 18.9 7.6 27.7 38.45 46.6 117.5 414

> with(Real_estate_valuation_data_set_1_, (t.test(Y.house.price.of.unit.are

One Sample t-test

data: Y.house.price.of.unit.area
t = 0.0000048401, df = 413, p-value = 1
alternative hypothesis: true mean is not equal to 37.98019
95 percent confidence interval:
36.66567 39.29472
sample estimates:
mean of x
37.98019

> cor(Real_estate_valuation_data_set_1_[,c("X2.house.age","Y.house.price.of

X2.house.age Y.house.price.of.unit.area
X2.house.age 1.000000 -0.210567
Y.house.price.of.unit.area -0.210567 1.000000

> with(Real_estate_valuation_data_set_1_, cor.test(X2.house.age, Y.house.pr

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 5/20
9/19/2019 Replace with Main Title

Pearson's product-moment correlation

data: X2.house.age and Y.house.price.of.unit.area


t = -4.3721, df = 412, p-value = 0.0000156
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3008396 -0.1165546
sample estimates:
cor
-0.210567

> with(Real_estate_valuation_data_set_1_, cor.test(X3.distance.to.the.neare

Pearson's product-moment correlation

data: X3.distance.to.the.nearest.MRT.station and Y.house.price.of.unit.are


t = -18.5, df = 412, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.7230493 -0.6173117
sample estimates:
cor
-0.6736129

> RegModel.4 <- lm(Y.house.price.of.unit.area~X2.house.age, data=Real_estat


> summary(RegModel.4)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 6/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age, data = Real_estate_

Residuals:
Min 1Q Median 3Q Max
-31.113 -10.738 1.626 8.199 77.781

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.43470 1.21098 35.042 < 2e-16 ***
X2.house.age -0.25149 0.05752 -4.372 0.0000156 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13.32 on 412 degrees of freedom


Multiple R-squared: 0.04434, Adjusted R-squared: 0.04202
F-statistic: 19.11 on 1 and 412 DF, p-value: 0.0000156

> RegModel.5 <- lm(Y.house.price.of.unit.area~X2.house.age+X3.distance.to.t


> summary(RegModel.5)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 7/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age + X3.distance.to.the
data = Real_estate_valuation_data_set_1_)

Residuals:
Min 1Q Median 3Q Max
-36.032 -4.742 -1.037 4.533 71.930

Coefficients:
Estimate Std. Error t value
(Intercept) 49.8855858 0.9677644 51.547
X2.house.age -0.2310266 0.0420383 -5.496
X3.distance.to.the.nearest.MRT.station -0.0072086 0.0003795 -18.997
Pr(>|t|)
(Intercept) < 2e-16 ***
X2.house.age 0.0000000684 ***
X3.distance.to.the.nearest.MRT.station < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.73 on 411 degrees of freedom


Multiple R-squared: 0.4911, Adjusted R-squared: 0.4887
F-statistic: 198.3 on 2 and 411 DF, p-value: < 2.2e-16

> RegModel.6 <- lm(Y.house.price.of.unit.area~X2.house.age+X3.distance.to.t


> summary(RegModel.6)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 8/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age + X3.distance.to.the
X4.number.of.convenience.stores, data = Real_estate_valuation_data_set_

Residuals:
Min 1Q Median 3Q Max
-37.304 -5.430 -1.738 4.325 77.315

Coefficients:
Estimate Std. Error t value
(Intercept) 42.977286 1.384542 31.041
X2.house.age -0.252856 0.040105 -6.305
X3.distance.to.the.nearest.MRT.station -0.005379 0.000453 -11.874
X4.number.of.convenience.stores 1.297443 0.194290 6.678
Pr(>|t|)
(Intercept) < 2e-16 ***
X2.house.age 7.47e-10 ***
X3.distance.to.the.nearest.MRT.station < 2e-16 ***
X4.number.of.convenience.stores 7.91e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.251 on 410 degrees of freedom


Multiple R-squared: 0.5411, Adjusted R-squared: 0.5377
F-statistic: 161.1 on 3 and 410 DF, p-value: < 2.2e-16

> RegModel.7 <- lm(Y.house.price.of.unit.area~X2.house.age+X3.distance.to.t


> summary(RegModel.7)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 9/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age + X3.distance.to.the
X4.number.of.convenience.stores + X5.latitude, data = Real_estate_valua

Residuals:
Min 1Q Median 3Q Max
-34.522 -5.292 -1.579 4.264 76.466

Coefficients:
Estimate Std. Error t value
(Intercept) -5916.0064964 1112.7321600 -5.317
X2.house.age -0.2687192 0.0389293 -6.903
X3.distance.to.the.nearest.MRT.station -0.0041751 0.0004928 -8.473
X4.number.of.convenience.stores 1.1647814 0.1896707 6.141
X5.latitude 238.6357354 44.5608638 5.355
Pr(>|t|)
(Intercept) 1.74e-07 ***
X2.house.age 1.95e-11 ***
X3.distance.to.the.nearest.MRT.station 4.37e-16 ***
X4.number.of.convenience.stores 1.94e-09 ***
X5.latitude 1.43e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 8.954 on 409 degrees of freedom


Multiple R-squared: 0.5711, Adjusted R-squared: 0.5669
F-statistic: 136.2 on 4 and 409 DF, p-value: < 2.2e-16

> numSummary(Real_estate_valuation_data_set_1_[,"Y.house.price.of.unit.area

mean sd IQR 0% 25% 50% 75% 100% n


37.98019 13.60649 18.9 7.6 27.7 38.45 46.6 117.5 414

> with(Real_estate_valuation_data_set_1_, (t.test(Y.house.price.of.unit.are

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 10/20
9/19/2019 Replace with Main Title

One Sample t-test

data: Y.house.price.of.unit.area
t = 56.795, df = 413, p-value < 2.2e-16
alternative hypothesis: true mean is greater than 0
95 percent confidence interval:
36.87777 Inf
sample estimates:
mean of x
37.98019

> RegModel.8 <- lm(Y.house.price.of.unit.area~X2.house.age+X3.distance.to.t


> summary(RegModel.8)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 11/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age + X3.distance.to.the
X4.number.of.convenience.stores + X5.latitude, data = Real_estate_valua

Residuals:
Min 1Q Median 3Q Max
-34.522 -5.292 -1.579 4.264 76.466

Coefficients:
Estimate Std. Error t value
(Intercept) -5916.0064964 1112.7321600 -5.317
X2.house.age -0.2687192 0.0389293 -6.903
X3.distance.to.the.nearest.MRT.station -0.0041751 0.0004928 -8.473
X4.number.of.convenience.stores 1.1647814 0.1896707 6.141
X5.latitude 238.6357354 44.5608638 5.355
Pr(>|t|)
(Intercept) 1.74e-07 ***
X2.house.age 1.95e-11 ***
X3.distance.to.the.nearest.MRT.station 4.37e-16 ***
X4.number.of.convenience.stores 1.94e-09 ***
X5.latitude 1.43e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 8.954 on 409 degrees of freedom


Multiple R-squared: 0.5711, Adjusted R-squared: 0.5669
F-statistic: 136.2 on 4 and 409 DF, p-value: < 2.2e-16

names(Real_estate_valuation_data_set_1_) <-
make.names(names(Real_estate_valuation_data_set_1_))

> numSummary(Real_estate_valuation_data_set_1_[,"Y.house.price.of.unit.area

mean sd IQR 0% 25% 50% 75% 100% n


37.98019 13.60649 18.9 7.6 27.7 38.45 46.6 117.5 414

> with(Real_estate_valuation_data_set_1_, (t.test(Y.house.price.of.unit.are

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 12/20
9/19/2019 Replace with Main Title

One Sample t-test

data: Y.house.price.of.unit.area
t = 0.00028896, df = 413, p-value = 0.9998
alternative hypothesis: true mean is not equal to 37.98
95 percent confidence interval:
36.66567 39.29472
sample estimates:
mean of x
37.98019

> numSummary(Real_estate_valuation_data_set_1_[,"Y.house.price.of.unit.area

mean sd IQR 0% 25% 50% 75% 100% n


37.98019 13.60649 18.9 7.6 27.7 38.45 46.6 117.5 414

> summary(Real_estate_valuation_data_set_1_)

No X2.house.age X3.distance.to.the.nearest.MRT.station
Min. : 1.0 Min. : 0.000 Min. : 23.38
1st Qu.:104.2 1st Qu.: 9.025 1st Qu.: 289.32
Median :207.5 Median :16.100 Median : 492.23
Mean :207.5 Mean :17.713 Mean :1083.89
3rd Qu.:310.8 3rd Qu.:28.150 3rd Qu.:1454.28
Max. :414.0 Max. :43.800 Max. :6488.02
X4.number.of.convenience.stores X5.latitude X6.longitude
Min. : 0.000 Min. :24.93 Min. :121.5
1st Qu.: 1.000 1st Qu.:24.96 1st Qu.:121.5
Median : 4.000 Median :24.97 Median :121.5
Mean : 4.094 Mean :24.97 Mean :121.5
3rd Qu.: 6.000 3rd Qu.:24.98 3rd Qu.:121.5
Max. :10.000 Max. :25.01 Max. :121.6
Y.house.price.of.unit.area
Min. : 7.60
1st Qu.: 27.70
Median : 38.45
Mean : 37.98
3rd Qu.: 46.60
Max. :117.50

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 13/20
9/19/2019 Replace with Main Title

> with(Real_estate_valuation_data_set_1_, cor.test(X2.house.age, Y.house.pr

Pearson's product-moment correlation

data: X2.house.age and Y.house.price.of.unit.area


t = -4.3721, df = 412, p-value = 0.0000156
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3008396 -0.1165546
sample estimates:
cor
-0.210567

> RegModel.9 <- lm(Y.house.price.of.unit.area~X2.house.age, data=Real_estat


> summary(RegModel.9)

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age, data = Real_estate_

Residuals:

Min 1Q Median 3Q Max


-31.113 -10.738 1.626 8.199 77.781

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.43470 1.21098 35.042 < 2e-16 ***
X2.house.age -0.25149 0.05752 -4.372 0.0000156 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13.32 on 412 degrees of freedom


Multiple R-squared: 0.04434, Adjusted R-squared: 0.04202
F-statistic: 19.11 on 1 and 412 DF, p-value: 0.0000156

> RegModel.11 <- lm(Y.house.price.of.unit.area~X2.house.age+X3.distance.to.


> summary(RegModel.11)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 14/20
9/19/2019 Replace with Main Title

Call:
lm(formula = Y.house.price.of.unit.area ~ X2.house.age + X3.distance.to.the
data = Real_estate_valuation_data_set_1_)

Residuals:
Min 1Q Median 3Q Max
-36.032 -4.742 -1.037 4.533 71.930

Coefficients:
Estimate Std. Error t value
(Intercept) 49.8855858 0.9677644 51.547
X2.house.age -0.2310266 0.0420383 -5.496
X3.distance.to.the.nearest.MRT.station -0.0072086 0.0003795 -18.997
Pr(>|t|)
(Intercept) < 2e-16 ***
X2.house.age 0.0000000684 ***
X3.distance.to.the.nearest.MRT.station < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.73 on 411 degrees of freedom


Multiple R-squared: 0.4911, Adjusted R-squared: 0.4887
F-statistic: 198.3 on 2 and 411 DF, p-value: < 2.2e-16

> Boxplot( ~ X2.house.age, data=Real_estate_valuation_data_set_1_, id=list(

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 15/20
9/19/2019 Replace with Main Title

> distance <- get_dist(Real_estate_valuation_data_set_1_)

> distance <- get_dist(Real_estate_valuation_data_set_1_)

> .cluster <- KMeans(model.matrix(~-1 + X2.house.age + Y.house.price.of.un


> .cluster$size # Cluster Sizes

[1] 162 252

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 16/20
9/19/2019 Replace with Main Title

> .cluster$centers # Cluster Centroids

new.x.X2.house.age new.x.Y.house.price.of.unit.area
1 10.13025 50.18333
2 22.58690 30.13532

> .cluster$withinss # Within Cluster Sum of Squares

[1] 28552.41 46577.66

> .cluster$tot.withinss # Total Within Sum of Squares

[1] 75130.07

> .cluster$betweenss # Between Cluster Sum of Squares

[1] 54934.04

> biplot(princomp(model.matrix(~-1 + X2.house.age + Y.house.price.of.unit.a

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 17/20
9/19/2019 Replace with Main Title

> remove(.cluster)

> distance <- get_dist(Real_estate_valuation_data_set_1_)

> fviz_dist(distance, gradient = list(low = "#00AFBB", mid = "white", high

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 18/20
9/19/2019 Replace with Main Title

> #K-means cluster analysis

> USAkm <- kmeans(Real_estate_valuation_data_set_1_, centers = 2, nstart =

> #Visualize the clusters

> fviz_cluster(USAkm, data = Real_estate_valuation_data_set_1_)

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 19/20
9/19/2019 Replace with Main Title

> #Elbow method

> set.seed(123)

> #Gap stat

file:///C:/Users/Personal/Documents/RcmdrMarkdown.html 20/20

Você também pode gostar