# Package setup ----
# Install whatever is missing once, and do it *before* anything is attached.
# The previous version called install.packages() unconditionally on every run
# (hitting the network each time) and re-installed caret after it had already
# been loaded, which can leave the running session in a broken state.
required_pkgs <- c(
  "caret", "gbm", "foreach", "xtable", "estimatr", "texreg",
  "tidyverse", "readxl", "doParallel"
)
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach every package exactly once. The order follows the first appearance
# of each library() call in the old script, so function masking is unchanged.
library(caret)
library(gbm)
library(foreach)
library(xtable)
library(estimatr)
library(texreg)
library(tidyverse)
library(readxl)
library(doParallel)
# Load the transaction data ----
# Clean-up of this section:
#   * a bare, pasted file name (not valid R) used to sit here and prevented
#     the whole script from being parsed; it has been removed;
#   * `dat <- dataset` referred to an object that is never created in this
#     script and was overwritten on the next line anyway; it has been removed;
#   * two read_csv() calls whose results were overwritten straight away have
#     been removed, so only the file that is actually used gets read.

# Export `group3` only when it exists (e.g. it was built interactively);
# otherwise the unconditional write.csv() call aborts the script.
if (exists("group3")) {
  write.csv(group3, "C:/Users/jizha/OneDrive/Desktop/London_house_price0811data-group3-111m.csv")
}

dat <- read_csv("C:/Users/jizha/Desktop/london_house_price1108data-2011-202240%-18m.csv")

library(dplyr)

# Work on a copy of the raw data: the original `price` column becomes
# `average_price`, and `price1` takes over the name `price`.
df <- dat %>%
  rename(
    average_price = price,
    price = price1
  )

# Helper functions and working aliases ----
# This section used to be pasted twice, word for word, so every file was read
# and every helper sourced two times. It now runs once; the objects it leaves
# behind (`dat`, `lianjia`, `cl` and the sourced helpers) are the same.
library(caret)
library(xtable)
library(estimatr)
library(texreg)
library(tidyverse)
library(readxl)

# `dat` ends up holding the "nitime" extract, exactly as before. The read of
# the other 111m file that preceded it was overwritten on the very next line
# and has been dropped.
dat <- read_csv("C:/Users/jizha/OneDrive/Desktop/london_house_price0811data-group3-111m-nitime.csv")

# Project helper scripts; the paths are relative to the working directory.
source("KNN/plm.knn.R")
source("NW/plm.NW.R")
source("LPN/plm.localpoly.pred.R")

# The modelling code below works on `lianjia`, which is the renamed data `df`.
lianjia <- df

# Passed as the `cl` argument of the plm.* helpers and to
# doParallel::registerDoParallel() further down.
cl <- 24

# Linear-model specification without the coordinates.
formula.OLS <- price ~
  square + numberRoom + buildingType +
  age + energy_consumption_current + heating_cost_potential +
  co2_emissions_current +
  post_code + current_energy_rating +
  communityAverage + employment + median_income + dist.center

# Same specification with Lat and Lng added as regressors.
formula.OLS1 <- price ~
  square + numberRoom + buildingType +
  age + energy_consumption_current + heating_cost_potential +
  co2_emissions_current +
  post_code + current_energy_rating +
  communityAverage + employment + median_income + Lat + Lng + dist.center

###########
# SNP inputs
# Builds the two matrices that are later passed as the `X` and `W` arguments
# of plm.knn() and plm.localpoly.pred():
#   X - numeric covariates plus dummy-coded categorical variables
#   W - the coordinates (Lng, Lat)
#
# Fix: communityAverage used to be bound into X twice, which put two identical
# (perfectly collinear) columns into the matrix. It is now added once.

# Dummy-code one categorical variable and drop the first level's column.
# drop = FALSE keeps a named one-column matrix when the variable has only two
# levels, instead of collapsing to an unnamed vector.
drop_first_dummy <- function(spec) {
  full <- predict(dummyVars(spec, data = lianjia), newdata = lianjia)
  full[, -1, drop = FALSE]
}

lianjia.SNP <- list()
lianjia.SNP$X <- as.matrix(cbind(
  lianjia[, c("square", "numberRoom")],
  drop_first_dummy(~buildingType),
  drop_first_dummy(~current_energy_rating),
  lianjia[, c("age", "energy_consumption_current",
              "heating_cost_potential", "co2_emissions_current")],
  drop_first_dummy(~post_code),
  tradeyear = lianjia$tradeYear,
  communityAverage = lianjia$communityAverage,
  employmentAverage = lianjia$employment,
  median_incomeavg = lianjia$median_income
))
#lianjia.SNP$X=cbind(lianjia.SNP$X,dist=lianjia$dist.center)
lianjia.SNP$W <- as.matrix(lianjia[, c("Lng", "Lat")])


###########
# Specification for the tree-based models, coordinates left out.
formula.tree.noc <- price ~
  square + numberRoom + buildingType +
  age + energy_consumption_current + heating_cost_potential +
  co2_emissions_current +
  post_code + current_energy_rating + tradeYear +
  communityAverage + employment + median_income + dist.center

# Specification for the tree-based models, with Lat and Lng included.
formula.tree <- price ~
  square + numberRoom + buildingType +
  age + energy_consumption_current + heating_cost_potential +
  co2_emissions_current +
  post_code + current_energy_rating + tradeYear +
  communityAverage + employment + median_income + Lat + Lng + dist.center

#############

cl <- 24

library(caret)

# Training sample: 70% of the rows, reproducible through the fixed seed.
# seq_len() replaces 1:nrow(), which yields c(1, 0) for an empty data set.
# NOTE(review): the partition is drawn on the row index, not on the outcome,
# so it is not stratified by price - confirm that this is intended.
set.seed(123)
train_ind <- createDataPartition(seq_len(nrow(lianjia)), p = 0.7)$Resample1

# OLS without coordinates: fit on the training rows, predict the held-out rows.
cat(paste("Starting OLS at", Sys.time()), "\n")
model.OLS <- lm(formula.OLS, data = lianjia[train_ind, ])
pred.OLS <- predict(model.OLS, newdata = lianjia[-train_ind, ])

######
# OLS with coordinates (formula.OLS1).
model.OLS1 <- lm(formula.OLS1, data = lianjia[train_ind, ])
pred.OLS1 <- predict(model.OLS1, newdata = lianjia[-train_ind, ])

summary(model.OLS1)
# SKNN with L1 distance
# Fix: this fit is labelled "L1" but was being run with p = 2, the same value
# as the L2 fit below, so both models used the same setting for `p`.
# NOTE(review): this assumes `p` is the exponent of the distance used by
# plm.knn() - confirm against KNN/plm.knn.R.
cat(paste("Starting SKNN.L1 at", Sys.time()), "\n")
model.SKNN.L1 <- plm.knn(
  k = 100,
  y = lianjia$price[train_ind],
  X = lianjia.SNP$X[train_ind, ],
  W = lianjia.SNP$W[train_ind, ],
  X.new = lianjia.SNP$X[-train_ind, ],
  W.new = lianjia.SNP$W[-train_ind, ],
  p = 1,
  cl = cl
)
# Predictions for the held-out rows supplied as X.new / W.new.
pred.SKNN.L1 <- model.SKNN.L1$pred.out

# SKNN with L2 distance
cat(paste("Starting SKNN.L2 at", Sys.time()), "\n")
model.SKNN.L2 <- plm.knn(
  k = 200,
  y = lianjia$price[train_ind],
  X = lianjia.SNP$X[train_ind, ],
  W = lianjia.SNP$W[train_ind, ],
  X.new = lianjia.SNP$X[-train_ind, ],
  W.new = lianjia.SNP$W[-train_ind, ],
  p = 2,
  cl = cl
)
pred.SKNN.L2 <- model.SKNN.L2$pred.out



# SLPN fits: three calls to plm.localpoly.pred() on the same training and
# held-out rows; only the bandwidth pair `h` and the value of `p` change.

# SLPN.1: h = (0.07, 0.07), p = 1
cat(paste("Starting SLPN.1 at", Sys.time()), "\n")
pred.SLPN.1 <- plm.localpoly.pred(
  y = lianjia$price[train_ind],
  X = lianjia.SNP$X[train_ind, ],
  W = lianjia.SNP$W[train_ind, ],
  X.new = lianjia.SNP$X[-train_ind, ],
  W.new = lianjia.SNP$W[-train_ind, ],
  h = c(0.07, 0.07),
  p = 1,
  cl = cl
)

# SLPN.2: h = (0.09, 0.09), p = 2
cat(paste("Starting SLPN.2 at", Sys.time()), "\n")
pred.SLPN.2 <- plm.localpoly.pred(
  y = lianjia$price[train_ind],
  X = lianjia.SNP$X[train_ind, ],
  W = lianjia.SNP$W[train_ind, ],
  X.new = lianjia.SNP$X[-train_ind, ],
  W.new = lianjia.SNP$W[-train_ind, ],
  h = c(0.09, 0.09),
  p = 2,
  cl = cl
)

# SLPN.3: h = (0.11, 0.11), p = 3
cat(paste("Starting SLPN.3 at", Sys.time()), "\n")
pred.SLPN.3 <- plm.localpoly.pred(
  y = lianjia$price[train_ind],
  X = lianjia.SNP$X[train_ind, ],
  W = lianjia.SNP$W[train_ind, ],
  X.new = lianjia.SNP$X[-train_ind, ],
  W.new = lianjia.SNP$W[-train_ind, ],
  h = c(0.11, 0.11),
  p = 3,
  cl = cl
)

# Random forest, coordinates excluded (formula.tree.noc).
# A parallel backend is registered before the fit and released afterwards.
# tuneGrid has a single row, so mtry is fixed at 13 rather than searched.
# NOTE(review): `distribution` and `verbose` are passed on through train()'s
# `...`; confirm that the "rf" method actually uses them.
cat(paste("Starting RF without coordinates at", Sys.time()), "\n")
doParallel::registerDoParallel(cl)
model.RF.noc <- train(
  formula.tree.noc,
  data = lianjia[train_ind, ],
  method = "rf",
  distribution = "gaussian",
  tuneGrid = data.frame(mtry = 13),
  nodesize = 20,
  maxnodes = 10500,
  verbose = FALSE
)
pred.RF.noc <- predict(model.RF.noc, newdata = lianjia[-train_ind, ])
doParallel::stopImplicitCluster()

# Random forest, coordinates included (formula.tree); mtry is fixed at 15.
cat(paste("Starting RF with coordinates at", Sys.time()), "\n")
doParallel::registerDoParallel(cl)
model.RF <- train(
  formula.tree,
  data = lianjia[train_ind, ],
  method = "rf",
  distribution = "gaussian",
  tuneGrid = data.frame(mtry = 15),
  nodesize = 20,
  maxnodes = 9800,
  verbose = FALSE
)
pred.RF <- predict(model.RF, newdata = lianjia[-train_ind, ])
doParallel::stopImplicitCluster()

# Gradient boosting, coordinates excluded (formula.tree.noc).
# A parallel backend is registered before the fit and released afterwards.
# The tuning grid has a single row, so all four hyper-parameters are fixed.
cat(paste("Starting GBM without coordinates at", Sys.time()), "\n")
doParallel::registerDoParallel(cl)
model.GBM.noc <- train(
  formula.tree.noc,
  data = lianjia[train_ind, ],
  method = "gbm",
  distribution = "gaussian",
  tuneGrid = data.frame(
    n.trees = 12000,
    interaction.depth = 25,
    shrinkage = 0.005,
    n.minobsinnode = 20
  ),
  verbose = FALSE
)
pred.GBM.noc <- predict(model.GBM.noc, newdata = lianjia[-train_ind, ])
doParallel::stopImplicitCluster()

# Gradient boosting, coordinates included (formula.tree).
cat(paste("Starting GBM with coordinates at", Sys.time()), "\n")
doParallel::registerDoParallel(cl)
model.GBM <- train(
  formula.tree,
  data = lianjia[train_ind, ],
  method = "gbm",
  distribution = "gaussian",
  tuneGrid = data.frame(
    n.trees = 10000,
    interaction.depth = 20,
    shrinkage = 0.005,
    n.minobsinnode = 20
  ),
  verbose = FALSE
)
pred.GBM <- predict(model.GBM, newdata = lianjia[-train_ind, ])
doParallel::stopImplicitCluster()
