# Packages ----

# Install only what is missing instead of reinstalling on every run.
required_packages <- c(
  "gbm", "tidyverse", "xtable", "estimatr", "texreg",
  "caret", "doParallel", "foreach"
)
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(gbm)
library(foreach)
library(xtable)
library(estimatr)
library(texreg)
library(tidyverse)
library(readxl)
library(doParallel)
library(caret)

# Partially linear model with a local-polynomial nonparametric part ----

#' Fit a partially linear model and predict at new points
#'
#' Steps performed:
#' 1. `y` and every column of `X` are smoothed on `W` with a local polynomial
#'    of degree `p`, using a product kernel with bandwidths `h`.
#' 2. The smoothed values are subtracted and the linear coefficients are
#'    obtained by least squares on the residualised data.
#' 3. `y - X %*% beta_hat` is smoothed on `W` and evaluated at each row of
#'    `W.new`.
#' 4. The prediction is `X.new %*% beta_hat` plus the smoothed component.
#'
#' @param y Numeric response vector.
#' @param X Numeric matrix of linear covariates (rows match `y`).
#' @param W Numeric matrix of smoothing covariates (rows match `y`).
#' @param X.new Linear covariates of the points to predict.
#' @param W.new Matrix of smoothing covariates of the points to predict.
#' @param h Bandwidths. `t(D)` is divided by `h`, so it is expected to hold
#'   one value per column of `W` (a scalar is recycled).
#' @param p Degree of the local polynomial; `0` gives a local constant fit.
#' @param kernel.fun Kernel name. Only `"Epanechnikov"` is implemented.
#' @param cl Number of parallel workers; values above 1 register an implicit
#'   doParallel backend for the duration of the call.
#' @return Numeric vector of predictions, one per row of `W.new`.
#' @note `solve()` errors when the local design matrix is singular, e.g. when
#'   no training point has non-zero kernel weight around an evaluation point.
plm.localpoly.pred <- function(y, X, W, X.new, W.new, h, p = 1,
                               kernel.fun = "Epanechnikov", cl = 1) {
  # Fail early on an unknown kernel instead of hitting an undefined `K`
  # deep inside the foreach loop.
  K <- switch(
    kernel.fun,
    Epanechnikov = function(u) 3 / 4 * (1 - u^2) * (abs(u) <= 1),
    stop("Unsupported kernel.fun: ", kernel.fun, call. = FALSE)
  )
  n <- length(y)

  # Local-polynomial estimate at point `w` of each column of `G`.
  localpoly.mean <- function(w, G) {
    # Row i holds W[i, ] - w.
    D <- W - matrix(rep(w, each = n), nrow = n)
    # Product kernel: multiply the per-coordinate kernel weights.
    omega <- apply(K(t(D) / h), MARGIN = 2, FUN = prod)
    local.ind <- which(omega != 0)
    Z <- rep(1, length(local.ind))
    if (p > 0) {
      Z <- cbind(
        Z,
        poly(D[local.ind, , drop = FALSE], degree = p, raw = TRUE,
             simple = TRUE)
      )
    }
    ZW <- t(Z * omega[local.ind])
    # First row of the weighted least-squares projection = intercept weights.
    Proj <- (solve(ZW %*% Z) %*% ZW)[1, ]
    colSums(G[local.ind, , drop = FALSE] * Proj)
  }

  # Pick the foreach operator once; register the backend once per call and
  # guarantee it is released even if a local fit fails.
  if (cl > 1) {
    doParallel::registerDoParallel(cl)
    on.exit(doParallel::stopImplicitCluster(), add = TRUE)
    `%op%` <- foreach::`%dopar%`
  } else {
    `%op%` <- foreach::`%do%`
  }

  # Step 1: smooth y and X on W at every training point.
  G.train <- cbind(y, X)
  M <- foreach::foreach(i = seq_len(n), .combine = rbind) %op% {
    localpoly.mean(w = W[i, ], G = G.train)
  }

  # Step 2: least squares on the residualised data.
  y_tilde <- y - M[, 1]
  X_tilde <- X - M[, -1]
  beta_hat <- as.vector(
    solve(crossprod(X_tilde), crossprod(X_tilde, y_tilde))
  )

  # Step 3: smooth the remaining component and evaluate it at W.new.
  # Kept as a one-column matrix because localpoly.mean() indexes G by row.
  gamma_in <- y - X %*% beta_hat
  gamma_out <- foreach::foreach(
    i = seq_len(nrow(W.new)), .combine = c
  ) %op% {
    localpoly.mean(w = W.new[i, ], G = gamma_in)
  }

  # Step 4: linear part plus smoothed part.
  as.vector(X.new %*% beta_hat) + gamma_out
}

# R-squared and RMSE of `predicted` against `actual`.
.plm.localpoly.metrics <- function(actual, predicted) {
  e2 <- (actual - predicted)^2
  c(
    R2 = 1 - sum(e2) / sum((actual - mean(actual))^2),
    RMSE = sqrt(mean(e2))
  )
}

# Evaluate `score.fun(h, p)` on every (p, h) combination and pick the
# combination with the largest R2.
.plm.localpoly.grid.search <- function(h.grid, p.grid, score.fun) {
  tuneGrid <- data.frame(p = rep(p.grid, each = nrow(h.grid)), h = h.grid)
  n.candidates <- nrow(tuneGrid)
  scores <- matrix(
    NA_real_,
    nrow = n.candidates, ncol = 2L,
    dimnames = list(NULL, c("R2", "RMSE"))
  )
  for (i in seq_len(n.candidates)) {
    cat(paste("Group", i, "starting tuning at", Sys.time()), "\n")
    # Every column except `p` belongs to the bandwidth vector.
    scores[i, ] <- score.fun(h = unlist(tuneGrid[i, -1]), p = tuneGrid$p[i])
  }
  result <- cbind(tuneGrid, scores)
  bestTune <- which.max(result$R2)
  list(
    h.best = unname(unlist(tuneGrid[bestTune, -1])),
    p.best = tuneGrid$p[bestTune],
    Performance = result
  )
}

#' Tune bandwidth and polynomial degree by k-fold cross-validation
#'
#' @param y,X,W Response, linear covariates and smoothing covariates, as in
#'   `plm.localpoly.pred()`.
#' @param h.grid Candidate bandwidths, one candidate per row.
#' @param p.grid Candidate polynomial degrees.
#' @param fold Number of folds, created with `caret::createFolds()`.
#' @param cl Number of parallel workers passed to `plm.localpoly.pred()`.
#' @return List with `h.best`, `p.best` (the combination with the largest
#'   cross-validated R2) and `Performance` (R2 and RMSE for every candidate).
plm.localpoly.tune.cv <- function(y, X, W, h.grid, p.grid, fold = 4, cl = 1) {
  n <- length(y)
  group <- caret::createFolds(seq_len(n), k = fold)

  plm.localpoly.cv <- function(h, p) {
    y_hat <- rep(NA_real_, n)
    for (k in seq_len(fold)) {
      held.out <- group[[k]]
      # drop = FALSE keeps single-column / single-row inputs as matrices.
      y_hat[held.out] <- plm.localpoly.pred(
        y = y[-held.out],
        X = X[-held.out, , drop = FALSE],
        W = W[-held.out, , drop = FALSE],
        X.new = X[held.out, , drop = FALSE],
        W.new = W[held.out, , drop = FALSE],
        h = h, p = p, cl = cl
      )
    }
    .plm.localpoly.metrics(actual = y, predicted = y_hat)
  }

  .plm.localpoly.grid.search(h.grid, p.grid, score.fun = plm.localpoly.cv)
}

#' Tune bandwidth and polynomial degree on a hold-out validation set
#'
#' @param y,X,W Response, linear covariates and smoothing covariates, as in
#'   `plm.localpoly.pred()`.
#' @param h.grid Candidate bandwidths, one candidate per row.
#' @param p.grid Candidate polynomial degrees.
#' @param valid.ind Row indices of the validation set. They are removed from
#'   the training data with negative indexing, so positive integer indices
#'   are expected.
#' @param cl Number of parallel workers passed to `plm.localpoly.pred()`.
#' @return List with `h.best`, `p.best` (the combination with the largest
#'   validation R2) and `Performance` (R2 and RMSE for every candidate).
plm.localpoly.tune.oos <- function(y, X, W, h.grid, p.grid, valid.ind,
                                   cl = 1) {
  plm.localpoly.oos <- function(h, p) {
    y_hat <- plm.localpoly.pred(
      y = y[-valid.ind],
      X = X[-valid.ind, , drop = FALSE],
      W = W[-valid.ind, , drop = FALSE],
      X.new = X[valid.ind, , drop = FALSE],
      W.new = W[valid.ind, , drop = FALSE],
      h = h, p = p, cl = cl
    )
    .plm.localpoly.metrics(actual = y[valid.ind], predicted = y_hat)
  }

  .plm.localpoly.grid.search(h.grid, p.grid, score.fun = plm.localpoly.oos)
}

# Data ----

# Candidate locations of the input file, in order of preference.
data_paths <- c(
  "C:/Users/jizha/Desktop/london_house_price1108data-2011-202240%-18m.csv",
  "C:/Users/jizha/OneDrive/Desktop/london_house_price1108data-2011-202240%-18m.csv"
)
existing_paths <- data_paths[file.exists(data_paths)]
if (length(existing_paths) == 0) {
  stop(
    "Input CSV not found at any of: ", paste(data_paths, collapse = ", "),
    call. = FALSE
  )
}
dat <- read_csv(existing_paths[1])
if (interactive()) {
  View(dat)
}

# `price` becomes `average_price`; `price1` becomes the modelled `price`.
df <- dat %>%
  rename(
    average_price = price,
    price = price1
  )
lianjia <- df

# Number of parallel workers.
cl <- 24

# Training sample ----
set.seed(123)
train_ind <- createDataPartition(seq_len(nrow(lianjia)), p = 0.7)$Resample1

# Design matrices for the partially linear model ----

# Dummy-encode the variables in `formula`, dropping the first dummy column.
dummy_matrix <- function(formula, data) {
  encoded <- predict(dummyVars(formula, data = data), newdata = data)
  encoded[, -1, drop = FALSE]
}

lianjia.SNP <- list()
# communityAverage is included once: a repeated column is perfectly collinear
# and makes the normal equations in plm.localpoly.pred() singular.
lianjia.SNP$X <- cbind(
  lianjia[, c("square", "numberRoom")],
  dummy_matrix(~buildingType, lianjia),
  dummy_matrix(~current_energy_rating, lianjia),
  lianjia[, c(
    "age", "energy_consumption_current",
    "heating_cost_potential", "co2_emissions_current"
  )],
  dummy_matrix(~post_code, lianjia),
  communityAverage = lianjia$communityAverage,
  employmentAverage = lianjia$employment,
  median_incomeavg = lianjia$median_income,
  dist = lianjia$dist.center
)
lianjia.SNP$X <- as.matrix(lianjia.SNP$X)
lianjia.SNP$W <- as.matrix(lianjia[, c("Lng", "Lat")])

# Model formulas ----

# Tree formula with coordinates
formula.tree <- price ~
  square + numberRoom +
  buildingType +
  age + energy_consumption_current + heating_cost_potential +
  co2_emissions_current +
  post_code + current_energy_rating + tradeYear +
  communityAverage + employment + median_income + Lat + Lng + dist.center

# Tree formula without coordinates
formula.tree.noc <- price ~
  square + numberRoom +
  buildingType +
  age + energy_consumption_current + heating_cost_potential +
  co2_emissions_current +
  post_code + current_energy_rating + tradeYear +
  communityAverage + employment + median_income + dist.center

# GBM with coordinates ----
cat(paste("Starting GBM with coordinates at", Sys.time()), "\n")
doParallel::registerDoParallel(cl)
# `finally` releases the workers even if training or prediction fails.
tryCatch(
  {
    model.GBM <- train(
      formula.tree,
      data = lianjia[train_ind, ],
      method = "gbm",
      distribution = "gaussian",
      tuneGrid = data.frame(
        n.trees = 10000,
        interaction.depth = 20,
        shrinkage = 0.005,
        n.minobsinnode = 20
      ),
      verbose = FALSE
    )
    pred.GBM <- predict(model.GBM, newdata = lianjia[-train_ind, ])
  },
  finally = doParallel::stopImplicitCluster()
)