Sunteți pe pagina 1din 6

Linear Models Assignment: VI

Subhadip Pal
Due date: 28th November 2009

1.
A) We consider the regression of BAC on $NOB/Weight^{p}$ for different
values of $p$ between 0 and 1. We have to estimate $p_{opt}$ by obtaining, numerically, the
value of $p$ that minimizes the residual sum of squares in the regression of BAC on
$NOB/Weight^{p}$. $p_{opt}$ is a parameter, i.e. an unknown constant; based on the observed
data we have to obtain an estimate of it.

To get an estimate we used a crude grid search: we calculated the residual sum of
squares (RSS) under the model $BAC = \alpha + \beta \, NOB/Weight^{p} + \varepsilon$ for 10000
equidistant values of $p$ in $[0,1]$. We then took as the estimate of the optimal value of $p$
the grid point for which the RSS is minimum. After implementing this procedure in R we get
$\hat{p}_{opt} = 0.5793579$.

Fig1. Assuming the model $BAC = \alpha + \beta \, NOB/Weight^{p} + \varepsilon$


for different values of $p$, the error sum of squares
of the model after fitting it to the given data is plotted
against the different values of $p \in [0,1]$.
B)

Fig2. Scatter plot of the $(NOB/Weight^{\hat{p}_{opt}},\ BAC)$ data points and the


95% prediction band for a future observation. The size of each
data point is an increasing function of NOB.

C. Assume that in the regression of BAC on $NOB/Weight^{p_{opt}}$ we really do


have simple linear regression, with normal errors and equal variances. The (pointwise)
coverage probability of the band is (c) greater than 0.95. To get the
95% band we used an estimate of the variance of the errors, and that variance
estimate is $RSS(\hat{p}_{opt})/(n-2)$. But $RSS(\hat{p}_{opt}) \le RSS(p_{opt})$, so the band we have formed is
wider than the actual 95% band that we could have formed using $p_{opt}$. Hence the
probability that a point falls inside the band is actually greater than 0.95.
[Reviewer note: $RSS(\hat{p}_{opt}) \le RSS(p_{opt})$ gives a smaller variance estimate, which by
itself makes the band narrower rather than wider; this step and the resulting
conclusion should be re-checked.]
2.

A) The amount of sweat is related to the loop length as $a \cdot LoopLength^{b}$, which means that $\ln(S)$ is linearly


related to $\ln(L)$, where $S$ = amount of sweat and $L$ = loop length. We assume
the model $\ln(S) = \alpha + \beta \ln(L) + \varepsilon$. Here $\alpha$ is actually $\ln(a)$ and $\beta = b$. In R we fit the
linear model to get estimates of $\alpha$ and $\beta$, and hence the estimates $b = \beta$ and $a = e^{\alpha}$;
we get a=0.1563658 and b=1.3347134

B)

Fig3. 95% prediction band for the amount of sweat as a


function of distance, as distance ranges continuously from 3 to 9.
Appendix:

R code used for problem 1:

######################data import part #########################

# Read the BAC data set and attach it to the search path.
#
# The file is read with read.table() and must have a header row. The
# functions for problem 1 (ass6, conf_int) look up the columns BAC, NOB and
# Weight as free variables, so the data frame is attached under the name
# "temp_data".
#
# Args:
#   path: location of the data file. Pass a different path if the file is
#         stored elsewhere; the default is the original hard-coded location.
#
# Returns (invisibly) the environment created by attach().
data_import <- function(
    path = "C://My Computer//edu//Lm//R data sets//bac.txt") {
  bac.df <- read.table(path, header = TRUE)

  # Drop a copy left over from an earlier call so that repeated runs do not
  # stack duplicate (and mutually masking) entries on the search path.
  if ("temp_data" %in% search()) {
    detach("temp_data", character.only = TRUE)
  }

  # The local name determines the search-path entry name ("temp_data").
  temp_data <- bac.df
  attach(temp_data)
}

########################### First part finding optimal p ###########

# Grid search for the exponent p that minimises the residual sum of squares
# of the regression BAC ~ NOB / Weight^p.
#
# BAC, NOB and Weight are looked up as free variables (they are expected to
# be on the search path, e.g. after data_import()).
#
# Args:
#   l:    number of equidistant grid points for p on [0, 1]; a larger l gives
#         a more precise estimate but takes longer.
#   indx: if 0, additionally write a PostScript plot of RSS against p with
#         the (first) minimiser marked in red; any other value skips the plot.
#
# Returns a matrix with columns "p" and "opt" (the RSS) holding the grid
# row(s) at which the RSS attains its minimum.
ass6 <- function(l, indx) {
  p <- seq(from = 0, to = 1, length.out = l)
  opt <- numeric(length(p))

  for (i in seq_along(p)) {
    X <- NOB / (Weight^p[i])
    # deviance() of an lm fit is its residual sum of squares.
    opt[i] <- deviance(lm(BAC ~ X))
    # Progress report every 1000 grid points.
    if (i %% 1000 == 0) {
      print(i * 100 / l)
      print("% complete")
    }
  }

  OptStat <- cbind(p, opt)
  # min(opt) is itself an element of opt, so the exact comparison is safe;
  # ties (if any) are all returned.
  optVal <- subset(OptStat, opt == min(opt))

  if (indx == 0) {
    library(lattice)
    # Change the file path here to store the plot elsewhere.
    trellis.device(postscript, file = "C://My Computer//edu//Lm//o.ps",
                   horizontal = FALSE, width = 6, height = 4.5)
    # Close the device when the function exits, even if plotting fails.
    on.exit(dev.off(), add = TRUE)

    par(cex.axis = 1, cex.lab = 1.2)
    par(mar = c(5, 4 + 1, 4, 2))
    plot(OptStat, xlab = "p : exponent of 'Weight' ",
         ylab = "Residual Sum Of Squares", cex = .2, lty = 2)
    lines(OptStat[, 1], OptStat[, 2], lty = 1, col = "blue", type = "l")

    # Mark the first minimiser with a vertical red segment from the x-axis
    # up to the minimum RSS.
    c1 <- c(optVal[1, 1], optVal[1, 1])
    c2 <- c(0, optVal[1, 2])
    lines(c1, c2, col = "red", lty = 1, type = "p", cex = 1.3)
    lines(c1, c2, col = "red", lty = 1, type = "l")
    legend(.6, 0.0059, c("Optimal value of p"), col = c("red"),
           pch = c(21), cex = 1.2)
  }

  optVal
}

################ 1.B second part Scatter plot along with the confidence band
#####################
# Problem 1.B: scatter plot of BAC against X = NOB / Weight^p_opt together
# with the fitted line and the 95% prediction band, written to a PDF file.
#
# BAC, NOB and Weight are looked up as free variables (expected on the search
# path); p_opt is obtained from ass6(l, 1), i.e. without the RSS plot.
#
# Args:
#   l: number of grid points handed to ass6() for the search over p.
#
# Returns the value of the final dev.off() call.
conf_int <- function(l) {
  opt <- ass6(l, 1)
  # The predictor must be called X: newdata below is matched by that name.
  X <- NOB / (Weight^opt[1, 1])
  fit1 <- lm(BAC ~ X)
  library(lattice)

  trellis.device(pdf, file = "C:\\My Computer\\edu\\Lm\\plot.pdf")
  # Make sure the PDF device is closed if anything below fails; on the normal
  # path it is closed explicitly at the end so the return value is unchanged.
  device_open <- TRUE
  on.exit(if (device_open) dev.off(), add = TRUE)

  par(cex.axis = 1, cex.lab = 1.2)
  par(mar = c(5, 4 + 1, 4, 2))
  # Symbol size grows with NOB.
  plot(X, BAC, type = "p", cex = .3 + NOB / mean(NOB),
       xlab = "X : NOB/Weight^p_opt", ylab = "Blood Alcohol content")
  abline(fit1)
  title("95% prediction band of BAC as a function of NOB/Weight^p")

  # Evaluate the prediction interval on a fine grid from 0 to just past the
  # largest observed X and draw its lower and upper limits.
  x_grid <- seq(0, max(X) + .1, length.out = 1000)
  nd <- data.frame(X = x_grid)
  pred <- predict(fit1, newdata = nd, interval = "prediction")
  lines(x_grid, pred[, 2], lty = 1, col = "blue")
  lines(x_grid, pred[, 3], lty = 1, col = "blue")

  device_open <- FALSE
  dev.off()
}

################################ to execute the functions


########################################
data_import()
# ass6(10000, 0)  # l = 10000 is the precision parameter: a larger l means
#                 # more precision in the estimate and more time to execute
b <- conf_int(10000)

R Code used for problem 2:


# Read the sweat data set, derive the Sweat column and attach the "Hot" rows.
#
# Sweat is computed as WeightBefore - WeightAfter. Only rows with
# Temperature == "Hot" are kept, and the resulting data frame is attached
# under the name "new_data" so that ass6.2() can look up Sweat and LoopLength
# as free variables.
#
# Args:
#   path: location of the data file (read with read.table(), header row
#         required). The default is the original hard-coded location.
#
# Returns (invisibly) the environment created by attach().
data_prep <- function(
    path = "C:\\My Computer\\edu\\Lm\\R data sets\\sweat.txt") {
  jill.df <- read.table(path, header = TRUE)
  jill.df$Sweat <- jill.df$WeightBefore - jill.df$WeightAfter

  # Drop a copy left over from an earlier call so that repeated runs do not
  # stack duplicate entries on the search path.
  if ("new_data" %in% search()) {
    detach("new_data", character.only = TRUE)
  }

  # The local name determines the search-path entry name ("new_data").
  new_data <- subset(jill.df, Temperature == "Hot")
  attach(new_data)
}

# Problem 2: fit the power law Sweat = a * LoopLength^b on the log-log scale
# and draw the data with the back-transformed fit and 95% prediction band.
#
# Sweat and LoopLength are looked up as free variables (expected on the
# search path, e.g. after data_prep()).
#
# Returns c(a, b), where a = exp(intercept) and b = slope of the regression
# of log(Sweat) on log(LoopLength).
ass6.2 <- function() {
  # The predictor keeps the name lnL: it labels the returned slope and is
  # the column name predict() matches in newdata.
  lnL <- log(LoopLength)
  log_sweat <- log(Sweat)
  loglog_fit <- lm(log_sweat ~ lnL)

  estimates <- coef(loglog_fit)
  z <- c(exp(estimates[1]), estimates[2])

  # Prediction interval on a fine grid of loop lengths, computed on the log
  # scale and exponentiated back for plotting.
  loop_grid <- seq(from = 2.9, to = 9.5, length.out = 1000)
  band <- predict(loglog_fit,
                  newdata = data.frame(lnL = log(loop_grid)),
                  interval = "prediction")

  plot(LoopLength, Sweat)
  # Columns of band: 1 = fit, 2 = lower limit, 3 = upper limit.
  curve_cols <- c(2, 3, 1)
  curve_colours <- c("red", "red", "brown")
  for (k in seq_along(curve_cols)) {
    lines(loop_grid, exp(band[, curve_cols[k]]),
          col = curve_colours[k], lty = 1)
  }

  z
}

## to execute the functions


data_prep()
z=ass6.2()