Documente Academic
Documente Profesional
Documente Cultură
Command
Purpose
help()
example()
c(), scan()
seq()
rep()
data()
View()
str()
read.csv(), read.table()
library(), require()
dim()
length()
ls()
rm()
names()
hist()
histogram()
stem()
table()
xtabs()
mosaicplot()
cut()
mean(), median()
by()
summary()
var(), sd()
sum()
quantile()
barplot()
barchart()
boxplot()
bwplot()
Command
Purpose
plot()
xyplot()
lm()
anova()
predict()
nls()
residuals()
sample()
replicate()
cumsum()
ecdf()
dbinom(), etc.
dpois(), etc.
pnorm(), etc.
qt(), etc.
pchisq(), etc.
binom.test()
prop.test()
chisq.test()
fisher.test()
t.test()
qqnorm(), qqline()
addmargins()
prop.table()
par()
power.t.test()
anova()
Produces a scatterplot
Lattice command for producing a scatterplot
Determine the least-squares regression line
Analysis of variance (can use on results of lm())
Obtain predicted values from linear model
estimate parameters of a nonlinear model
gives (observed - predicted) for a model fit to data
take a sample from a vector of data
repeat some process a set number of times
produce running total of values for input vector
builds empirical cumulative distribution function
tools for binomial distributions
tools for Poisson distributions
tools for normal distributions
tools for student t distributions
tools for chi-square distributions
hypothesis test and confidence interval for 1 proportion
inference for 1 proportion using normal approx.
carries out a chi-square test
Fisher test for contingency table
student t test for inference on population mean
tools for checking normality
adds marginal sums to an existing table
compute proportions from a contingency table
query and edit graphical settings
power calculations for 1- and 2-sample t
compute analysis of variance table for fitted model
Examples of usage
Examples of usage
help()
help(mean)
example()
require(lattice)
example(histogram)
"Luke"
"Anakin" "Leia"
"Jacen"
"Jaina"
"Petal.Length" "Petal.Width"
"Species"
> dim(iris)
[1] 150
> str(iris)
'data.frame':
$ Sepal.Length:
$ Sepal.Width :
$ Petal.Length:
$ Petal.Width :
$ Species
:
> View(iris)
Examples of usage
ls(), rm()
> data(iris)
> data(faithful)
> data(Puromycin)
> data(LakeHuron)
> ls()
[1] "faithful"  "heartDeck" "iris"      "LakeHuron" "names"     "Puromycin" "x"         "y"
"LakeHuron" "names"
"heartDeck" "iris"
> rm(faithful)
> ls()
[1] "heartDeck" "iris"
"LakeHuron" "names"
"y"
hist()
data(faithful)
hist(faithful$eruptions)
hist(faithful$eruptions, n=15)
hist(faithful$eruptions, breaks=seq(1.5,5.25,.25), col="red")
hist(faithful$eruptions, freq=F, n=15, main="Histogram of Old Faithful Eruption Times", xlab="Duration (mins)")
0.4
0.2
0.0
Density
0.6
1.5
2.0
2.5
3.0
3.5
4.0
Duration (mins)
4.5
5.0
Examples of usage
library(), require()
> library(abd)
> require(lattice)
histogram()
require(lattice)
data(iris)
histogram(iris$Sepal.Length, breaks=seq(4,8,.25))
histogram(~ Sepal.Length, data=iris, main="Iris Sepals", xlab="Length")
histogram(~ Sepal.Length | Species, data=iris, col="red")
histogram(~ Sepal.Length | Species, data=iris, n=15, layout=c(1,3))
read.csv()
> As.in.H2O = read.csv("http://www.calvin.edu/~scofield/data/comma/arsenicInWater.csv")
read.table()
> senate = read.table("http://www.calvin.edu/~scofield/data/tab/rc/senate99.dat", sep="\t", header=T)
"State"
"LandArea"
"Population"
"DensityPop"
"HousingUnits"
"TotalArea"
"DensityHousing"
> x = counties$LandArea
> mean(x, na.rm = T)
[1] 1126.214
> median(x, na.rm = T)
[1] 616.48
> summary(x)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max.
     1.99    431.70    616.50   1126.00    923.20 145900.00
      20%       40%       60%       80%      100%
   403.29    554.36    717.94   1043.82 145899.69
Examples of usage
sum()
> firstTwentyIntegers = 1:20
> sum(firstTwentyIntegers)
[1] 210
> die = 1:6
> manyRolls = sample(die, 100, replace=T)
> sixFreq = sum(manyRolls == 6)
> sixFreq / 100
[1] 0.14
stem()
> monarchs = read.csv("http://www.calvin.edu/~scofield/data/comma/monarchReigns.csv")
> stem(monarchs$years)
The decimal point is 1 digit(s) to the right of the |
  0 | 0123566799
  1 | 0023333579
  2 | 012224455
  3 | 355589
  4 | 4
  5 | 069
  6 | 3
Male
88
Female
Male
Male
88
Examples of usage
pol
Far Right
Liberal
Middleoftheroad
Male
sex
Female
Conservative
Political04
1
1
2
1
3
1
5
1
6
2
7
1
9 10 12 13 15 17 19 20 21 22 24 25 33 35 38 39 44 50 56 59 63
2 2 1 4 1 1 1 1 1 3 2 2 1 3 1 1 1 1 1 1 1
2
1
3
1
5
1
6
2
7
1
9 10 12 13 15 17 19 20 21 22 24 25 33 35 38 39 44 50 56 59 63
2 2 1 4 1 1 1 1 1 3 2 2 1 3 1 1 1 1 1 1 1
 (5,10] (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] (45,50] (50,55] (55,60]
      7       6       3       8       0       4       2       1       1       0       2
Examples of usage
barplot()
40
80
pol = read.csv("http://www.calvin.edu/~stob/data/csbv.csv")
barplot(table(pol$Political04), main="Political Leanings, Calvin Freshman 2004")
barplot(table(pol$Political04), horiz=T)
barplot(table(pol$Political04),col=c("red","green","blue","orange"))
barplot(table(pol$Political04),col=c("red","green","blue","orange"),
names=c("Conservative","Far Right","Liberal","Centrist"))
Conservative
Liberal
Centrist
60
10
20
30
40
50
Female
Male
Conservative
Far Right
Liberal
Middleoftheroad
Examples of usage
boxplot()
data(iris)
boxplot(iris$Sepal.Length)
boxplot(iris$Sepal.Length, col="yellow")
boxplot(Sepal.Length ~ Species, data=iris)
boxplot(Sepal.Length ~ Species, data=iris, col="yellow", ylab="Sepal length",main="Iris Sepal Length by Species")
6.5
5.5
4.5
Sepal length
7.5
setosa
versicolor
virginica
plot()
90
70
50
data(faithful)
plot(waiting~eruptions,data=faithful)
plot(waiting~eruptions,data=faithful,cex=.5)
plot(waiting~eruptions,data=faithful,pch=6)
plot(waiting~eruptions,data=faithful,pch=19)
plot(waiting~eruptions,data=faithful,cex=.5,pch=19,col="blue")
plot(waiting~eruptions, data=faithful, cex=.5, pch=19, col="blue", main="Old Faithful Eruptions",
ylab="Wait time between eruptions", xlab="Duration of eruption")
1.5
2.0
2.5
3.0
3.5
4.0
Duration of eruption
4.5
5.0
Examples of usage
sample()
> sample(c("Heads","Tails"), size=1)
[1] "Heads"
> sample(c("Heads","Tails"), size=10, replace=T)
[1] "Heads" "Heads" "Heads" "Tails" "Tails" "Tails" "Tails" "Tails" "Tails" "Heads"
> sample(c(0, 1), 10, replace=T)
[1] 1 0 0 1 1 0 0 1 0 0
> sum(sample(1:6, 2, replace=T))
[1] 10
> sample(c(0, 1), prob=c(.25,.75), size=10, replace=T)
[1] 1 1 1 0 1 1 1 1 1 1
> sample(c(rep(1,13),rep(0,39)), size=5, replace=F)
[1] 0 0 0 0 0
replicate()
> sample(c("Heads","Tails"), 2, replace=T)
[1] "Tails" "Heads"
> replicate(5, sample(c("Heads","Tails"), 2, replace=T))
     [,1]    [,2]    [,3]    [,4]    [,5]
[1,] "Heads" "Tails" "Heads" "Tails" "Heads"
[2,] "Heads" "Tails" "Heads" "Heads" "Heads"
> ftCount = replicate(100000, sum(sample(c(0, 1), 10, rep=T, prob=c(.6, .4))))
> hist(ftCount, freq=F, breaks=-0.5:10.5, xlab="Free throws made out of 10 attempts",
+      main="Simulated Sampling Dist. for 40% FT Shooter", col="green")
0.15
0.00
Density
10
Examples of usage
[1] 0.03125
> dbinom(0:5, 5, .5)
[1] 0.5
> pbinom(2, 5, .5)
[1] 0.5
> flip5 = replicate(10000, sum(sample(c("H","T"), 5, rep=T)=="H"))
> table(flip5) / 10000    # distribution (simulated) of count of heads in 5 flips
flip5
     0      1      2      3      4      5
0.0310 0.1545 0.3117 0.3166 0.1566 0.0296
> table(rbinom(10000, 5, .5)) / 10000
     0      1      2      3      4      5
0.0304 0.1587 0.3087 0.3075 0.1634 0.0313
> qbinom(seq(0,1,.2), 50, .2)
[1]
9 11 12 50
10
Examples of usage
[1] 0.679813
> pchisq(3.1309, df=5, lower.tail=F)    # same as above
[1] 0.679813
> qchisq(c(.001,.005,.01,.025,.05,.95,.975,.99,.995,.999), 2)
[1]
[8]
0.050635616
0.102586589
5.991464547
0.050635616
0.102586589
7.377758908
# same as above
5.991464547
7.377758908
addmargins()
> blood = read.csv("http://www.calvin.edu/~scofield/data/comma/blood.csv")
> t = table(blood$Rh, blood$type)
> addmargins(t)    # to add both row/column totals
        A  AB   B   O Sum
  Neg   6   1   2   7  16
  Pos  34   3   9  38  84
  Sum  40   4  11  45 100
> addmargins(t, 1)
A AB B O
Neg 6 1 2 7
Pos 34 3 9 38
Sum 40 4 11 45
> addmargins(t, 2)
       A AB  B  O Sum
  Neg  6  1  2  7  16
  Pos 34  3  9 38  84
11
Examples of usage
prop.table()
> smoke = matrix(c(51,43,22,92,28,21,68,22,9),ncol=3,byrow=TRUE)
> colnames(smoke) = c("High","Low","Middle")
> rownames(smoke) = c("current","former","never")
> smoke = as.table(smoke)
> smoke
20 40 60 80
current
former
never
High
Low
12
Middle
Examples of usage
par()
> par(mfrow = c(1,2))        # set figure so next two plots appear side-by-side
> poisSamp = rpois(50, 3)    # Draw sample of size 50 from Pois(3)
> maxX = max(poisSamp)       # will help in setting horizontal plotting region
> hist(poisSamp, freq=F, breaks=-.5:(maxX+.5), col="green", xlab="Sampled values")
> plot(0:maxX, dpois(0:maxX, 3), type="h", ylim=c(0,.25), col="blue", main="Probabilities for Pois(3)")
0.00
0.10
0.20
0.15
0.00
Density
0.30
Histogram of poisSamp
0 1 2 3 4 5 6 7
Sampled values
0:maxX
fisher.test()
> blood = read.csv("http://www.calvin.edu/~scofield/data/comma/blood.csv")
> tblood = xtabs(~Rh + type, data=blood)
> tblood    # contingency table for blood type and Rh factor
     type
Rh     A AB  B  O
  Neg  6  1  2  7
  Pos 34  3  9 38
> chisq.test(tblood)
Pearson's Chi-squared test
data: tblood
X-squared = 0.3164, df = 3, p-value = 0.957
> fisher.test(tblood)
Fisher's Exact Test for Count Data
data: tblood
p-value = 0.8702
alternative hypothesis: two.sided
13
Examples of usage
dpois(), ppois()
> dpois(2:7, 4.2)
[1] 0.077977
> 1 - ppois(7, 4.2)
[1] 0.06394334
[1] 0.2524925
> qnorm(c(.95, .975, .995))
0.00
0.10
0.20
Density
>
>
>
>
nSamp
14
10
12
Examples of usage
[1] 0.02980016
>
>
>
>
+
>
>
>
>
+
0.2
std. normal
t, df=1
t, df=4
t, df=10
0.0
pdf values
0.4
xs
by()
> data(warpbreaks)
> by(warpbreaks$breaks, warpbreaks$tension, mean)
warpbreaks$tension: L
[1] 36.38889
--------------------------------------------------------------------------
warpbreaks$tension: M
[1] 26.38889
--------------------------------------------------------------------------
warpbreaks$tension: H
[1] 21.66667
15
Examples of usage
t.test()
> data(sleep)
> t.test(extra ~ group, data=sleep)
qqnorm(), qqline()
Normal QQ Plot
50
30
10
Theoretical Quantiles
16
Examples of usage
power.t.test()
> power.t.test(n=20, delta=.1, sd=.4, sig.level=.05)
              n = 20
          delta = 0.1
             sd = 0.4
      sig.level = 0.05
          power = 0.1171781
    alternative = two.sided
              n = 252.1281
          delta = 0.1
             sd = 0.4
      sig.level = 0.05
          power = 0.8
    alternative = two.sided
anova()
require(lattice)
require(abd)
data(JetLagKnees)
xyplot(shift ~ treatment, JetLagKnees, type=c('p','a'), col="navy", pch=19, cex=.5)
anova( lm( shift ~ treatment, JetLagKnees ) )
Analysis of Variance Table
Response: shift
          Df Sum Sq Mean Sq F value   Pr(>F)
treatment  2 7.2245  3.6122  7.2894 0.004472 **
Residuals 19 9.4153  0.4955
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
shift
control
eyes
treatment
17
knee