Sunteți pe pagina 1din 6

> #####Challenge 0:########

> # Name of Workshop: Analyze - Distribution Analysis


> # Objective: Understand which probability distribution to use for arrivals.
> # Search the Web for tutorials, examples, hints, etc.
> # Challenge 0: Create a new script and add a multi-line comment
> # at the top with the name of the workshop, your name, and the date.
> # Save the script in your R script folder.
> #
> # Name of Student: Date:
>
>
> #####Challenge 1:########
> # Challenge 1: We will need the following packages:
> # "Companion to Applied Regression," car
> # "Modern Applied Statistics with S," MASS (Note: R is descended from S.)
> # "Normality Test," nortest
> # "Visualizing Categorical Data," vcd
> #
> # Install and load them.
> #Install
> # Install any workshop package that is not yet available.
> # requireNamespace() only checks availability; attaching is left to the
> # library() calls below, which stop with an error if a package is still
> # missing (require() would merely return FALSE).
> workshop_pkgs <- c("car", "MASS", "nortest", "vcd")

> for (pkg in workshop_pkgs) {
+   if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
+ }

> #load
> library("car")

> library("MASS")

> library("nortest")

> library("vcd")

> # In RStudio's Session menu, set the working directory to the source
> # file location.
>
>
> #Done
>
> #####Challenge 2:########
> # Challenge 2: Read "OR Cases 3.csv" into a data.frame.
>
> rm(list=ls())

> # Read the raw OR case log from the working directory. The encoding name is
> # the single string "UTF-8-BOM" (it had been split across lines).
> OR_cases_frame <- read.csv("OR Cases 3.csv", header = TRUE,
+                            fileEncoding = "UTF-8-BOM")

> #####Challenge 3:########


> # Challenge 3: Sum the OR cases by week of year. Change the column
> # names to "Week" and "Number.of.cases". Print the first 10 rows.
>
> # Total the cases per week. The week labels are taken from the tapply()
> # result instead of a hard-coded 1:52, so a file covering fewer (or other)
> # weeks cannot be mislabelled or fail on a length mismatch.
> weekly_totals <- tapply(OR_cases_frame$Number.of.cases,
+                         OR_cases_frame$Week, FUN = sum)
> Weekly_Dat <- data.frame(
+   Week = type.convert(names(weekly_totals), as.is = TRUE),
+   Number.of.cases = as.vector(weekly_totals)
+ )
> head(Weekly_Dat,10)
Week Number.of.cases
1 1 94
2 2 137
3 3 120
4 4 135
5 5 129
6 6 135
7 7 114
8 8 132
9 9 148
10 10 130

> #####Challenge 4:########


> # Challenge 4: Create a histogram of the number of cases by week.
> # Q: Does this look Poisson? Normal?
>
> # Give the chart file a name.
> png(file = "histogram_WeeklyCases.png")

> # Create the histogram.


> hist(Weekly_Dat$Number.of.cases,xlab = "No_of_Cases", ylab ="Probability", col =
"green",border = "red",freq = FALSE)

> # Q: Does this look Poisson? Normal?


> # Neither. In a Poisson distribution the right tail is at least as long as the
> # left tail; this looks more like a stratified binomial.
>
> #save
> dev.off()
RStudioGD
2

> #####Challenge 5:########


> # Challenge 5: We would prefer that the arrivals be Markovian
> # so we can use M/M/c queueing models. Arrivals are Markovian
> # if the time between arrivals is exponentially distributed or,
> # equivalently, the number of arrivals by day is Poisson
> # distributed.
> #
> # Let's see if the number of OR cases follows a Poisson distribution.
> #
> # H0: The data, Weekly_Dat$Number.of.cases, follows a Poisson distribution.
> # H1: The data, Weekly_Dat$Number.of.cases, does not follow a Poisson distribution.
> # Significance level = 1% = 0.01
>
> # (a) Goodness-of-fit of the weekly case counts against a Poisson model,
> # with the parameter estimated by maximum likelihood.
> poissonfit <- goodfit(
+   Weekly_Dat$Number.of.cases,
+   type = "poisson",
+   method = "ML"
+ )

> # (b) Create a summary so you can do a hypothesis test:


> summary(poissonfit)

Goodness-of-fit test for poisson distribution

X^2 df P(> X^2)


Likelihood Ratio 117.1492 33 2.37565e-11

> # Since the p-value, P(> X^2), is far below 0.01, we reject the null
> # hypothesis in favor of the alternative.
> # The data does not follow a Poisson distribution.
>
>
> # (c) Plot the results.
> png(file = "Fit2Poisson_NumberofCases.png")

> plot(poissonfit)

> dev.off()
RStudioGD
2

> # Refit the Poisson model after dropping the weeks below the first quartile.
> # The cut-off is requested explicitly as the 0.25 quantile (the same value
> # that quantile(x)[[2]] returns) so the intent is visible.
> q1_cases <- quantile(Weekly_Dat$Number.of.cases, 0.25)[[1]]
> poissonfit2 <- goodfit(
+   Weekly_Dat$Number.of.cases[Weekly_Dat$Number.of.cases >= q1_cases],
+   type = "poisson", method = "ML"
+ )

> summary(poissonfit2)

Goodness-of-fit test for poisson distribution

X^2 df P(> X^2)


Likelihood Ratio 51.59466 23 0.0005665989

> # Doesn't help: since the p-value, P(> X^2), is still well below 0.01, we
> # reject the null hypothesis in favor of the alternative.
> # The data does not follow a Poisson distribution.
> # Doesn't Improve the fit either:
> # poissonfit2 = invisible(goodfit(
> #   Weekly_Dat$Number.of.cases[
> #     (Weekly_Dat$Number.of.cases >= quantile(Weekly_Dat$Number.of.cases)[[2]]) &
> #     (Weekly_Dat$Number.of.cases <= quantile(Weekly_Dat$Number.of.cases)[[4]])],
> #   type = "poisson", method = "ML"))
> #
> # summary(poissonfit2)
> plot(poissonfit2)

> #####Challenge 6:########


> # Challenge 6: Suppose the data is Poisson.
> # (a) Estimate the Poisson parameter, lambda.
>
>
> parms<- fitdistr(Weekly_Dat$Number.of.cases, "poisson")

> lambda <- parms$estimate

> stdev <- parms$sd

> # (b) Estimate the 95% confidence interval of lambda.


> ci <- c(lambda + c(-1,1) * 1.96 * stdev)

> #####Challenge 7:########


> # Challenge 7: Suppose the data is Poisson and we want to generate
> # trials for a simulation. Generate 100 trials.
> Simtrials = rpois(100, lambda)

> #####Challenge 8:########


> # Challenge 8: Let's see if the number of OR cases follows a binomial
> # distribution.
> # A binomial distribution is based on the count of successful Bernoulli trials.
> # (Which begs the question, what is a "trial" when counting number of cases?)
> # (a) Perform a goodness of fit test with maximum likelihood as
> # the goodness of fit criterion.
>
> # Goodness-of-fit of the weekly case counts against a binomial model, fitted
> # by maximum likelihood. The stray trailing comma (an empty extra argument)
> # has been removed from the call.
> # NOTE(review): no `size` is supplied; per the vcd documentation goodfit()
> # then uses the largest observed count as the number of trials - confirm
> # that this is the intended "trial" definition.
> binomfit <- goodfit(Weekly_Dat$Number.of.cases, type = "binomial",
+                     method = "ML")

> # (b) Create a summary so you can do a hypothesis test:


> # H0: Data is binomial-distributed. H1: Data is not binomial-distributed.
>
> summary(binomfit)

Goodness-of-fit test for binomial distribution

X^2 df P(> X^2)


Likelihood Ratio 562.2014 33 1.535471e-97

> #Reject H0 in favor of H1


>
> # Refit the binomial model on the weeks inside the interquartile range only.
> # The bounds are requested explicitly as the 0.25 and 0.75 quantiles (the
> # values that quantile(x)[[2]] and quantile(x)[[4]] return).
> # Note: this overwrites the full-data `binomfit` from above.
> case_quartiles <- quantile(Weekly_Dat$Number.of.cases, c(0.25, 0.75))
> in_iqr <- Weekly_Dat$Number.of.cases >= case_quartiles[[1]] &
+   Weekly_Dat$Number.of.cases <= case_quartiles[[2]]
> binomfit <- goodfit(Weekly_Dat$Number.of.cases[in_iqr],
+                     type = "binomial", method = "ML")

> summary(binomfit)

Goodness-of-fit test for binomial distribution

X^2 df P(> X^2)


Likelihood Ratio 118.2337 12 1.389673e-19

> # # (c) Plot the results.


> png(file = "binomfit_NumberofCases.png")

> plot(binomfit)

> dev.off()
RStudioGD
2

> # Documentation:
> # https://www.rdocumentation.org/packages/vcd/versions/1.4-4/topics/goodfit
> # Q: Is the data binomial-distributed?
>
> #ANs: No.
>
>
>
> #####Challenge 9:########
> # Challenge 9: The normal distribution is sometimes a good approximation for
> # Poisson and binomial distributions, among others. Create a Q-Q plot of the
> # number of cases.
> png(file = "Q-Qplot.png")

> qqnorm(Weekly_Dat$Number.of.cases)
> qqline(Weekly_Dat$Number.of.cases)

> dev.off()
RStudioGD
2

> #####Challenge 10:########


> # Challenge 10: Let's test if the number of OR cases follows a normal
> # distribution.
> # (a) Conduct a statistical test of normality, such as Anderson-Darling,
> # Kolmogorov-Smirnov (Lilliefors), or Shapiro-Wilk.
> shapiro.test(Weekly_Dat$Number.of.cases)

Shapiro-Wilk normality test

data: Weekly_Dat$Number.of.cases
W = 0.92171, p-value = 0.00217

> # (b) Test the hypothesis:


> # H0: Data is normally-distributed. H1: Data is not normally-distributed.
>
> # Assume significance level = 0.005.
> # Since the p-value (0.00217) is below 0.005, we reject the null hypothesis.
>
> # Q: Is the data normally-distributed?
>
> # No. At the 0.005 significance level the Shapiro-Wilk test rejects the
> # hypothesis that the data is normally distributed.
> # Note: we would usually only conclude that H0 cannot be rejected after
> # multiple comparisons; failing to reject H0 would not prove normality.
>
>
>
>
>
>
>
> #####Challenge 11:########
> # Challenge 11: Suppose we cannot identify a reasonable distribution,
> # but we still want to generate trials for a simulation.
> # (a) Generate 100 trials by sampling with replacement from the dataset.
> S1= sample(Weekly_Dat$Number.of.cases, 100, replace = TRUE)

> # (b) Create a histogram of the 100 trials.


> # Open the PNG device. The file name is passed as a plain string; wrapping it
> # in file() hands png() a connection object instead of the path it expects.
> png(filename = "ResamplingHistS1.png")

> # Compute the histogram of the resampled trials without drawing it
> # (plot = FALSE), so only the overlay below ends up in the file.
> p1 <- hist(S1, plot = FALSE)

> # Note: This is called "resampling."


> # Q: Does this histogram look like the original data's histogram?
> p2 <- hist(Weekly_Dat$Number.of.cases, plot = FALSE)

> # Draw both histograms as densities (freq = FALSE). plot() on a histogram
> # object otherwise shows counts, which are not comparable between the
> # resampled trials and the original weekly totals and would not match the
> # "Probability" axis label. Shared axis limits keep both sets of bars visible.
> overlay_xlim <- range(p1$breaks, p2$breaks)
> overlay_ylim <- c(0, max(p1$density, p2$density))
> plot(p1, freq = FALSE, main = "Resampling vs Original Hist",
+      xlab = "No_of_Cases", ylab = "Probability", col = "antiquewhite1",
+      xlim = overlay_xlim, ylim = overlay_ylim)

> plot(p2, freq = FALSE, col = "azure", add = TRUE)


> dev.off()
RStudioGD
2

> #Similar Histograms

S-ar putea să vă placă și