CompleteFinalAssignment HadiRaza

Question # 1
> # Load the Titanic training and testing sets.
> # The path literals are written on one line each: a line break inside the
> # quotes would become part of the file name and make read.csv() fail.
> training_data <- read.csv("D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment/titanic_training.csv")
> testing_data <- read.csv("D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment/titanic_testing.csv")
> # Logistic regression of survival on five passenger attributes.
> glmmodel <- glm(survived ~ age + fare + sibsp + sex + pclass, data = training_data,
+                 family = binomial("logit"))
> # predict() on a glm returns the linear predictor (log-odds) by default;
> # ask for probabilities so the 0.5 cut-off means P(survived) > 0.5.
> glm_prob <- predict(glmmodel, testing_data, type = "response")
> glm_predict <- ifelse(glm_prob > .5, 1, 0)
> # confusionMatrix() expects both arguments as factors with the same levels.
> confusionMatrix(factor(glm_predict, levels = c(0, 1)),
+                 factor(testing_data$survived, levels = c(0, 1)))
> # NOTE(review): the statistics printed below were recorded with the old
> # log-odds threshold; re-run this section to refresh them.
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 166 41
1 16 81
Accuracy : 0.8125
95% CI : (0.764, 0.8548)
No Information Rate : 0.5987
P-Value [Acc > NIR] : 1.063e-15
Kappa : 0.5962
Mcnemar's Test P-Value : 0.001478
Sensitivity : 0.9121
Specificity : 0.6639
Pos Pred Value : 0.8019
Neg Pred Value : 0.8351
Prevalence : 0.5987
Detection Rate : 0.5461
Detection Prevalence : 0.6809
Balanced Accuracy : 0.7880
'Positive' Class : 0
> # Classification tree on the same five predictors.
> # The `method` argument is written in one piece; splitting the identifier
> # across lines ("m" / "ethod") is a syntax error.
> dtmodel <- rpart(survived ~ age + fare + sibsp + sex + pclass, data = training_data,
+                  method = "class")
> dt_predict <- predict(dtmodel, testing_data, type = "class")
> # Coerce the reference to a factor with the prediction's levels, as
> # confusionMatrix() requires two factors with identical levels.
> confusionMatrix(dt_predict,
+                 factor(testing_data$survived, levels = levels(dt_predict)))
Reference
Prediction 0 1
0 220 41
1 28 103
Accuracy : 0.824
95% CI : (0.7826, 0.8604)
No Information Rate : 0.6327
P-Value [Acc > NIR] : <2e-16
Kappa : 0.614
Mcnemar's Test P-Value : 0.1486
Specificity : 0.7153
Pos Pred Value : 0.8429
Neg Pred Value : 0.7863
Prevalence : 0.6327
Balanced Accuracy : 0.8012
> # Support vector machine on the same five predictors.
> # NOTE(review): the 0.5 cut-off below implies numeric predictions, i.e. the
> # model is fitted as a regression on `survived` -- confirm this is intended.
> svmmodel <- svm(survived ~ age + fare + sibsp + sex + pclass, data = training_data)
> # Score an explicit, NA-free subset holding only the columns the model uses,
> # and evaluate against the survival labels of those same rows. Letting
> # predict() drop incomplete rows itself left just 39 rows, all from one
> # class, in the recorded output (presumably because rows with an NA in any
> # column, including unused ones, were discarded -- TODO confirm).
> svm_vars <- c("survived", "age", "fare", "sibsp", "sex", "pclass")
> svm_test <- na.omit(testing_data[, svm_vars])
> svm_predict <- ifelse(predict(svmmodel, svm_test) > .5, 1, 0)
> confusionMatrix(factor(svm_predict, levels = c(0, 1)),
+                 factor(svm_test$survived, levels = c(0, 1)))
> # NOTE(review): the output printed below predates this fix; re-run to refresh.
Reference
Prediction 0 1
0 38 0
1 1 0
Accuracy : 0.9744
95% CI : (0.8652, 0.9994)
No Information Rate : 1
P-Value [Acc > NIR] : 1
Kappa : 0
Mcnemar's Test P-Value : 1
Specificity : NA
Pos Pred Value : NA
Neg Pred Value : NA
Prevalence : 1.0000
Balanced Accuracy : NA
Question # 2
> # Mine association rules with support >= 0.001 and confidence >= 0.97.
> # Parameter names are spelled out instead of relying on partial matching.
> rules <- apriori(Groceries, parameter = list(support = 0.001, confidence = 0.97))
Apriori
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support minlen max
len target
0.97 0.1 1 none FALSE TRUE 5 0.001 1
10 rules
ext
FALSE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE
Absolute minimum support count: 9
set item appearances ...[0 item(s)] done [0.00s].

set transactions ...[10004 item(s), 9835 transaction(s)] done [0.01s].
sorting and recoding items ... [157 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 done [0.02s].
writing ... [28 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
> rules
set of 28 rules
> inspect(rules[1:25])
lhs rhs support confidence lift
[1] {rice,
sugar} => {whole milk} 0.0012 1 3.9
[2] {canned fish,
hygiene articles} => {whole milk} 0.0011 1 3.9
[3] {butter,
rice,
root vegetables} => {whole milk} 0.0010 1 3.9
[4] {flour,
root vegetables,
whipped/sour cream} => {whole milk} 0.0017 1 3.9
[5] {butter,
domestic eggs,
soft cheese} => {whole milk} 0.0010 1 3.9
[6] {citrus fruit,
root vegetables,
soft cheese} => {other vegetables} 0.0010 1 5.2
[7] {butter,
hygiene articles,
pip fruit} => {whole milk} 0.0010 1 3.9
[8] {hygiene articles,
root vegetables,
[9] {hygiene articles,
pip fruit,
root vegetables} => {whole milk} 0.0010 1 3.9
[10] {cream cheese,
domestic eggs,
[11] {curd,
domestic eggs,
[12] {cream cheese,
domestic eggs,
napkins} => {whole milk} 0.0011 1 3.9
[13] {brown bread,
pip fruit,
whipped/sour cream} => {other vegetables} 0.0011 1 5.2
[14] {grapes,
tropical fruit,
whole milk,
yogurt} => {other vegetables} 0.0010 1 5.2
[15] {ham,
pip fruit,
tropical fruit,
yogurt} => {other vegetables} 0.0010 1 5.2
[16] {ham,
pip fruit,
tropical fruit,
whole milk} => {other vegetables} 0.0011 1 5.2
[17] {oil,
root vegetables,
tropical fruit,
yogurt} => {whole milk} 0.0011 1 3.9
[18] {oil,
other vegetables,
root vegetables,
yogurt} => {whole milk} 0.0014 1 3.9
[19] {butter,
other vegetables,
root vegetables,
white bread} => {whole milk} 0.0010 1 3.9
[20] {butter,
other vegetables,
pork,
[21] {butter,
domestic eggs,
other vegetables,
[22] {butter,
fruit/vegetable juice,
tropical fruit,
[23] {newspapers,
rolls/buns,
soda,
whole milk} => {other vegetables} 0.0010 1 5.2
[24] {citrus fruit,
pastry,
rolls/buns,
[25] {citrus fruit,
root vegetables,
tropical fruit,
Question: 3
DHA Karachi
# Load the DHA Karachi house-price index.
# The path is kept on one line: a line break inside the quotes would become
# part of the file name.
houseIndexDHA <- read.csv("D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment/House-index-karachi-dha.csv")
head(houseIndexDHA)
# Column names must match the `$Date` / `$Index` look-ups below. `$` is
# case-sensitive, so the previous c("date", "index") made both return NULL.
names(houseIndexDHA) <- c("Date", "Index")
dates <- as.Date(houseIndexDHA$Date, format = "%m/%d/%Y")
houseIndexDHA$Year <- as.numeric(format(dates, "%Y"))
houseIndexDHA$Month <- as.numeric(format(dates, "%m"))
head(houseIndexDHA)
boxplot(houseIndexDHA$Index ~ houseIndexDHA$Year, data = houseIndexDHA)

# Build the monthly series and its seasonal decomposition before plotting;
# `hpi` and `f` were used in this section without being defined first.
# NOTE(review): assumes one row per month in ascending date order, so the
# series start is taken from the first row -- confirm against the CSV.
hpi <- ts(houseIndexDHA$Index,
          start = c(houseIndexDHA$Year[1], houseIndexDHA$Month[1]),
          frequency = 12)
f <- stl(hpi, "per")
plot(f)
plot(f$time.series[1:12, "seasonal"], type = "b", xlab = "Month", ylab = "Seasonal Components")

# Seasonal ARIMA(2,0,1)(2,1,0)[12] and a 48-step-ahead forecast.
fit <- arima(hpi, order = c(2, 0, 1), seasonal = list(order = c(2, 1, 0), period = 12))
fore <- predict(fit, n.ahead = 48)
# Approximate upper/lower bands: forecast +/- 2 standard errors.
U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se
ts.plot(hpi, fore$pred, U, L, col = c("black", "blue", "green", "red"), lty = c(1, 5, 2, 2),
        gpars = list(xaxt = "n", xlab = ""), ylab = "Index",
        main = "House Price Trading Index Forecast(DHA)")
fore
Conclusion
Aug 2017: 489
Aug 2019: 590
(590 - 489) / 489 x 100 = 20.65% growth in 2 years.
On average, roughly 10% growth per year.
If we invest 10M today, we will get about 12M (10M x 1.20) after 2 years.
DHA is a better option for investment as compared to KSE100.
KSE-100
# KSE-100 analysis: derive year/month columns, decompose the closing series,
# fit a seasonal ARIMA and forecast 48 steps ahead.
# NOTE(review): `kse100` is not created anywhere in the visible script; it
# presumably comes from kse100.csv -- load it before running this section.
# "%b" parses abbreviated month names and depends on the session locale.
dates <- as.Date(kse100$Date, format="%d-%b-%Y")
kse100$Year <- as.numeric(format(dates,"%Y"))
kse100$Month <- as.numeric(format(dates,"%m"))
head(kse100)
boxplot(kse100$Close ~ kse100$Year,data = kse100)

# Treat Close as a monthly series starting January 2014.
# NOTE(review): assumes one row per month in ascending order -- confirm.
# `hpi` and `f` reuse the names from the house-price section and overwrite them.
hpi <- ts(kse100$Close, start = c(2014,1), frequency = 12)
f <- stl(hpi,"per")
plot(f)
# First 12 values of the seasonal component.
plot(f$time.series[1:12,"seasonal"],type = 'b', xlab = "Month", ylab = "Seasonal Components")

# Seasonal ARIMA(2,0,1)(2,1,0) with period 12; forecast 48 steps ahead.
fit <- arima(hpi, order=c(2,0,1), seasonal=list(order=c(2,1,0), period=12))
fore <- predict(fit, n.ahead=48)
# Bands at forecast +/- 2 standard errors.
U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se
ts.plot(hpi, fore$pred, U, L, col=c("black","blue","green","red"),lty=c(1,5,2,2),
gpars=list(xaxt="n",xlab=""), ylab="Index", main="KSE-100 Index Forecast")
# Print the forecast values and their standard errors.
fore
Conclusion
Aug 17: 46,533 (from kse100.csv)
Aug 19: 53,543
(53,543 – 46,533) / 46,533 x 100 = 15% growth in 2 years.
On average, annual market growth is 7.5%.
If we invest 10M today, we will get 11.5M (10M x 1.15) after 2 years.
Question: 4
# Load the per-driver features.
# The path is kept on one line: a line break inside the quotes would become
# part of the file name.
drivers_data <- read.csv("D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment/drivers_data.csv")
library(ggplot2)
# Refer to columns by bare name inside aes(); `data$col` bypasses the `data`
# argument and is discouraged by ggplot2.
ggplot(drivers_data, aes(Distance_Feature, Speeding_Feature)) +
  geom_point()
head(drivers_data)
# k-means starts from random centres; fix the seed so the cluster numbering
# is reproducible between runs.
set.seed(123)
# NOTE(review): assumes columns 2:3 are Distance_Feature and Speeding_Feature.
drivers_cluster <- kmeans(drivers_data[, 2:3], 4, nstart = 20)
drivers_cluster
table(drivers_cluster$cluster, drivers_data$Speeding_Feature)
drivers_cluster$cluster <- as.factor(drivers_cluster$cluster)
# Attach the cluster labels to a plotting copy so aes() can map them by name;
# drivers_data itself is left unchanged.
ggplot(transform(drivers_data, cluster = drivers_cluster$cluster),
       aes(Distance_Feature, Speeding_Feature, color = cluster)) +
  geom_point()

CompleteFinalAssignment HadiRaza

Încărcat de

Informații document

Titlu original

Drepturi de autor

Formate disponibile

Partajați acest document

Partajați sau inserați document

Opțiuni de partajare

Vi se pare util acest document?

Este necorespunzător acest conținut?

Drepturi de autor:

Formate disponibile

CompleteFinalAssignment HadiRaza

Încărcat de

Drepturi de autor:

Formate disponibile

Question # 1

> training_data <- read.csv("D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final

Confusion Matrix and Statistics

> dtmodel <- rpart(survived ~ age+fare+sibsp+sex+pclass, data=training_data,m

> svmmodel <- svm(survived ~ age+fare+sibsp+sex+pclass, data=training_data)

Confusion Matrix and Statistics

Absolute minimum support count: 9

set item appearances ...[0 item(s)] done [0.00s].

names(houseIndexDHA) <- c("date","index")

dates <- as.Date(houseIndexDHA$Date, format="%m/%d/%Y")

houseIndexDHA$Year <- as.numeric(format(dates,"%Y"))

houseIndexDHA$Month <- as.numeric(format(dates,"%m"))

boxplot(houseIndexDHA$Index ~ houseIndexDHA$Year,data = houseIndexDHA)

plot(f$time.series[1:12,"seasonal"],type = 'b', xlab = "Month", ylab = "Seasonal Components")

If we invest 10M Today, we will get 12 M (10 x 20%) after 2 years.

DHA is a better option for investment as compare to KSE100.

kse100$Year <- as.numeric(format(dates,"%Y"))

kse100$Month <- as.numeric(format(dates,"%m"))

boxplot(kse100$Close ~ kse100$Year,data = kse100)

plot(f$time.series[1:12,"seasonal"],type = 'b', xlab = "Month", ylab = "Seasonal Components")

drivers_cluster <- kmeans(drivers_data[,2:3],4,nstart = 20)

drivers_cluster$cluster <- as.factor(drivers_cluster$cluster)

ggplot(drivers_data, aes(drivers_data$Distance_Feature,drivers_data$Speeding_Feature, color =

S-ar putea să vă placă și