Sunteți pe pagina 1din 13

Question # 1

# Question 1, model 1: logistic regression on the Titanic data.
# caret supplies confusionMatrix().
library(caret)

# The original paths were wrapped in the middle of the string literal, which
# embeds line breaks in the file name. Build each path from one directory
# string so it stays intact.
data_dir <- "D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment"
training_data <- read.csv(file.path(data_dir, "titanic_training.csv"))
testing_data <- read.csv(file.path(data_dir, "titanic_testing.csv"))

glmmodel <- glm(
  survived ~ age + fare + sibsp + sex + pclass,
  data = training_data,
  family = binomial("logit")
)

# predict() on a glm returns the linear predictor (log-odds) unless
# type = "response" is given. A 0.5 cut-off is only meaningful on the
# probability scale, so request probabilities explicitly.
glm_prob <- predict(glmmodel, newdata = testing_data, type = "response")
glm_predict <- ifelse(glm_prob > 0.5, 1, 0)

# confusionMatrix() compares two factors with identical levels, so both the
# predictions and the reference labels are converted with the same level set.
# NOTE(review): the statistics shown after this command in the write-up were
# produced with the earlier log-odds cut-off; re-run to refresh them.
confusionMatrix(
  factor(glm_predict, levels = c(0, 1)),
  factor(testing_data$survived, levels = c(0, 1))
)

Confusion Matrix and Statistics

Reference
Prediction 0 1
0 166 41
1 16 81

Accuracy : 0.8125
95% CI : (0.764, 0.8548)
No Information Rate : 0.5987
P-Value [Acc > NIR] : 1.063e-15

Kappa : 0.5962
Mcnemar's Test P-Value : 0.001478

Sensitivity : 0.9121
Specificity : 0.6639
Pos Pred Value : 0.8019
Neg Pred Value : 0.8351
Prevalence : 0.5987
Detection Rate : 0.5461
Detection Prevalence : 0.6809
Balanced Accuracy : 0.7880
'Positive' Class : 0

# Question 1, model 2: classification tree (rpart) on the Titanic data.
# rpart supplies rpart(); caret supplies confusionMatrix().
library(rpart)
library(caret)

# The original call had `method` split across lines ("m" / "ethod"), which
# does not parse; the argument is written out in full here.
dtmodel <- rpart(
  survived ~ age + fare + sibsp + sex + pclass,
  data = training_data,
  method = "class"
)

# type = "class" returns the predicted label as a factor.
dt_predict <- predict(dtmodel, testing_data, type = "class")

# The reference must be a factor with the same levels as the predictions.
confusionMatrix(
  dt_predict,
  factor(testing_data$survived, levels = levels(dt_predict))
)
Confusion Matrix and Statistics

Reference
Prediction 0 1
0 220 41
1 28 103

Accuracy : 0.824
95% CI : (0.7826, 0.8604)
No Information Rate : 0.6327
P-Value [Acc > NIR] : <2e-16

Kappa : 0.614
Mcnemar's Test P-Value : 0.1486
Sensitivity : 0.8871
Specificity : 0.7153
Pos Pred Value : 0.8429
Neg Pred Value : 0.7863
Prevalence : 0.6327
Detection Rate : 0.5612
Detection Prevalence : 0.6658
Balanced Accuracy : 0.8012
'Positive' Class : 0

# Question 1, model 3: support vector machine on the Titanic data.
# e1071 supplies svm(); caret supplies confusionMatrix().
library(e1071)
library(caret)

# svm() chooses regression when the response is numeric. Wrapping `survived`
# in factor() and naming the type makes it fit a classifier, so predict()
# returns class labels directly and no 0.5 cut-off is needed.
svmmodel <- svm(
  factor(survived) ~ age + fare + sibsp + sex + pclass,
  data = training_data,
  type = "C-classification"
)

# na.exclude is kept from the original call (rows with missing predictors).
svm_predict <- predict(svmmodel, testing_data, na.action = na.exclude)

# The reference must be a factor with the same levels as the predictions.
# NOTE(review): the matrix shown after this command in the write-up (39
# observations, a single reference class) came from the earlier
# regression-based run and should be regenerated.
confusionMatrix(
  svm_predict,
  factor(testing_data$survived, levels = levels(svm_predict))
)

Confusion Matrix and Statistics

Reference
Prediction 0 1
0 38 0
1 1 0

Accuracy : 0.9744
95% CI : (0.8652, 0.9994)
No Information Rate : 1
P-Value [Acc > NIR] : 1

Kappa : 0
Mcnemar's Test P-Value : 1

Sensitivity : 0.9744
Specificity : NA
Pos Pred Value : NA
Neg Pred Value : NA
Prevalence : 1.0000
Detection Rate : 0.9744
Detection Prevalence : 0.9744
Balanced Accuracy : NA

'Positive' Class : 0
Question # 2
# Question 2: mine association rules from the Groceries transactions with a
# minimum support of 0.001 and a minimum confidence of 0.97.
# Assumes the arules package is attached and `Groceries` is already loaded.
rules <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.97)
)
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen target   ext
       0.97    0.1    1 none FALSE            TRUE       5   0.001      1     10  rules FALSE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE

Absolute minimum support count: 9

set item appearances ...[0 item(s)] done [0.00s].


set transactions ...[10004 item(s), 9835 transaction(s)] done [0.01s].
sorting and recoding items ... [157 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 6 done [0.02s].
writing ... [28 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
# Show the summary line for the mined rule set.
print(rules)
set of 28 rules

# List the first 25 rules with their support, confidence and lift.
inspect(rules[seq_len(25)])
lhs rhs support confidence lift
[1] {rice,
sugar} => {whole milk} 0.0012 1 3.9
[2] {canned fish,
hygiene articles} => {whole milk} 0.0011 1 3.9
[3] {butter,
rice,
root vegetables} => {whole milk} 0.0010 1 3.9
[4] {flour,
root vegetables,
whipped/sour cream} => {whole milk} 0.0017 1 3.9
[5] {butter,
domestic eggs,
soft cheese} => {whole milk} 0.0010 1 3.9
[6] {citrus fruit,
root vegetables,
soft cheese} => {other vegetables} 0.0010 1 5.2
[7] {butter,
hygiene articles,
pip fruit} => {whole milk} 0.0010 1 3.9
[8] {hygiene articles,
root vegetables,
whipped/sour cream} => {whole milk} 0.0010 1 3.9
[9] {hygiene articles,
pip fruit,
root vegetables} => {whole milk} 0.0010 1 3.9
[10] {cream cheese,
domestic eggs,
sugar} => {whole milk} 0.0011 1 3.9
[11] {curd,
domestic eggs,
sugar} => {whole milk} 0.0010 1 3.9
[12] {cream cheese,
domestic eggs,
napkins} => {whole milk} 0.0011 1 3.9
[13] {brown bread,
pip fruit,
whipped/sour cream} => {other vegetables} 0.0011 1 5.2
[14] {grapes,
tropical fruit,
whole milk,
yogurt} => {other vegetables} 0.0010 1 5.2
[15] {ham,
pip fruit,
tropical fruit,
yogurt} => {other vegetables} 0.0010 1 5.2
[16] {ham,
pip fruit,
tropical fruit,
whole milk} => {other vegetables} 0.0011 1 5.2
[17] {oil,
root vegetables,
tropical fruit,
yogurt} => {whole milk} 0.0011 1 3.9
[18] {oil,
other vegetables,
root vegetables,
yogurt} => {whole milk} 0.0014 1 3.9
[19] {butter,
other vegetables,
root vegetables,
white bread} => {whole milk} 0.0010 1 3.9
[20] {butter,
other vegetables,
pork,
whipped/sour cream} => {whole milk} 0.0010 1 3.9
[21] {butter,
domestic eggs,
other vegetables,
whipped/sour cream} => {whole milk} 0.0012 1 3.9
[22] {butter,
fruit/vegetable juice,
tropical fruit,
whipped/sour cream} => {other vegetables} 0.0010 1 5.2
[23] {newspapers,
rolls/buns,
soda,
whole milk} => {other vegetables} 0.0010 1 5.2
[24] {citrus fruit,
pastry,
rolls/buns,
whipped/sour cream} => {whole milk} 0.0010 1 3.9
[25] {citrus fruit,
root vegetables,
tropical fruit,
whipped/sour cream} => {other vegetables} 0.0012 1 5.2
Question: 3

DHA Karachi
# Question 3 (DHA Karachi): yearly box plot, seasonal decomposition and a
# 48-step ARIMA forecast of the DHA house price index.

# The original path was wrapped in the middle of the string literal
# ("House-" / "index-karachi-dha.csv"); build it from intact pieces.
data_dir <- "D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment"
houseIndexDHA <- read.csv(file.path(data_dir, "House-index-karachi-dha.csv"))

head(houseIndexDHA)

# The original renamed the columns to lowercase "date"/"index" and then read
# them as $Date/$Index, which do not exist. The names below match the later
# references.
# NOTE(review): assumes the file has exactly two columns, date then index
# value - confirm against the CSV.
names(houseIndexDHA) <- c("Date", "Index")

dates <- as.Date(houseIndexDHA$Date, format = "%m/%d/%Y")

houseIndexDHA$Year <- as.numeric(format(dates, "%Y"))

houseIndexDHA$Month <- as.numeric(format(dates, "%m"))

head(houseIndexDHA)

boxplot(Index ~ Year, data = houseIndexDHA)

# `hpi` and `f` were used below without being created in this section.
# Build the monthly series and its STL decomposition here, starting at the
# year and month of the first row.
# NOTE(review): assumes rows are monthly and in ascending date order.
hpi <- ts(
  houseIndexDHA$Index,
  start = c(houseIndexDHA$Year[1], houseIndexDHA$Month[1]),
  frequency = 12
)
f <- stl(hpi, "per")

plot(f)

# Seasonal component of the first 12 observations.
plot(
  f$time.series[1:12, "seasonal"],
  type = "b", xlab = "Month", ylab = "Seasonal Components"
)

# Seasonal ARIMA fit and 48-step-ahead forecast with a +/- 2 standard error
# band (U = upper, L = lower).
fit <- arima(
  hpi,
  order = c(2, 0, 1),
  seasonal = list(order = c(2, 1, 0), period = 12)
)
fore <- predict(fit, n.ahead = 48)
U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se

# The title string was wrapped across two lines in the original; it is
# restored to one line.
ts.plot(
  hpi, fore$pred, U, L,
  col = c("black", "blue", "green", "red"), lty = c(1, 5, 2, 2),
  gpars = list(xaxt = "n", xlab = ""), ylab = "Index",
  main = "House Price Trading Index Forecast(DHA)"
)
fore

Conclusion
Aug 2017: 489
Aug 2019: 590
(590 - 489) / 489 x 100 = 20.65% growth in 2 years
On average, that is roughly 10% growth per year.

If we invest 10M today, we will get about 12M (10M x 1.20) after 2 years.

DHA is a better option for investment compared to the KSE-100.

KSE-100
# Question 3 (KSE-100): yearly box plot, seasonal decomposition and a
# 48-step ARIMA forecast of the KSE-100 closing index.
# NOTE(review): `kse100` is not created in this section; presumably it was
# read from kse100.csv beforehand - confirm.

# Parse the trading dates and derive numeric year/month columns.
dates <- as.Date(kse100$Date, format = "%d-%b-%Y")
kse100$Year <- as.numeric(format(dates, "%Y"))
kse100$Month <- as.numeric(format(dates, "%m"))
head(kse100)

# Distribution of closing values per year.
boxplot(kse100$Close ~ kse100$Year, data = kse100)

# Monthly series starting January 2014, decomposed with a periodic
# seasonal window.
hpi <- ts(kse100$Close, frequency = 12, start = c(2014, 1))
f <- stl(hpi, "per")
plot(f)

# Seasonal component of the first 12 observations.
plot(
  f$time.series[seq_len(12), "seasonal"],
  type = "b",
  xlab = "Month",
  ylab = "Seasonal Components"
)

# Seasonal ARIMA fit and 48-step-ahead forecast.
fit <- arima(
  hpi,
  order = c(2, 0, 1),
  seasonal = list(order = c(2, 1, 0), period = 12)
)
fore <- predict(fit, n.ahead = 48)

# Upper and lower bands at two standard errors around the forecast.
U <- fore$pred + 2 * fore$se
L <- fore$pred - 2 * fore$se

ts.plot(
  hpi, fore$pred, U, L,
  col = c("black", "blue", "green", "red"),
  lty = c(1, 5, 2, 2),
  gpars = list(xaxt = "n", xlab = ""),
  ylab = "Index",
  main = "KSE-100 Index Forecast"
)
fore
Conclusion
Aug 17: 46,533 (from kse100.csv)
Aug 19: 53,543
(53,543 – 46,533) / 46,533 x 100 = 15% growth in 2 years
On average, annual market growth is 7.5%.

If we invest 10M today, we will get 11.5M (10M x 1.15) after 2 years.
Question: 4
# Question 4: k-means clustering of drivers on distance and speeding features.
library(ggplot2)

# The original path was wrapped in the middle of the string literal; build it
# from intact pieces.
data_dir <- "D:/Hadi Data/Desktop/IBA - MBA/7th Term/BA/Final Assignment"
drivers_data <- read.csv(file.path(data_dir, "drivers_data.csv"))

# Raw scatter of the two features. aes() takes column names evaluated inside
# `data`, so the columns are not prefixed with `drivers_data$`.
ggplot(drivers_data, aes(Distance_Feature, Speeding_Feature)) +
  geom_point()

head(drivers_data)

# kmeans() picks random starting centres; fixing the seed makes the cluster
# assignment reproducible between runs.
set.seed(123)

# Four clusters, best of 20 random starts.
# NOTE(review): columns 2:3 are presumably Distance_Feature and
# Speeding_Feature - confirm against the CSV layout.
drivers_cluster <- kmeans(drivers_data[, 2:3], 4, nstart = 20)

drivers_cluster

table(drivers_cluster$cluster, drivers_data$Speeding_Feature)

# Store the labels as a factor so ggplot uses a discrete colour scale.
drivers_cluster$cluster <- as.factor(drivers_cluster$cluster)

ggplot(
  drivers_data,
  aes(Distance_Feature, Speeding_Feature, color = drivers_cluster$cluster)
) +
  geom_point()

S-ar putea să vă placă și