Sunteți pe pagina 1 din 12

cart

V. Visalakshi
22/11/2019

# NOTE(review): setwd() with an absolute machine-specific path is fragile --
# prefer project-relative paths (or here::here()); kept so the script still
# finds the data file on the original machine.
setwd("C:/Users/IBM/Downloads/R programming/Data Set/Datamining")

# Load the toy CART data set. Columns (per the printed output below):
# X = row id, X1 and X2 = numeric predictors in [0, 1],
# Y = class label ("Bad"/"Good").
cartdata <- read.csv("(CART)simpleDTDS.csv")
print(cartdata)

## X X1 X2 Y
## 1 1 0.26750821 0.26550866 Bad
## 2 2 0.21864528 0.37212390 Bad
## 3 3 0.51679684 0.57285336 Bad
## 4 4 0.26895059 0.90820779 Good
## 5 5 0.18116833 0.20168193 Bad
## 6 6 0.51857614 0.89838968 Good
## 7 7 0.56278294 0.94467527 Bad
## 8 8 0.12915685 0.66079779 Bad
## 9 9 0.25636760 0.62911404 Good
## 10 10 0.71793528 0.06178627 Bad
## 11 11 0.96140994 0.20597457 Good
## 12 12 0.10014085 0.17655675 Bad
## 13 13 0.76322269 0.68702285 Bad
## 14 14 0.94796635 0.38410372 Bad
## 15 15 0.81863469 0.76984142 Good
## 16 16 0.30829233 0.49769924 Good
## 17 17 0.64957946 0.71761851 Bad
## 18 18 0.95335545 0.99190609 Bad
## 19 19 0.95373265 0.38003518 Good
## 20 20 0.33997920 0.77744522 Good
## 21 21 0.26247411 0.93470523 Good
## 22 22 0.16545393 0.21214252 Bad
## 23 23 0.32216806 0.65167377 Good
## 24 24 0.51012521 0.12555510 Bad
## 25 25 0.92396847 0.26722067 Good
## 26 26 0.51095970 0.38611409 Bad
## 27 27 0.25762126 0.01339033 Bad
## 28 28 0.04646089 0.38238796 Bad
## 29 29 0.41785626 0.86969085 Good
## 30 30 0.85400150 0.34034900 Good
## 31 31 0.34723068 0.48208012 Good
## 32 32 0.13144232 0.59956583 Good
## 33 33 0.37448686 0.49354131 Good
## 34 34 0.63142023 0.18621760 Bad
## 35 35 0.39007893 0.82737332 Bad
## 36 36 0.68962785 0.66846674 Bad
## 37 37 0.68941341 0.79423986 Bad
## 38 38 0.55490062 0.10794363 Good
## 39 39 0.42962441 0.72371095 Good
## 40 40 0.45272006 0.41127443 Good
## 41 41 0.30644326 0.82094629 Good
## 42 42 0.57835394 0.64706019 Bad

1
## 43 43 0.91037030 0.78293276 Bad
## 44 44 0.14260408 0.55303631 Good
## 45 45 0.41504763 0.52971958 Good
## 46 46 0.21092575 0.78935623 Good
## 47 47 0.42875037 0.02333120 Good
## 48 48 0.13268998 0.47723007 Good
## 49 49 0.46009645 0.73231374 Good
## 50 50 0.94295706 0.69273156 Bad
## 51 51 0.76197386 0.47761962 Bad
## 52 52 0.93290983 0.86120948 Bad
## 53 53 0.47067850 0.43809711 Good
## 54 54 0.60358807 0.24479728 Bad
## 55 55 0.48498968 0.07067905 Good
## 56 56 0.10880632 0.09946616 Bad
## 57 57 0.24772683 0.31627171 Bad
## 58 58 0.49851453 0.51863426 Good
## 59 59 0.37286671 0.66200508 Good
## 60 60 0.93469137 0.40683019 Good
## 61 61 0.52398608 0.91287592 Bad
## 62 62 0.31714467 0.29360337 Bad
## 63 63 0.27796603 0.45906573 Good
## 64 64 0.78754051 0.33239467 Bad
## 65 65 0.70246251 0.65087047 Bad
## 66 66 0.16502764 0.25801678 Bad
## 67 67 0.06445754 0.47854525 Good
## 68 68 0.75470562 0.76631067 Good
## 69 69 0.62041003 0.08424691 Bad
## 70 70 0.16957677 0.87532133 Good
## 71 71 0.06221405 0.33907294 Bad
## 72 72 0.10902927 0.83944035 Good
## 73 73 0.38171635 0.34668349 Bad
## 74 74 0.16931091 0.33377493 Bad
## 75 75 0.29865254 0.47635125 Good
## 76 76 0.19220954 0.89219834 Good
## 77 77 0.25717002 0.86433947 Good
## 78 78 0.18123182 0.38998954 Bad
## 79 79 0.47731371 0.77732070 Bad
## 80 80 0.77073704 0.96061800 Bad
## 81 81 0.02778712 0.43465948 Good
## 82 82 0.52731078 0.71251468 Bad
## 83 83 0.88031907 0.39999437 Good
## 84 84 0.37306337 0.32535215 Bad
## 85 85 0.04795913 0.75708715 Bad
## 86 86 0.13862825 0.20269226 Bad
## 87 87 0.32149212 0.71112122 Good
## 88 88 0.15483161 0.12169192 Bad
## 89 89 0.13222817 0.24548851 Bad
## 90 90 0.22130593 0.14330438 Bad
## 91 91 0.22638080 0.23962942 Bad
## 92 92 0.13141653 0.05893438 Bad
## 93 93 0.98156346 0.64228826 Bad
## 94 94 0.32701373 0.87626921 Good
## 95 95 0.50693950 0.77891468 Bad
## 96 96 0.68144251 0.79730883 Bad

2
## 97 97 0.09916910 0.45527445 Good
## 98 98 0.11890256 0.41008408 Good
## 99 99 0.05043966 0.81087024 Good
## 100 100 0.92925392 0.60493329 Bad
## 101 101 0.67371223 0.65472393 Bad
## 102 102 0.09485786 0.35319727 Bad
## 103 103 0.49259612 0.27026015 Bad
## 104 104 0.46155184 0.99268406 Bad
## 105 105 0.37521653 0.63349326 Good
## 106 106 0.99109922 0.21320814 Good
## 107 107 0.17635071 0.12937235 Bad
## 108 108 0.81343521 0.47811803 Bad
## 109 109 0.06844664 0.92407447 Good
## 110 110 0.40044975 0.59876097 Good
## 111 111 0.14114433 0.97617069 Good
## 112 112 0.19330986 0.73179251 Good
## 113 113 0.84135172 0.35672691 Bad
## 114 114 0.71991399 0.43147369 Bad
## 115 115 0.26721208 0.14821156 Bad
## 116 116 0.49500164 0.01307758 Bad
## 117 117 0.08311390 0.71556607 Good
## 118 118 0.35388424 0.10318424 Bad
## 119 119 0.96920881 0.44628435 Bad
## 120 120 0.62471419 0.64010105 Bad
## 121 121 0.66461825 0.99183862 Bad
## 122 122 0.31248966 0.49559358 Good
## 123 123 0.40568961 0.48434952 Good
## 124 124 0.99607737 0.17344233 Good
## 125 125 0.85508236 0.75482094 Bad
## 126 126 0.95354840 0.45389549 Bad
## 127 127 0.81230509 0.51116978 Bad
## 128 128 0.78218212 0.20754511 Good
## 129 129 0.26787813 0.22865814 Bad
## 130 130 0.76215153 0.59571200 Bad
## 131 131 0.98631159 0.57487220 Bad
## 132 132 0.29360555 0.07706438 Bad
## 133 133 0.39935111 0.03554058 Bad
## 134 134 0.81213152 0.64279549 Bad
## 135 135 0.07715167 0.92861520 Good
## 136 136 0.36369681 0.59809242 Good
## 137 137 0.44259247 0.56090075 Good
## 138 138 0.15671413 0.52602772 Good
## 139 139 0.58220527 0.98509522 Bad
## 140 140 0.97016218 0.50764182 Bad
## 141 141 0.98949983 0.68278808 Bad
## 142 142 0.17645204 0.60154122 Good
## 143 143 0.54213042 0.23886868 Bad
## 144 144 0.38430389 0.25816593 Bad
## 145 145 0.67616405 0.72930962 Bad
## 146 146 0.26929378 0.45257083 Good
## 147 147 0.46925094 0.17512677 Bad
## 148 148 0.17180008 0.74669827 Good
## 149 149 0.36918946 0.10498764 Bad
## 150 150 0.72540527 0.86454495 Good

3
## 151 151 0.48614910 0.61464497 Good
## 152 152 0.06380247 0.55715954 Good
## 153 153 0.78454623 0.32877732 Good
## 154 154 0.41832164 0.45313145 Bad
## 155 155 0.98101808 0.50044097 Bad
## 156 156 0.28288396 0.18086636 Bad
## 157 157 0.84788215 0.52963060 Bad
## 158 158 0.08223923 0.07527575 Bad
## 159 159 0.88645875 0.27775593 Good
## 160 160 0.47193073 0.21269952 Bad
## 161 161 0.10910096 0.28479048 Bad
## 162 162 0.33327798 0.89509410 Good
## 163 163 0.83741657 0.44623532 Bad
## 164 164 0.27684984 0.77998489 Good
## 165 165 0.58703514 0.88061903 Bad
## 166 166 0.83673227 0.41312421 Bad
## 167 167 0.07115402 0.06380848 Bad
## 168 168 0.70277874 0.33548749 Bad
## 169 169 0.69882454 0.72372595 Bad
## 170 170 0.46396238 0.33761533 Good
## 171 171 0.43693111 0.63041412 Good
## 172 172 0.56217679 0.84061455 Bad
## 173 173 0.92848323 0.85613166 Bad
## 174 174 0.23046641 0.39135928 Bad
## 175 175 0.22181375 0.38049389 Bad
## 176 176 0.42021589 0.89544543 Good
## 177 177 0.33352081 0.64431576 Bad
## 178 178 0.86480755 0.74107865 Bad
## 179 179 0.17719454 0.60530345 Good
## 180 180 0.49331873 0.90308161 Good
## 181 181 0.42971337 0.29373016 Bad
## 182 182 0.56426384 0.19126011 Bad
## 183 183 0.65616232 0.88645094 Bad
## 184 184 0.97855406 0.50333949 Good
## 185 185 0.23216115 0.87705754 Good
## 186 186 0.24081160 0.18919362 Bad
## 187 187 0.79683608 0.75810305 Bad
## 188 188 0.83167172 0.72449889 Bad
## 189 189 0.11350771 0.94372482 Good
## 190 190 0.96331202 0.54764659 Bad
## 191 191 0.14732290 0.71174387 Good
## 192 192 0.14362694 0.38890510 Bad
## 193 193 0.92522994 0.10087313 Good
## 194 194 0.50703560 0.92730209 Bad
## 195 195 0.15485102 0.28323250 Bad
## 196 196 0.34830205 0.59057316 Good
## 197 197 0.65982103 0.11036060 Bad
## 198 198 0.31177237 0.84050703 Bad
## 199 199 0.35157341 0.31796368 Bad
## 200 200 0.14784571 0.78285134 Good

# Drop the first column (the X row id) -- it carries no predictive signal
# and would let the tree split on row order instead of the features.
cartdata <- cartdata[, -1]
nrow(cartdata)

4
## [1] 200

# Base rate of the positive class: proportion of "Good" rows (0.415).
mean(cartdata$Y == "Good")

## [1] 0.415

# Class frequencies of the target: 117 "Bad" vs 83 "Good" (see output below).
table(cartdata$Y)

##
## Bad Good
## 117 83

# Scatter plot of the two predictors, coloured by class:
# blue filled points = "Good", red filled points = "Bad".
good <- cartdata$Y == "Good"
plot(cartdata$X1, cartdata$X2)
points(cartdata$X1[good], cartdata$X2[good], col = "blue", pch = 19)
points(cartdata$X1[!good], cartdata$X2[!good], col = "red", pch = 19)
1.0
0.8
cartdata$X2

0.6
0.4
0.2
0.0

0.0 0.2 0.4 0.6 0.8 1.0

cartdata$X1

library(rpart)

## Warning: package 'rpart' was built under R version 3.6.1

library(rpart.plot)

## Warning: package 'rpart.plot' was built under R version 3.6.1

5
library(rattle)

## Warning: package 'rattle' was built under R version 3.6.1

## Rattle: A free graphical interface for data science with R.


## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.

# Grow a full classification tree:
#  - method = "class": Y is a factor target (Gini splits);
#  - minbucket = 3: every terminal leaf must hold at least 3 observations
#    (forwarded via ... to rpart.control);
#  - cp = 0: no complexity penalty while growing, so the tree is grown out
#    fully and pruned afterwards using the cross-validated CP table.
tree <- rpart(Y ~ ., data = cartdata, method = "class",
              minbucket = 3, cp = 0)

# Auto-prints the node/split/loss summary shown below.
tree

## n= 200
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 200 83 Bad (0.58500000 0.41500000)
## 2) X2< 0.3956768 73 14 Bad (0.80821918 0.19178082)
## 4) X1< 0.7500587 60 4 Bad (0.93333333 0.06666667)
## 8) X1< 0.4140507 41 0 Bad (1.00000000 0.00000000) *
## 9) X1>=0.4140507 19 4 Bad (0.78947368 0.21052632)
## 18) X1>=0.4666067 16 2 Bad (0.87500000 0.12500000) *
## 19) X1< 0.4666067 3 1 Good (0.33333333 0.66666667) *
## 5) X1>=0.7500587 13 3 Good (0.23076923 0.76923077)
## 10) X2>=0.330586 5 2 Bad (0.60000000 0.40000000) *
## 11) X2< 0.330586 8 0 Good (0.00000000 1.00000000) *
## 3) X2>=0.3956768 127 58 Good (0.45669291 0.54330709)
## 6) X1>=0.502727 57 7 Bad (0.87719298 0.12280702)
## 12) X2>=0.422299 54 5 Bad (0.90740741 0.09259259)
## 24) X2< 0.7622069 34 1 Bad (0.97058824 0.02941176) *
## 25) X2>=0.7622069 20 4 Bad (0.80000000 0.20000000)
## 50) X2>=0.7809237 17 2 Bad (0.88235294 0.11764706) *
## 51) X2< 0.7809237 3 1 Good (0.33333333 0.66666667) *
## 13) X2< 0.422299 3 1 Good (0.33333333 0.66666667) *
## 7) X1< 0.502727 70 8 Good (0.11428571 0.88571429)
## 14) X2>=0.6389045 38 7 Good (0.18421053 0.81578947)
## 28) X1>=0.4608241 3 1 Bad (0.66666667 0.33333333) *
## 29) X1< 0.4608241 35 5 Good (0.14285714 0.85714286)
## 58) X2< 0.6614014 3 1 Bad (0.66666667 0.33333333) *
## 59) X2>=0.6614014 32 3 Good (0.09375000 0.90625000)
## 118) X2< 0.8524233 18 3 Good (0.16666667 0.83333333)
## 236) X2>=0.8241598 3 1 Bad (0.66666667 0.33333333) *
## 237) X2< 0.8241598 15 1 Good (0.06666667 0.93333333) *
## 119) X2>=0.8524233 14 0 Good (0.00000000 1.00000000) *
## 15) X2< 0.6389045 32 1 Good (0.03125000 0.96875000) *

# Draw the unpruned tree (node class, Good-probability, coverage %).
rpart.plot(tree)

6
Bad
0.42
100%
yes X2 < 0.4 no

Bad Good
0.19 0.54
36% 64%
X1 < 0.75 X1 >= 0.5

Bad Good Bad Good


0.07 0.77 0.12 0.89
30% 6% 28% 35%
X1 < 0.41 X2 >= 0.33 X2 >= 0.42 X2 >= 0.64

Bad Bad Good


0.21 0.09 0.82
10% 27% 19%
X1 >= 0.47 X2 < 0.76 X1 >= 0.46

Bad Good
0.20 0.86
10% 18%
X2 >= 0.78 X2 < 0.66

Good
0.91
16%
X2 < 0.85

Good
0.83
9%
X2 >= 0.82

Bad Bad Good Bad Good Bad Bad Good Good Bad Bad Bad Good Good Good
0.00 0.12 0.67 0.40 1.00 0.03 0.12 0.67 0.67 0.33 0.33 0.33 0.93 1.00 0.97
20% 8% 2% 2% 4% 17% 8% 2% 2% 2% 2% 2% 8% 7% 16%

# CP table: cross-validated error (xerror) per complexity value; the row
# with the smallest xerror guides the pruning step below.
printcp(tree)

##
## Classification tree:
## rpart(formula = Y ~ ., data = cartdata, method = "class", minbucket = 3,
## cp = 0)
##
## Variables actually used in tree construction:
## [1] X1 X2
##
## Root node error: 83/200 = 0.415
##
## n= 200
##
## CP nsplit rel error xerror xstd
## 1 0.3253012 0 1.00000 1.00000 0.083954
## 2 0.0843373 2 0.34940 0.37349 0.061664
## 3 0.0120482 3 0.26506 0.30120 0.056350
## 4 0.0080321 5 0.24096 0.32530 0.058225
## 5 0.0060241 8 0.21687 0.34940 0.059993
## 6 0.0000000 14 0.18072 0.42169 0.064742

# Visualise xerror vs tree size/cp to pick the pruning threshold.
plotcp(tree)

7
size of tree

1.2
1.0 1 3 4 6 9 15
X−val Relative Error

0.8
0.6
0.4
0.2

Inf 0.17 0.032 0.0098 0.007 0

cp

# rattle's prettier rendering of the same unpruned tree.
fancyRpartPlot(tree)

8
1
Bad
.58 .42
100%
yes X2 < 0.4 no
2 3
Bad Good
.81 .19 .46 .54
36% 64%
X1 < 0.75 X1 >= 0.5
4 5 6 7
Bad Good Bad Good
.93 .07 .23 .77 .88 .12 .11 .89
30% 6% 28% 35%
X1 < 0.41 X2 >= 0.33 X2 >= 0.42 X2 >= 0.64
9 12 14
Bad Bad Good
.79 .21 .91 .09 .18 .82
10% 27% 19%
X1 >= 0.47 X2 < 0.76 X1 >= 0.46
25 29
Bad Good
.80 .20 .14 .86
10% 18%
X2 >= 0.78 X2 < 0.66
59
Good
.09 .91
16%
X2 < 0.85
118
Good
.17 .83
9%
X2 >= 0.82

8 18 19 10 11 24 50 51 13 28 58 236 237 119 15


Bad Bad Good Bad Good Bad Bad Good Good Bad Bad Bad Good Good Good
1.00 .00 .88 .12 .33 .67 .60 .40 .00 1.00 .97 .03 .88 .12 .33 .67 .33 .67 .67 .33 .67 .33 .67 .33 .07 .93 .00 1.00 .03 .97
20% 8% 2% 2% 4% 17% 8% 2% 2% 2% 2% 2% 8% 7% 16%

Rattle 2019−Nov−22 12:57:29 IBM

# Prune back to the CP value with the lowest cross-validated error.
# BUG FIX: the original used a single subscript
#   tree$cptable[which.min(tree$cptable[, "xerror"])]
# (linear, column-major matrix indexing) and passed "CP" as a stray third
# positional argument to prune(). It only returned the right value by
# coincidence, because CP happens to be the first column of cptable.
# Index the row AND the "CP" column explicitly instead.
best_cp <- tree$cptable[which.min(tree$cptable[, "xerror"]), "CP"]
ptree <- prune(tree, cp = best_cp)

# Auto-prints the pruned tree summary shown below.
ptree

## n= 200
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 200 83 Bad (0.58500000 0.41500000)
## 2) X2< 0.3956768 73 14 Bad (0.80821918 0.19178082)
## 4) X1< 0.7500587 60 4 Bad (0.93333333 0.06666667) *
## 5) X1>=0.7500587 13 3 Good (0.23076923 0.76923077) *
## 3) X2>=0.3956768 127 58 Good (0.45669291 0.54330709)
## 6) X1>=0.502727 57 7 Bad (0.87719298 0.12280702) *
## 7) X1< 0.502727 70 8 Good (0.11428571 0.88571429) *

# CP table of the pruned tree -- now stops at the 3-split solution.
printcp(ptree)

##
## Classification tree:
## rpart(formula = Y ~ ., data = cartdata, method = "class", minbucket = 3,
## cp = 0)
##
## Variables actually used in tree construction:

9
## [1] X1 X2
##
## Root node error: 83/200 = 0.415
##
## n= 200
##
## CP nsplit rel error xerror xstd
## 1 0.325301 0 1.00000 1.00000 0.083954
## 2 0.084337 2 0.34940 0.37349 0.061664
## 3 0.012048 3 0.26506 0.30120 0.056350

# Draw the pruned tree (four leaves).
rpart.plot(ptree)

Bad
0.42
100%
yes X2 < 0.4 no

Bad Good
0.19 0.54
36% 64%
X1 < 0.75 X1 >= 0.5

Bad Good Bad Good


0.07 0.77 0.12 0.89
30% 6% 28% 35%

# rattle rendering of the pruned tree.
fancyRpartPlot(ptree)

10
1
Bad
.58 .42
100%
yes X2 < 0.4 no

2 3
Bad Good
.81 .19 .46 .54
36% 64%
X1 < 0.75 X1 >= 0.5

4 5 6 7
Bad Good Bad Good
.93 .07 .23 .77 .88 .12 .11 .89
30% 6% 28% 35%
Rattle 2019−Nov−22 12:57:30 IBM

# Re-draw the class scatter and overlay the tree's root split as a
# horizontal boundary at X2 = 0.4 (blue = "Good", red = "Bad").
good <- cartdata$Y == "Good"
plot(cartdata$X1, cartdata$X2)
points(cartdata$X1[good], cartdata$X2[good], col = "blue", pch = 19)
points(cartdata$X1[!good], cartdata$X2[!good], col = "red", pch = 19)
segments(0, 0.4, 1, 0.4)

11
1.0
0.8
cartdata$X2

0.6
0.4
0.2
0.0

0.0 0.2 0.4 0.6 0.8 1.0

cartdata$X1

# BUG FIX: predict.rpart's argument is `newdata`, not `data`. In the
# original, `data = cartdata` was silently absorbed by `...`, so predict()
# fell back to the fitted (training) data -- coincidentally the same result
# here, but silently wrong for any other data set.
# Predicted class label per row ("Bad"/"Good").
cartdata$Prediction <- predict(ptree, newdata = cartdata, type = "class")

# Posterior class-probability matrix (one column per class).
cartdata$Score <- predict(ptree, newdata = cartdata, type = "prob")

12

S-ar putea să vă placă și