Sunteți pe pagina 1 din 4

cluster

V. Visalakshi
21/11/2019

# Point the session at the folder holding the data set.
# NOTE(review): this absolute path is machine-specific; the script only runs
# as-is where this directory exists.
setwd("C:/Users/IBM/Downloads/R programming/Data Set/Datamining")

# Load the customer spend data from the working directory and echo it.
CustSpenData <- read.csv("Cust_Spend_Data.csv")
print(CustSpenData)

## Cust_ID Name Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items


## 1 1 A 10000 2 1 1
## 2 2 B 7000 3 0 10
## 3 3 C 7000 7 1 3
## 4 4 D 6500 5 1 1
## 5 5 E 6000 6 0 12
## 6 6 F 4000 3 0 1
## 7 7 G 2500 5 0 11
## 8 8 H 2500 3 0 1
## 9 9 I 2000 2 0 2
## 10 10 J 1000 4 0 1
## Staples_Items
## 1 0
## 2 9
## 3 4
## 4 4
## 5 3
## 6 8
## 7 2
## 8 1
## 9 2
## 10 7

# Pairwise Euclidean distances between customers on the raw (unscaled)
# values in columns 3 to 7.
DistMatrix <- dist(CustSpenData[, 3:7], method = "euclidean")
print(DistMatrix)

## 1 2 3 4 5 6
## 2 3000.027333
## 3 3000.007500 9.539392
## 4 3500.003571 500.110988 500.008000
## 5 4000.018375 1000.024500 1000.041999 500.123985
## 6 6000.005500 3000.013667 3000.006167 2500.004200 2000.038750
## 7 7500.007600 4500.006000 4500.008111 4000.013125 3500.000429 1500.046666
## 8 7500.000200 4500.016111 4500.003333 4000.001750 3500.019143 1500.016333
## 9 8000.000375 5000.011400 5000.003100 4500.001667 4000.014625 2000.009500
## 10 9000.003000 6000.007167 6000.001917 5500.001000 5000.014100 3000.000333
## 7 8 9
## 2
## 3
## 4
## 5
## 6

1
## 7
## 8 10.246951
## 9 500.089992 500.003000
## 10 1500.041999 1500.012333 1000.015000

# Standardise columns 3 to 7 with scale() defaults (centre and scale each
# column) so that no single variable dominates the distance computation.
CustSpenData.Scaled <- scale(CustSpenData[, 3:7])
print(CustSpenData.Scaled)

## Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items Staples_Items


## [1,] 1.7896887 -1.1766968 1.449138 -0.7033848 -1.3093073
## [2,] 0.7471516 -0.5883484 -0.621059 1.2149373 1.6366342
## [3,] 0.7471516 1.7650452 1.449138 -0.2770910 0.0000000
## [4,] 0.5733954 0.5883484 1.449138 -0.7033848 0.0000000
## [5,] 0.3996392 1.1766968 -0.621059 1.6412311 -0.3273268
## [6,] -0.2953855 -0.5883484 -0.621059 -0.7033848 1.3093073
## [7,] -0.8166541 0.5883484 -0.621059 1.4280842 -0.6546537
## [8,] -0.8166541 -0.5883484 -0.621059 -0.7033848 -0.9819805
## [9,] -0.9904102 -1.1766968 -0.621059 -0.4902379 -0.6546537
## [10,] -1.3379226 0.0000000 -0.621059 -0.7033848 0.9819805
## attr(,"scaled:center")
## Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items
## 4850.0 4.0 0.3 4.3
## Staples_Items
## 4.0
## attr(,"scaled:scale")
## Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items
## 2877.5954468 1.6996732 0.4830459 4.6916001
## Staples_Items
## 3.0550505

# Sanity check: mean of every scaled column (MARGIN = 2 iterates columns).
apply(CustSpenData.Scaled, MARGIN = 2, FUN = mean)

## Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items


## -1.112391e-17 -1.114126e-17 1.113476e-17 5.572799e-18
## Staples_Items
## 1.110223e-17

# Sanity check: standard deviation of every scaled column.
apply(CustSpenData.Scaled, MARGIN = 2, FUN = sd)

## Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items


## 1 1 1 1
## Staples_Items
## 1

# Pairwise Euclidean distances between customers, this time computed on the
# scaled matrix.
DistMatrix.Scaled <- dist(CustSpenData.Scaled, method = "euclidean")
print(DistMatrix.Scaled)

## 1 2 3 4 5 6 7
## 2 4.2517388
## 3 3.4112669 3.8378243
## 4 2.5117802 3.4726146 1.2635399

2
## 5 4.2682459 2.6972243 2.9222826 3.2041264
## 6 3.9795157 2.2077106 3.5786969 2.8529626 3.4313611
## 7 4.3778822 3.0208501 3.3840821 3.3450984 1.4064486 3.1712205
## 8 3.3958848 3.6031362 3.6627416 2.9269187 3.2435396 2.3498342 2.4566071
## 9 3.5339309 3.3945393 4.0537248 3.2125675 3.4815229 2.1752673 2.6125726
## 10 4.5499760 2.9668534 3.5909115 3.0412646 3.4081147 1.2410400 2.7999297
## 8 9
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 0.7272685
## 10 2.1154237 2.0565464

# Hierarchical clustering of the scaled distances with the "average"
# agglomeration method.
clust <- hclust(d = DistMatrix.Scaled, method = "average")

# Draw the dendrogram, labelling each leaf with the customer's name.
plot(clust, labels = as.character(CustSpenData[["Name"]]))

Cluster Dendrogram
3.5
2.5

B
Height

1.5

G
C

J
0.5

DistMatrix.Scaled
hclust (*, "average")

# Redraw the dendrogram, then outline the three-cluster cut in red.
plot(clust, labels = as.character(CustSpenData[["Name"]]))
rect.hclust(clust, k = 3, border = "red")

3
Cluster Dendrogram
3.5
2.5

B
Height

1.5

G
C

J
0.5

DistMatrix.Scaled
hclust (*, "average")

# Cut the tree into three groups and store each customer's group label.
CustSpenData$Cluster <- cutree(clust, k = 3)

# Per-cluster mean of every remaining column; columns 1, 2 and 8 are dropped
# before averaging. The grouping list is left unnamed, so the result's
# grouping column is called Group.1.
CustomerProfile <- aggregate(
  x = CustSpenData[, -c(1, 2, 8)],
  by = list(CustSpenData$Cluster),
  FUN = mean
)
print(CustomerProfile)

## Group.1 Avg_Mthly_Spend No_Of_Visits Apparel_Items FnV_Items


## 1 1 7833.333 4.666667 1 1.666667
## 2 2 5166.667 4.666667 0 11.000000
## 3 3 2375.000 3.000000 0 1.250000
## Staples_Items
## 1 2.666667
## 2 4.666667
## 3 4.500000

S-ar putea să vă placă și