# R - Scripts

# macOS-only: force R's GUI locale to UTF-8 so non-ASCII text renders correctly
system('defaults write org.R-project.R force.LANG en_US.UTF-8')

install.packages("dplyr")
install.packages("factoextra", dependencies = TRUE)

# Clustering libraries
library(cluster)     # pam()
library(factoextra)  # fviz_cluster() -- fixed scrape typo "facto extra"
library(fpc)         # dbscan()
library(gridExtra)

# Read the store data from the csv file
data <- read.csv("/Users/ggananth/Code/ai_workshop/store_data.csv")

# Skip the first 3 columns, which are categorical data.
# BUG FIX: the original used nrow(data) as the upper bound of a COLUMN
# range; column subsetting must use ncol(data).
sdata <- data[, 4:ncol(data)]

# K-means clustering with k = 2 (use <- for assignment, name the k argument)
my_kmeans <- kmeans(sdata, centers = 2)
# Visualize the clusters (factoextra projects onto the first two PCs)
fviz_cluster(my_kmeans, data = sdata)

# Partitioning Around Medoids (PAM) with k = 2
# (comment typo fixed: "medics" -> "medoids"; <- for assignment)
my_pam <- pam(sdata, k = 2)
fviz_cluster(my_pam, data = sdata)

# DBSCAN: density-based clustering; eps is the neighbourhood radius,
# so the number of clusters is discovered rather than specified.
my_dbscan <- dbscan(sdata, eps = 1000)
fviz_cluster(my_dbscan, data = sdata)

# Principal Component Analysis on the numeric store columns
pr <- prcomp(x = sdata)
# Loadings (rotation matrix) for the first principal component
pr[["rotation"]][, 1]

# Fixed scrape damage: curly quotes -> ASCII double quotes, and the stray
# space between the function name and "(".
install.packages("e1071", dependencies = TRUE)
install.packages("caret", dependencies = TRUE)
# NOTE(review): ElemStatLearn has been archived on CRAN; this install may
# need the CRAN archive -- confirm before running.
install.packages("ElemStatLearn")
library(ElemStatLearn)
library(e1071)
# BUG FIX: caret was installed but never attached, yet confusionMatrix()
# (a caret function) is called later in this script.
library(caret)

# Split into train and test sets, dropping columns 1 and 3 (categorical).
# OCR FIX: "1f4000" / "4000f5000" are mangled ":" ranges. Rows 4001:5000
# are used for the test set so it does not overlap the training rows.
train_data <- data[1:4000, -c(1, 3)]
test_data <- data[4001:5000, -c(1, 3)]

# y holds the class labels (first remaining column -- presumably Tier;
# verify against the csv's column order)
y <- train_data[, 1]
# x holds the feature columns
x <- train_data[, 2:ncol(train_data)]

# Two equivalent ways of training; the formula interface (second call)
# makes visualisation easier later.
# FIX: the string literal "nu-classification" was line-wrapped mid-token
# in the scraped source; rejoined here.
model_svm <- svm(x, y, scale = FALSE, type = "nu-classification", nu = 0.1)
model_svm <- svm(Tier ~ ., train_data, scale = FALSE,
                 type = "nu-classification", nu = 0.1)

# Predict class labels for the unseen test rows (all columns but the label)
test_features <- test_data[, 2:ncol(test_data)]
pred <- predict(model_svm, test_features)

# The confusion matrix reports accuracy, kappa, per-class metrics, etc.
confusionMatrix(pred, test_data[, 1])

# You can plot the separating hyperplanes for 2 dimensions at a time
# (here the Weekend vs Weekday feature plane).
plot(model_svm, train_data, Weekend ~ Weekday)

# Grid-search tuning of the SVM over nu (curly quotes fixed, and the
# line-wrapped call from the scrape rejoined).
# NOTE(review): epsilon is a regression parameter and is ignored for
# nu-classification -- confirm it was intended here.
best_params <- tune.svm(x, y, type = "nu-classification", kernel = "radial",
                        nu = list(0.001, 0.01, 0.1, 0.5, 0.9),
                        epsilon = list(0.0001, 0.001, 0.01, 0.1, 1, 10, 100))
best_params$best.parameters

# Logistic regression: binary target = "is Tier 1"
lr_train_data <- train_data[, -1]
lr_train_data$Class <- factor(ifelse(train_data$Tier == "Tier 1", 1, 0),
                              levels = c(0, 1))
model_glm <- glm(Class ~ ., data = lr_train_data, family = "binomial")

# BUG FIX: lr_test_data was used without ever being created. It must keep
# the original test columns (the Tier column is dropped via [, -1] at
# predict time, mirroring how lr_train_data was built).
lr_test_data <- test_data
lr_test_data$Class <- factor(ifelse(test_data$Tier == "Tier 1", 1, 0),
                             levels = c(0, 1))

# Threshold predicted probabilities at 0.5 to get class labels
pred <- factor(ifelse(predict(model_glm, lr_test_data[, -1],
                              type = "response") > 0.5, 1, 0),
               levels = c(0, 1))
confusionMatrix(pred, lr_test_data$Class)
# AdaBoost via the adabag package
install.packages("adabag")
library(adabag)

# Boosted classification trees: 100 rounds, bootstrap resampling enabled
model_ada <- boosting(Tier ~ ., train_data, boos = TRUE, mfinal = 100)
pred <- predict(model_ada, test_data)
pred$confusion   # confusion matrix on the test set
1 - pred$error   # accuracy = 1 - error rate

# Neural network
install.packages("neuralnet")
# FIX: use library() rather than require() for a hard dependency --
# require() returns FALSE instead of erroring when the package is missing.
library(neuralnet)
library(ramify)

# One hidden layer of 2 units, logistic activation, 10 training repetitions.
# (The call was line-wrapped in the scraped source; rejoined here.)
model_nn <- neuralnet(Tier ~ ., data = train_data, hidden = 2,
                      act.fct = "logistic", linear.output = FALSE, rep = 10)

# Predict class scores for new data
# NOTE(review): test_data still contains the Tier label column here --
# confirm compute() ignores non-covariate columns in this neuralnet version.
pred <- compute(model_nn, test_data)

# net.result holds one probability score per class (columns) per row
pred$net.result

# Row-wise argmax -> predicted class index.
# Replaced ramify::argmax with base-R which.max: dependency-free and the
# same first-maximum tie-breaking.
p <- as.numeric(apply(pred$net.result, 1, which.max))
ytest <- as.numeric(factor(test_data[, 1]))

# Accuracy: fraction of matching class indices
mean(p == ytest)

# End of script