Sunteți pe pagina 1 din 4

Artificial Neural Network

YIK LUN, KEI


# ---- Training data ---------------------------------------------------------
# Three observations (rows) with two input features each, and one target
# value per observation.
x <- matrix(c(3, 5, 5, 1, 10, 2), ncol = 2, byrow = TRUE)
y <- matrix(c(75, 82, 93))

# Divide every input column by its maximum and the targets by 100 so that all
# values lie in (0, 1], i.e. inside the output range of the sigmoid.
colmax <- apply(x, 2, max)
X <- t(t(x) / colmax) # scaling the data
Y <- y / 100

# ---- Network definition ----------------------------------------------------
input_layer_size <- 2
output_layer_size <- 1
hidden_layer_size <- 3

# Logistic activation, applied elementwise.
sigmoid <- function(Z) 1 / (1 + exp(-Z))

# Derivative of the logistic activation, written as s * (1 - s).
# The algebraically equal form exp(-z) / (1 + exp(-z))^2 evaluates to
# Inf / Inf = NaN once exp(-z) overflows (z below roughly -710); this form
# stays finite for every finite z.
sigmoidprime <- function(z) {
  s <- sigmoid(z)
  s * (1 - s)
}

# Half the sum of squared errors between targets and predictions.
cost <- function(y, y_hat) 0.5 * sum((y - y_hat)^2)

# Random initial weights. W_1 must be drawn before W_2 so the stream of
# random numbers after set.seed(10) is consumed in the same order as before.
set.seed(10)
W_1 <- matrix(runif(input_layer_size * hidden_layer_size),
              nrow = input_layer_size, ncol = hidden_layer_size)
W_2 <- matrix(runif(hidden_layer_size * output_layer_size),
              nrow = hidden_layer_size, ncol = output_layer_size)

# ---- Batch gradient descent ------------------------------------------------
scalar <- 5      # step size (learning rate) applied to both gradients
n_iter <- 10000  # number of full-batch updates
cost_hist <- rep(NA_real_, n_iter)

for (i in seq_len(n_iter)) {
  # Forward pass: input -> hidden -> output.
  Z_2 <- X %*% W_1
  A_2 <- sigmoid(Z_2)
  Z_3 <- A_2 %*% W_2
  Y_hat <- sigmoid(Z_3)

  # Cost of the current weights, recorded before they are updated.
  cost_hist[i] <- cost(Y, Y_hat)

  # Backward pass: error terms and gradients of the cost w.r.t. W_2 and W_1.
  delta_3 <- (-(Y - Y_hat) * sigmoidprime(Z_3))
  djdw2 <- t(A_2) %*% delta_3
  delta_2 <- delta_3 %*% t(W_2) * sigmoidprime(Z_2)
  djdw1 <- t(X) %*% delta_2

  # Gradient step.
  W_1 <- W_1 - scalar * djdw1
  W_2 <- W_2 - scalar * djdw2
}
# Print the input-to-hidden weight matrix as left by the training loop.
W_1
##           [,1]      [,2]       [,3]
## [1,]  2.018745 -1.288067  2.1068752
## [2,] -0.510094 -1.001957 -0.7276968
# Print the hidden-to-output weight matrix as left by the training loop.
W_2
##           [,1]
## [1,]  1.677602
## [2,] -3.618915
## [3,]  1.979281

# Print the network's predictions from the forward pass of the final
# iteration (computed before that iteration's weight update).
Y_hat
##           [,1]
## [1,] 0.7500001
## [2,] 0.8202053
## [3,] 0.9296046
# Print the scaled targets for comparison with Y_hat.
Y
##      [,1]
## [1,] 0.75
## [2,] 0.82
## [3,] 0.93
# Half sum of squared errors between the scaled targets and the predictions.
cost(Y,Y_hat)
## [1] 9.926357e-08

0.00 0.01 0.02 0.03 0.04 0.05

cost_hist

# Line plot of the cost recorded at each training iteration.
plot(cost_hist, type="l")

2000

4000

6000
Index

8000

10000

8
12
16

log(cost_hist)

# Same cost history on a natural-log scale, which makes the late, small
# improvements visible.
plot(log(cost_hist),type = "l")

2000

4000

6000

8000

Index

# Sum-of-squares cost of the 2-3-1 sigmoid network, expressed as a function
# of a single flat weight vector so it can be passed to optim().
#
# Args:
#   w: numeric vector with at least 9 elements. w[1:6] fills the 2 x 3
#      input-to-hidden matrix column by column; w[7:9] is the 3 x 1
#      hidden-to-output matrix. Any elements after the ninth are ignored.
#
# Returns:
#   The scalar 0.5 * sum((Y - Y_hat)^2) on the built-in, scaled training data.
#
# Uses sigmoid() as defined at the top level of this script.
cost_optim <- function(w) {
  # A shorter vector would make w[7:9] index past the end and silently turn
  # the cost into NA, so fail loudly instead.
  stopifnot(is.numeric(w), length(w) >= 9)

  # Training data, scaled exactly as in the gradient-descent section.
  x <- matrix(c(3, 5, 5, 1, 10, 2), ncol = 2, byrow = TRUE)
  y <- matrix(c(75, 82, 93))
  colmax <- apply(x, 2, max)
  X <- t(t(x) / colmax) # scaling the data
  Y <- y / 100

  # Unpack the flat parameter vector into the two weight matrices.
  W_1 <- matrix(w[1:6], nrow = 2, ncol = 3)
  W_2 <- matrix(w[7:9], nrow = 3, ncol = 1)

  # Forward pass.
  Z_2 <- X %*% W_1
  A_2 <- sigmoid(Z_2)
  Z_3 <- A_2 %*% W_2
  Y_hat <- sigmoid(Z_3)

  0.5 * sum((Y - Y_hat)^2) # minimize this
}
# Minimise the network cost with the BFGS optimiser, starting from nine
# uniform random weights drawn under a fixed seed, then print the full result.
set.seed(1)
res <- optim(
  par = runif(9),
  fn = cost_optim,
  method = "BFGS",
  control = list(maxit = 1000)
)
print(res)
## $par
## [1]  2.34134355 -1.43848624  1.82193939 -0.14279366  1.71923317 -0.01690314
## [7]  1.44822475  0.89619914  0.14810563
##
## $value
## [1] 0.001226227
##
## $counts
## function gradient
##       47       45
##
## $convergence
## [1] 0
##
## $message
## NULL

# Reshape the optimiser's flat parameter vector back into the two weight
# matrices (column-major: first six values -> 2 x 3, last three -> 3 x 1).
W_1 <- res[["par"]][1:6]
dim(W_1) <- c(2, 3)
W_2 <- res[["par"]][7:9]
dim(W_2) <- c(3, 1)

# Forward pass with the fitted weights, then report the resulting cost.
Z_2 <- X %*% W_1
A_2 <- sigmoid(Z_2)
Z_3 <- A_2 %*% W_2
Y_hat <- sigmoid(Z_3)
cost(Y, Y_hat)
## [1] 0.001226227
# Repeat the BFGS fit from 100 different random starting points (seeds 1..100)
# and store one row per seed: column 1 = seed, column 2 = final cost.
res <- matrix(NA, nrow = 100, ncol = 2)
for (i in seq_len(100)) {
  set.seed(i)
  res[i, ] <- c(
    i,
    optim(
      par = runif(9),
      fn = cost_optim,
      method = "BFGS",
      control = list(maxit = 1000)
    )$value
  )
}

# Show the seed (and its cost) that reached the lowest final cost.
res[which(res[, 2] == min(res[, 2])), ]
## [1] 6.300000e+01 8.079222e-16

S-ar putea să vă placă și