library(e1071)   # svm()
library(ggplot2) # plotting
set.seed(13579)
N <- 20 # number of data points
# the true weights (w1, w2)
w <- c(2, -4)
b <- 0.5
## generate X
# x is a matrix with 2 rows and N columns; each column is one data point
x <- matrix(nrow = 2, ncol = N)
# x1 and x2 are standard Normal(0, 1), so the data do not need scaling
x[1, ] <- rnorm(N, mean=0, sd=1)
x[2, ] <- rnorm(N, mean=0, sd=1)
## generate Y
y <- as.vector(ifelse(t(w) %*% x + b > 0, 1, -1))
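The labels therefore come from a noiseless linear target, namely

\[
f(x) = \operatorname{sign}\left(w^{\top} x + b\right), \qquad w = (2, -4)^{\top},\ b = 0.5,
\]

with the convention that the boundary case \(w^{\top} x + b = 0\) is assigned the label \(-1\).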
pla <- function(x, y, verbose=0) {
  # append a constant feature 1 so the bias is learned as the last weight
  x <- rbind(x, 1)
  # initialize w.hat
  w.hat <- c(0, 0, 0)
  t <- 0
  # start loop
  while (TRUE) {
    y.hat <- ifelse(t(w.hat) %*% x > 0, 1, -1)
    errors <- (y.hat != y)
    if (verbose)
      cat("Iter:", t, " w1 =", w.hat[1], " w2 =", w.hat[2], " w0 =", w.hat[3], " errors =", sum(errors))
    t <- t + 1
    if (sum(errors) == 0) {
      # no more classification errors, algorithm stops
      if (verbose) cat("\n")
      break
    } else {
      # choose an error point randomly to update w;
      # note sample(n, 1) on a single index n would draw from 1:n,
      # so the one-remaining-error case is handled separately
      err.idx <- which(errors)
      point <- if (length(err.idx) == 1) err.idx else sample(err.idx, 1)
      ## alternatively, choose the first error point to update w
      # point <- which(errors)[1]
      if (verbose)
        cat("  point selected:", point, "\n")
      # update w
      w.hat <- w.hat + y[point] * x[, point]
    }
  }
  return(w.hat)
}
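The update inside the loop is the standard perceptron step: whenever a point \((x_i, y_i)\) is misclassified,

\[
w^{(t+1)} = w^{(t)} + y_i\, x_i ,
\]

which, by the perceptron convergence theorem, terminates after finitely many updates on linearly separable data such as ours.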
w.pla <- pla(x,y)
data <- data.frame(x1=x[1, ], x2=x[2, ], y=y)
svm1 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=1e+10, scale=FALSE)
# coefficients
w.svm <- colSums(as.vector(svm1$coefs) * svm1$SV)
b.svm <- - svm1$rho
# sanity check: should be 1, otherwise it is label switching
sgn <- sign((w.svm %*% svm1$SV[3,] + b.svm) * y[svm1$index[3]])
# margin
margin <- 1 / sqrt(t(w.svm) %*% w.svm)
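The extraction above uses the dual form of the linear SVM: in e1071, `svm1$coefs` holds the products \(\alpha_i y_i\) for the support vectors and `svm1$rho` is the negative intercept, so

\[
w = \sum_{i \in \mathrm{SV}} \alpha_i y_i x_i, \qquad b = -\rho, \qquad \text{margin} = \frac{1}{\lVert w \rVert} .
\]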
# different SVM models
svm2 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=10, scale=FALSE)
svm3 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=1, scale=FALSE)
svm4 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=0.1, scale=FALSE)
svm5 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=0.03, scale=FALSE)
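These five fits differ only in the cost parameter, which is the \(C\) in the soft-margin objective that trades margin width against total slack:

\[
\min_{w,\, b,\, \xi}\ \frac{1}{2}\lVert w \rVert^2 + C \sum_{i=1}^{N} \xi_i
\quad \text{subject to} \quad y_i\left(w^{\top} x_i + b\right) \ge 1 - \xi_i,\quad \xi_i \ge 0 .
\]

A very large \(C\) (here 1e+10) essentially recovers the hard-margin SVM, while a small \(C\) lets the margin widen at the price of slack violations.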
# getting coefficients
# 2
w.svm2 <- colSums(as.vector(svm2$coefs) * svm2$SV)
b.svm2 <- - svm2$rho
margin2 <- 1 / sqrt(t(w.svm2) %*% w.svm2)
# sanity check: should be 1, otherwise it is label switching
sgn2 <- sign((w.svm2 %*% svm2$SV[3,] + b.svm2) * y[svm2$index[3]])
# 3
w.svm3 <- colSums(as.vector(svm3$coefs) * svm3$SV)
b.svm3 <- - svm3$rho
margin3 <- 1 / sqrt(t(w.svm3) %*% w.svm3)
# sanity check: should be 1, otherwise it is label switching
sgn3 <- sign((w.svm3 %*% svm3$SV[3,] + b.svm3) * y[svm3$index[3]])
# 4
w.svm4 <- colSums(as.vector(svm4$coefs) * svm4$SV)
b.svm4 <- - svm4$rho
margin4 <- 1 / sqrt(t(w.svm4) %*% w.svm4)
# sanity check: should be 1, otherwise it is label switching
sgn4 <- sign((w.svm4 %*% svm4$SV[3,] + b.svm4) * y[svm4$index[3]])
# 5
w.svm5 <- colSums(as.vector(svm5$coefs) * svm5$SV)
b.svm5 <- - svm5$rho
margin5 <- 1 / sqrt(t(w.svm5) %*% w.svm5)
# sanity check: should be 1, otherwise it is label switching
sgn5 <- sign((w.svm5 %*% svm5$SV[3,] + b.svm5) * y[svm5$index[3]])
# generate test data
x.out <- matrix(nrow = 2, ncol = 10000)
# x1 and x2 are standard Normal(0, 1), so the data do not need scaling
x.out[1, ] <- rnorm(10000, mean=0, sd=1)
x.out[2, ] <- rnorm(10000, mean=0, sd=1)
## generate y.out
y.out <- as.vector(ifelse(t(w) %*% x.out + b > 0, 1, -1))
data.out <- data.frame(x1=x.out[1, ], x2=x.out[2, ], y=y.out)
ggplot(data = data.out, aes(x1, x2)) +
  geom_point(aes(color = factor(y), shape = factor(y))) +
  geom_ribbon(aes(ymin = -w.svm3["x1"]/w.svm3["x2"]*x1 - b.svm3/w.svm3["x2"] - as.numeric(margin3),
                  ymax = -w.svm3["x1"]/w.svm3["x2"]*x1 - b.svm3/w.svm3["x2"] + as.numeric(margin3)),
              fill = "yellow", alpha = 0.3) +
  geom_abline(intercept = -b.svm3/w.svm3["x2"], slope = -w.svm3["x1"]/w.svm3["x2"], color = 'yellow') +
  geom_abline(intercept = -w.pla[3]/w.pla[2], slope = -w.pla[1]/w.pla[2]) +
  geom_abline(intercept = -b.svm5/w.svm5["x2"], slope = -w.svm5["x1"]/w.svm5["x2"], color = 'blue') +
  geom_abline(intercept = -b.svm4/w.svm4["x2"], slope = -w.svm4["x1"]/w.svm4["x2"], color = 'green') +
  geom_abline(intercept = -b.svm/w.svm["x2"], slope = -w.svm["x1"]/w.svm["x2"], color = 'red') +
  geom_abline(intercept = -b.svm2/w.svm2["x2"], slope = -w.svm2["x1"]/w.svm2["x2"], color = 'orange') +
  geom_ribbon(aes(ymin = -w.svm["x1"]/w.svm["x2"]*x1 - b.svm/w.svm["x2"] - as.numeric(margin),
                  ymax = -w.svm["x1"]/w.svm["x2"]*x1 - b.svm/w.svm["x2"] + as.numeric(margin)),
              fill = "red", alpha = 0.5) +
  geom_ribbon(aes(ymin = -w.svm2["x1"]/w.svm2["x2"]*x1 - b.svm2/w.svm2["x2"] - as.numeric(margin2),
                  ymax = -w.svm2["x1"]/w.svm2["x2"]*x1 - b.svm2/w.svm2["x2"] + as.numeric(margin2)),
              fill = "orange", alpha = 0.4) +
  geom_ribbon(aes(ymin = -w.svm4["x1"]/w.svm4["x2"]*x1 - b.svm4/w.svm4["x2"] - as.numeric(margin4),
                  ymax = -w.svm4["x1"]/w.svm4["x2"]*x1 - b.svm4/w.svm4["x2"] + as.numeric(margin4)),
              fill = "green", alpha = 0.15) +
  geom_ribbon(aes(ymin = -w.svm5["x1"]/w.svm5["x2"]*x1 - b.svm5/w.svm5["x2"] - as.numeric(margin5),
                  ymax = -w.svm5["x1"]/w.svm5["x2"]*x1 - b.svm5/w.svm5["x2"] + as.numeric(margin5)),
              fill = "blue", alpha = 0.15)
Here, the perceptron line is shown in black. The soft-margin SVMs with cost = 1e+10, 10, 1, 0.1, and 0.03 are drawn in red, orange, yellow, green, and blue, respectively. The ribbons indicate the margin of each SVM, drawn as a vertical band whose half-width equals the margin (exact only for a horizontal boundary). The costs were chosen to produce a visible change in the margin. As expected, the blue margin (lowest cost) is the widest and the red margin (highest cost) is the narrowest. We also report the margins and classification errors:
# misclassification
y.test <- as.numeric(sgn) * sign(colSums(w.svm * x.out) + b.svm)
y.test2 <- as.numeric(sgn2) * sign(colSums(w.svm2 * x.out) + b.svm2)
y.test3 <- as.numeric(sgn3) * sign(colSums(w.svm3 * x.out) + b.svm3)
y.test4 <- as.numeric(sgn4) * sign(colSums(w.svm4 * x.out) + b.svm4)
y.test5 <- as.numeric(sgn5) * sign(colSums(w.svm5 * x.out) + b.svm5)
class.error <- sum(y.out != y.test) / 10000
class.error2 <- sum(y.out != y.test2) / 10000
class.error3 <- sum(y.out != y.test3) / 10000
class.error4 <- sum(y.out != y.test4) / 10000
class.error5 <- sum(y.out != y.test5) / 10000
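A minimal sketch for collecting these numbers into one object (the name `summary.tab` is ours, not part of the script above):

# assemble margins and out-of-sample errors into a single summary table
summary.tab <- data.frame(
  cost = c(1e+10, 10, 1, 0.1, 0.03),
  margin = c(margin, margin2, margin3, margin4, margin5),
  class.error = c(class.error, class.error2, class.error3,
                  class.error4, class.error5)
)
summary.tab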
c | 1e+10 | 10 | 1 | 0.1 | 0.03 |
---|---|---|---|---|---|
margin | 0.1787767 | 0.2305494 | 0.5556216 | 1.1594112 | 2.3335075 |
classification error | 0.0477 | 0.0452 | 0.104 | 0.089 | 0.3153 |
RUN <- 100
pla.D <- matrix(0, nrow=RUN, ncol=3)
svm.D <- matrix(0, nrow=RUN, ncol=3)
svm2.D <- matrix(0, nrow=RUN, ncol=3)
svm3.D <- matrix(0, nrow=RUN, ncol=3)
svm4.D <- matrix(0, nrow=RUN, ncol=3)
svm5.D <- matrix(0, nrow=RUN, ncol=3)
# faster PLA: deterministic variant that always updates on the first error point
### Perceptron Learning Algorithm
pla <- function(x, y) {
  x <- rbind(x, 1)
  # initialize w.hat
  w.hat <- c(0, 0, 0)
  # start loop
  while (TRUE) {
    y.hat <- ifelse(t(w.hat) %*% x > 0, 1, -1)
    errors <- (y.hat != y)
    if (sum(errors) == 0) {
      # no more classification errors, algorithm stops
      break
    } else {
      ## alternatively, choose an error point randomly to update w
      # point <- sample(which(errors), 1)
      # choose the first error point to update w
      point <- which(errors)[1]
      # update w
      w.hat <- w.hat + y[point] * x[, point]
    }
  }
  return(w.hat)
}
for (run in 1:RUN) {
  x[1, ] <- rnorm(N, mean=0, sd=1)
  x[2, ] <- rnorm(N, mean=0, sd=1)
  ## generate Y
  y <- as.vector(ifelse(t(w) %*% x + b > 0, 1, -1))
  # PLA
  pla.D[run, ] <- pla(x, y)
  # SVM
  data <- data.frame(x1=x[1, ], x2=x[2, ], y=y)
  svm1 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=1e+10, scale=FALSE)
  svm2 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=10, scale=FALSE)
  svm3 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=1, scale=FALSE)
  svm4 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=0.1, scale=FALSE)
  svm5 <- svm(factor(y) ~ x1 + x2, data, type="C-classification", kernel="linear", cost=0.03, scale=FALSE)
  # getting coefficients
  # 1
  w.svm1 <- colSums(as.vector(svm1$coefs) * svm1$SV)
  b.svm1 <- -svm1$rho
  margin1 <- 1 / sqrt(t(w.svm1) %*% w.svm1)
  # sanity check: should be 1, otherwise it is label switching
  sgn1 <- sign((w.svm1 %*% svm1$SV[1, ] + b.svm1) * y[svm1$index[1]])
  svm.D[run, 1:2] <- as.numeric(sgn1) * w.svm1
  svm.D[run, 3] <- as.numeric(sgn1) * b.svm1
  # 2
  w.svm2 <- colSums(as.vector(svm2$coefs) * svm2$SV)
  b.svm2 <- -svm2$rho
  margin2 <- 1 / sqrt(t(w.svm2) %*% w.svm2)
  # sanity check: should be 1, otherwise it is label switching
  sgn2 <- sign((w.svm2 %*% svm2$SV[1, ] + b.svm2) * y[svm2$index[1]])
  svm2.D[run, 1:2] <- as.numeric(sgn2) * w.svm2
  svm2.D[run, 3] <- as.numeric(sgn2) * b.svm2
  # 3
  w.svm3 <- colSums(as.vector(svm3$coefs) * svm3$SV)
  b.svm3 <- -svm3$rho
  margin3 <- 1 / sqrt(t(w.svm3) %*% w.svm3)
  # sanity check: should be 1, otherwise it is label switching
  sgn3 <- sign((w.svm3 %*% svm3$SV[1, ] + b.svm3) * y[svm3$index[1]])
  svm3.D[run, 1:2] <- as.numeric(sgn3) * w.svm3
  svm3.D[run, 3] <- as.numeric(sgn3) * b.svm3
  # 4
  w.svm4 <- colSums(as.vector(svm4$coefs) * svm4$SV)
  b.svm4 <- -svm4$rho
  margin4 <- 1 / sqrt(t(w.svm4) %*% w.svm4)
  # sanity check: should be 1, otherwise it is label switching
  sgn4 <- sign((w.svm4 %*% svm4$SV[1, ] + b.svm4) * y[svm4$index[1]])
  svm4.D[run, 1:2] <- as.numeric(sgn4) * w.svm4
  svm4.D[run, 3] <- as.numeric(sgn4) * b.svm4
  # 5
  w.svm5 <- colSums(as.vector(svm5$coefs) * svm5$SV)
  b.svm5 <- -svm5$rho
  margin5 <- 1 / sqrt(t(w.svm5) %*% w.svm5)
  # sanity check: should be 1, otherwise it is label switching
  sgn5 <- sign((w.svm5 %*% svm5$SV[1, ] + b.svm5) * y[svm5$index[1]])
  svm5.D[run, 1:2] <- as.numeric(sgn5) * w.svm5
  svm5.D[run, 3] <- as.numeric(sgn5) * b.svm5
}
class.error.D <- matrix(0, nrow=RUN, ncol=6)
for (run in 1:RUN) {
  y.test.pla.D <- sign(colSums(pla.D[run, 1:2] * x.out) + pla.D[run, 3])
  y.test.D <- sign(colSums(svm.D[run, 1:2] * x.out) + svm.D[run, 3])
  y2.test.D <- sign(colSums(svm2.D[run, 1:2] * x.out) + svm2.D[run, 3])
  y3.test.D <- sign(colSums(svm3.D[run, 1:2] * x.out) + svm3.D[run, 3])
  y4.test.D <- sign(colSums(svm4.D[run, 1:2] * x.out) + svm4.D[run, 3])
  y5.test.D <- sign(colSums(svm5.D[run, 1:2] * x.out) + svm5.D[run, 3])
  class.error.D[run, 1] <- sum(y.out != y.test.pla.D) / 10000
  class.error.D[run, 2] <- sum(y.out != y.test.D) / 10000
  class.error.D[run, 3] <- sum(y.out != y2.test.D) / 10000
  class.error.D[run, 4] <- sum(y.out != y3.test.D) / 10000
  class.error.D[run, 5] <- sum(y.out != y4.test.D) / 10000
  class.error.D[run, 6] <- sum(y.out != y5.test.D) / 10000
}
# average error of each algorithm
ave.class.error <- colMeans(class.error.D)
Here, we give the mean classification errors for the five SVMs and the PLA. Out-of-sample performance improves when regularization is applied, but only for a large cost (c = 1e+10). For small costs, the PLA seems to give better out-of-sample performance, probably because the margin is too wide and the slacks \(\xi_i\) are large (> 1).
c | 1e+10 | 10 | 1 | 0.1 | 0.03 | PLA |
---|---|---|---|---|---|---|
margin | 0.1787767 | 0.2839417 | 0.4947379 | 1.5458022 | 5.1514436 | NA |
Mean classification error | 0.053547 | 0.151203 | 0.123547 | 0.215573 | 0.419972 | 0.065875 |
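The slacks mentioned above measure how far a point falls on the wrong side of its margin boundary; at the optimum they equal the hinge loss,

\[
\xi_i = \max\left(0,\ 1 - y_i\left(w^{\top} x_i + b\right)\right),
\]

so \(\xi_i > 1\) means the point is outright misclassified, consistent with the large errors at the smallest costs.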
pla.coeff <- data.frame(a = -pla.D[, 3] / pla.D[, 2], b = -pla.D[, 1] / pla.D[, 2])
svm.coeff <- data.frame(a = -svm.D[, 3] / svm.D[, 2], b = -svm.D[, 1] / svm.D[, 2])
svm2.coeff <- data.frame(a = -svm2.D[, 3] / svm2.D[, 2], b = -svm2.D[, 1] / svm2.D[, 2])
svm3.coeff <- data.frame(a = -svm3.D[, 3] / svm3.D[, 2], b = -svm3.D[, 1] / svm3.D[, 2])
svm4.coeff <- data.frame(a = -svm4.D[, 3] / svm4.D[, 2], b = -svm4.D[, 1] / svm4.D[, 2])
svm5.coeff <- data.frame(a = -svm5.D[, 3] / svm5.D[, 2], b = -svm5.D[, 1] / svm5.D[, 2])
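These slopes and intercepts follow from solving the decision boundary \(w_1 x_1 + w_2 x_2 + w_0 = 0\) for \(x_2\),

\[
x_2 = -\frac{w_0}{w_2} - \frac{w_1}{w_2}\, x_1 ,
\]

where the third column of each `*.D` matrix holds the bias term \(w_0\).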
# pla
gp_D_x <- as.matrix(pla.coeff) %*% rbind(1,x.out[1, ])
gp_bar_x <- colMeans(gp_D_x)
gp_sd_x <- apply(gp_D_x, 2, sd)
# svm
g1_D_x <- as.matrix(svm.coeff) %*% rbind(1,x.out[1, ])
g1_bar_x <- colMeans(g1_D_x)
g1_sd_x <- apply(g1_D_x, 2, sd)
g2_D_x <- as.matrix(svm2.coeff) %*% rbind(1,x.out[1, ])
g2_bar_x <- colMeans(g2_D_x)
g2_sd_x <- apply(g2_D_x, 2, sd)
g3_D_x <- as.matrix(svm3.coeff) %*% rbind(1,x.out[1, ])
g3_bar_x <- colMeans(g3_D_x)
g3_sd_x <- apply(g3_D_x, 2, sd)
g4_D_x <- as.matrix(svm4.coeff) %*% rbind(1,x.out[1, ])
g4_bar_x <- colMeans(g4_D_x)
g4_sd_x <- apply(g4_D_x, 2, sd)
g5_D_x <- as.matrix(svm5.coeff) %*% rbind(1,x.out[1, ])
g5_bar_x <- colMeans(g5_D_x)
g5_sd_x <- apply(g5_D_x, 2, sd)
Here are 5 sequential plots: the first shows the PLA and the SVM with c = 1e+10 (red); the subsequent plots add c = 10 (orange), 1 (yellow), 0.1 (green), and 0.03 (blue), respectively. Decreasing the cost from 1e+10 to 1 does not change bias(x) and variance(x) much. When c drops below 1, variance(x) and bias(x) start to deviate more, and this is probably the reason for the poor performance.
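The quantities discussed here (and computed after the plots) follow the usual pointwise decomposition, with the real-valued target \(f(x) = w^{\top} x + b\) as the reference and \(\bar{g}(x)\) the hypothesis value averaged over the 100 training sets:

\[
\text{bias}(x) = \left(\bar{g}(x) - f(x)\right)^2, \qquad
\text{var}(x) = \frac{1}{100} \sum_{D} \left(g^{(D)}(x) - \bar{g}(x)\right)^2 ,
\]

and, since the cross term vanishes when averaging over the runs, the mean squared error decomposes exactly as \(\text{MSE}(x) = \text{bias}(x) + \text{var}(x)\).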
ggplot() + geom_line(aes(x=x.out[1, ], y=gp_bar_x), color="black", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=gp_bar_x - gp_sd_x,ymax=gp_bar_x + gp_sd_x), alpha=0.3, fill='black') +
geom_line(aes(x=x.out[1, ], y=g1_bar_x), color="red", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g1_bar_x - g1_sd_x,ymax=g1_bar_x + g1_sd_x), alpha=0.5, fill='red') +
geom_point(aes(x=x.out[1, ], y=y.out), color="darkblue", size=1) + labs(x="x", y="y") +
theme_bw()
ggplot() + geom_line(aes(x=x.out[1, ], y=gp_bar_x), color="black", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=gp_bar_x - gp_sd_x,ymax=gp_bar_x + gp_sd_x), alpha=0.3, fill='black') +
geom_line(aes(x=x.out[1, ], y=g1_bar_x), color="red", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g1_bar_x - g1_sd_x,ymax=g1_bar_x + g1_sd_x), alpha=0.5, fill='red') +
geom_line(aes(x=x.out[1, ], y=g2_bar_x), color="orange", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g2_bar_x - g2_sd_x,ymax=g2_bar_x + g2_sd_x), alpha=0.4, fill='orange') +
geom_point(aes(x=x.out[1, ], y=y.out), color="darkblue", size=1) + labs(x="x", y="y") +
theme_bw()
ggplot() + geom_line(aes(x=x.out[1, ], y=gp_bar_x), color="black", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=gp_bar_x - gp_sd_x,ymax=gp_bar_x + gp_sd_x), alpha=0.3, fill='black') +
geom_line(aes(x=x.out[1, ], y=g1_bar_x), color="red", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g1_bar_x - g1_sd_x,ymax=g1_bar_x + g1_sd_x), alpha=0.5, fill='red') +
geom_line(aes(x=x.out[1, ], y=g2_bar_x), color="orange", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g2_bar_x - g2_sd_x,ymax=g2_bar_x + g2_sd_x), alpha=0.4, fill='orange') +
geom_line(aes(x=x.out[1, ], y=g3_bar_x), color="yellow", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g3_bar_x - g3_sd_x,ymax=g3_bar_x + g3_sd_x), alpha=0.3, fill='yellow') +
geom_point(aes(x=x.out[1, ], y=y.out), color="darkblue", size=1) + labs(x="x", y="y") +
theme_bw()
ggplot() + geom_line(aes(x=x.out[1, ], y=gp_bar_x), color="black", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=gp_bar_x - gp_sd_x,ymax=gp_bar_x + gp_sd_x), alpha=0.3, fill='black') +
geom_line(aes(x=x.out[1, ], y=g1_bar_x), color="red", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g1_bar_x - g1_sd_x,ymax=g1_bar_x + g1_sd_x), alpha=0.5, fill='red') +
geom_line(aes(x=x.out[1, ], y=g2_bar_x), color="orange", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g2_bar_x - g2_sd_x,ymax=g2_bar_x + g2_sd_x), alpha=0.4, fill='orange') +
geom_line(aes(x=x.out[1, ], y=g3_bar_x), color="yellow", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g3_bar_x - g3_sd_x,ymax=g3_bar_x + g3_sd_x), alpha=0.3, fill='yellow') +
geom_line(aes(x=x.out[1, ], y=g4_bar_x), color="green", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g4_bar_x - g4_sd_x,ymax=g4_bar_x + g4_sd_x), alpha=0.2, fill='green') +
geom_point(aes(x=x.out[1, ], y=y.out), color="darkblue", size=1) + labs(x="x", y="y") +
theme_bw()
ggplot() + geom_line(aes(x=x.out[1, ], y=gp_bar_x), color="black", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=gp_bar_x - gp_sd_x,ymax=gp_bar_x + gp_sd_x), alpha=0.3, fill='black') +
geom_line(aes(x=x.out[1, ], y=g1_bar_x), color="red", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g1_bar_x - g1_sd_x,ymax=g1_bar_x + g1_sd_x), alpha=0.5, fill='red') +
geom_line(aes(x=x.out[1, ], y=g2_bar_x), color="orange", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g2_bar_x - g2_sd_x,ymax=g2_bar_x + g2_sd_x), alpha=0.4, fill='orange') +
geom_line(aes(x=x.out[1, ], y=g3_bar_x), color="yellow", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g3_bar_x - g3_sd_x,ymax=g3_bar_x + g3_sd_x), alpha=0.3, fill='yellow') +
geom_line(aes(x=x.out[1, ], y=g4_bar_x), color="green", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g4_bar_x - g4_sd_x,ymax=g4_bar_x + g4_sd_x), alpha=0.2, fill='green') +
geom_line(aes(x=x.out[1, ], y=g5_bar_x), color="blue", size=1) +
geom_ribbon(aes(x=x.out[1, ], ymin=g5_bar_x - g5_sd_x,ymax=g5_bar_x + g5_sd_x), alpha=0.1, fill='blue') +
geom_point(aes(x=x.out[1, ], y=y.out), color="darkblue", size=1) + labs(x="x", y="y") +
theme_bw()
# real-valued target f(x) = w'x + b used as the reference for bias/variance
y.out.value <- as.vector(t(w) %*% x.out + b)
# PLA: squared bias, variance, and MSE of the hypothesis values over the 100 runs
bias.p <- mean((gp_bar_x - y.out.value)^2)
variance.p <- mean((t(gp_D_x) - gp_bar_x)^2)
mse.p <- mean((t(gp_D_x) - y.out.value)^2)
bias.1 <- mean((g1_bar_x - y.out.value)^2)
variance.1 <- mean((t(g1_D_x) - g1_bar_x)^2)
mse.1 <- mean((t(g1_D_x) - y.out.value)^2)
bias.2 <- mean((g2_bar_x - y.out.value)^2)
variance.2 <- mean((t(g2_D_x) - g2_bar_x)^2)
mse.2 <- mean((t(g2_D_x) - y.out.value)^2)
bias.3 <- mean((g3_bar_x - y.out.value)^2)
variance.3 <- mean((t(g3_D_x) - g3_bar_x)^2)
mse.3 <- mean((t(g3_D_x) - y.out.value)^2)
bias.4 <- mean((g4_bar_x - y.out.value)^2)
variance.4 <- mean((t(g4_D_x) - g4_bar_x)^2)
mse.4 <- mean((t(g4_D_x) - y.out.value)^2)
bias.5 <- mean((g5_bar_x - y.out.value)^2)
variance.5 <- mean((t(g5_D_x) - g5_bar_x)^2)
mse.5 <- mean((t(g5_D_x) - y.out.value)^2)
c | 1e+10 | 10 | 1 | 0.1 | 0.03 | PLA |
---|---|---|---|---|---|---|
Bias | 18.1544184 | 18.1261732 | 18.1167264 | 18.1133552 | 18.7723243 | 18.1705976 |
Variance | 0.0585361 | 0.059356 | 0.0879806 | 0.5107483 | 7.7136987 | 0.0806699 |
MSE | 18.2129544 | 18.1855292 | 18.2047069 | 18.6241035 | 26.486023 | 18.2512674 |
We see that regularizing, from the PLA to an SVM with high cost, decreases the variance, as it should. Once the cost is too low, however, the SVM performs poorly. Hence, the choice of c is important.
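In practice one would choose \(C\) by cross-validation rather than by eye. A minimal sketch using e1071's `tune.svm` (the cost grid here is our illustrative choice, not the one used in the experiment above):

# 10-fold cross-validation (the e1071 default) over a grid of costs
set.seed(13579)
cv <- tune.svm(factor(y) ~ x1 + x2, data = data,
               kernel = "linear", cost = 10^(-2:3), scale = FALSE)
cv$best.parameters # cost with the smallest cross-validated error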