cluster_labels <- c("A", "B", "C", "D", "E", "F", "G", "H")

# Some simple data and initial means to play with
data_pair <- t(matrix(c(-3,3,-4,3,-4,4,3,-3,4,-3,4,-4),2,6))
pair_init <- data_pair[c(1,2),]
bad_init <- t(matrix(c(-0.5, -2.5, -.5, -4.25, .25, 4.25, 0.25, 2.5), 2, 4))


data_quad <- t(matrix(c(-3,3,-4,3,-4,4,-3,-3,-4,-3,-4,-4,3,3,4,3,4,4,3,-3,4,-3,4,-4),2,12))
quad_init <- data_pair[c(1,2,4,5),]

# Given a dataset, randomly select some of the data points as means
generate_means <- function(data, K) {
	N <- dim(data)[1]
	return(data[sample(1:N, K),])
}

distance <- function(a, b) {
	# Pythagorean theorem
	val <- sqrt((a[1] - b[1])^2 + (a[2] - b[2])^2)
	return(val)
}

closest <- function(observation, means) {
	# Compute the distance from the observation to each mean
	distances <- apply(means, 1, function(row) distance(row,observation))
	closest <- which.min(distances)

	# Return the index of the closes mean (note: ties are
	# broken in favor of lower index)
	return(which.min(distances))
}

kmeans_2d <- function(data, iters, means) {
	# Size of the data
	N <- dim(data)[1]
	K <- dim(means)[1]

  	# For each point, find the closest mean
  	assignments <- apply(data, 1, function(row) closest(row, means))	
  	
  	# Iterate until we have converged
  	for (ii in 1:iters) {  		
  		if (iters > 0) {
 	    for (jj in 1:K) {
 	       # Get the data points assigned to this mean
 	       obs <- subset(cbind(data, assignments), assignments == jj)
 	       
 	       # If there are no observations, set the mean to zero
 	       if (dim(obs)[1] == 0) {
 	         means[jj,] <- c(0.0, 0.0)	
 	       } else {
	 	     # Set the new mean to the observations
 		     means[jj,] <- c(mean(obs[,1]), mean(obs[,2]))
 		   }
 	    }
 	    # For each point, find the closest mean
  		assignments <- apply(data, 1, function(row) closest(row, means))	
 	    }
  	}
  	
  	# return a data frame with the result
  	result <- data.frame(id = 1:N)
  	result$x <- data[,1]
  	result$y <- data[,2]
  	result$assign <- factor(assignments)
  	
  	centers <- data.frame(cluster = 1:K)
  	centers$x <- means[,1]
  	centers$y <- means[,2]
  	
  	return(c(data = result, means = centers))
}

add_path <- function(data_fit, mean_pos, p) {
	x_path <- c()
	y_path <- c()
	
	with_mean <- merge(data_fit, mean_pos, by="assign")
	
	for (ii in 1:nrow(data_fit)) {
		xstart <- with_mean[ii,]$x
		ystart <- with_mean[ii,]$y
		xend <- with_mean[ii,]$mx
	    yend <- with_mean[ii,]$my
		
		x_path <- c(x_path, xstart, xend)
		y_path <- c(y_path, ystart, yend)	

		x_path <- c(x_path, NA)
		y_path <- c(y_path, NA)
		
	}
	path_frame <- data.frame(x=x_path, y=y_path)
		
	p <- p + geom_path(data=path_frame, aes(x=x, y=y))
	return(p)
}

plot_results <- function(res) {
	data_fit <- data.frame(x = res$data.x, y = res$data.y, assign = res$data.assign)
	mean_pos <- data.frame(mx = res$means.x, my = res$means.y, assign = res$means.cluster)
	mean_pos$mjittery <- mean_pos$my + (-1) ^ (1 + mean_pos$assign) / 2	
	mean_pos$mlabel <- sprintf("%s: (%.2f, %.2f)", cluster_labels[mean_pos$assign], mean_pos$mx, mean_pos$my)
	
	p <- ggplot() + geom_point(data=data_fit, aes(x=x,y=y,colour=assign, size=3))
	p <- p + geom_point(data=mean_pos, x=mean_pos$mx, y=mean_pos$my, size=6, shape=5)
	
	p <- p + geom_text(data=mean_pos, aes(x=mx, y=mjittery, label=mlabel))
	p <- p + theme(legend.position = "none") 

	
	p <- add_path(data_fit, mean_pos, p)
	
	p <- p + scale_y_continuous(breaks=-5:5, lim=c(min(data_fit$x) - 1, max(data_fit$x) + 1))
	p <- p + scale_x_continuous(breaks=-5:5, lim=c(min(data_fit$y) - 1, max(data_fit$y) + 1))
	
	return(p)
}

pair_0_plot <- qplot(data_pair[,1], data_pair[,2]) + scale_y_continuous("y", breaks=-5:5, lim=c(min(data_pair[2,]) - 1, max(data_pair[2,]) + 1)) + scale_x_continuous("x", breaks=-5:5, lim=c(min(data_pair[1,]) - 1, max(data_pair[1,]) + 1))

pair_1_res <- kmeans_2d(data_pair, 0, pair_init)
pair_1_plot <- plot_results(pair_1_res)
pair_2_res <- kmeans_2d(data_pair, 1, pair_init)
pair_2_plot <- plot_results(pair_2_res)
pair_3_res <- kmeans_2d(data_pair, 2, pair_init)
pair_3_plot <- plot_results(pair_3_res)

quad_0_plot <- qplot(data_quad[,1], data_quad[,2]) + scale_y_continuous("y", breaks=-5:5, lim=c(min(data_quad[2,]) - 1, max(data_quad[2,]) + 1)) + scale_x_continuous("x", breaks=-5:5, lim=c(min(data_quad[1,]) - 1, max(data_quad[1,]) + 1))
quad_1_res <- kmeans_2d(data_quad, 0, quad_init)
quad_1_plot <- plot_results(quad_1_res)
quad_2_res <- kmeans_2d(data_quad, 1, quad_init)
quad_2_plot <- plot_results(quad_2_res)
quad_3_res <- kmeans_2d(data_quad, 2, quad_init)
quad_3_plot <- plot_results(quad_3_res)
quad_4_res <- kmeans_2d(data_quad, 4, quad_init)
quad_4_plot <- plot_results(quad_4_res)

bad_0_plot <- qplot(data_quad[,1], data_quad[,2]) + scale_y_continuous("y", breaks=-5:5, lim=c(min(data_quad[2,]) - 1, max(data_quad[2,]) + 1)) + scale_x_continuous("x", breaks=-5:5, lim=c(min(data_quad[1,]) - 1, max(data_quad[1,]) + 1))
bad_1_res <- kmeans_2d(data_quad, 0, bad_init)
bad_1_plot <- plot_results(bad_1_res)
bad_2_res <- kmeans_2d(data_quad, 1, bad_init)
bad_2_plot <- plot_results(bad_2_res)