## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----logo, echo=FALSE, out.width="20%", fig.align="right"---------------------
knitr::include_graphics("../man/figures/logo.png")

## ----setup--------------------------------------------------------------------
library(SONO)

# Generate data: three categorical variables with 3, 2 and 5 levels and 500
# observations each. The seed and the order of the sample() calls are fixed so
# that the simulated data set is reproducible.
set.seed(1)
X <- sample(1:3, 500, replace = TRUE, prob = c(0.2, 0.3, 0.5))
X <- cbind(X, sample(1:2, 500, replace = TRUE, prob = c(0.1, 0.9)))
X <- cbind(X, sample(1:5, 500, replace = TRUE, prob = rep(0.2, 5)))
X <- data.frame(X)

# Ensure every column is a factor (`X[] <-` keeps the data.frame structure and
# column names while replacing each column)
X[] <- lapply(X, factor)

# Probability vectors matching the data generating process
prob_vecs <- list(c(0.2, 0.3, 0.5), c(0.1, 0.9), rep(0.2, 5))

# Run SONO with true probabilities and r = 2
sono_res1 <- sono(
  data = X,
  probs = prob_vecs,
  alpha = 0.01,
  r = 2,
  MAXLEN = 0,
  frequent = FALSE,
  verbose = TRUE
)

# See summary of scores
summary(sono_res1[[2]][, 2])

## ----msspecification----------------------------------------------------------
# Misspecified probability vectors: the first two variables no longer match
# the probabilities used to generate the data
prob_vecs_mis <- list(c(0.4, 0.4, 0.2), c(0.9, 0.1), rep(0.2, 5))

# Run SONO with the misspecified probabilities and r = 2
sono_res2 <- sono(
  data = X,
  probs = prob_vecs_mis,
  alpha = 0.01,
  r = 2,
  MAXLEN = 0,
  frequent = FALSE,
  verbose = TRUE
)

# See summary of scores
summary(sono_res2[[2]][, 2])

## ----summaries----------------------------------------------------------------
# See summary of scores for each case
summary(sono_res2[[2]][which(X[, 1] == 1), 2])
summary(sono_res2[[2]][which(X[, 1] == 2), 2])
summary(sono_res2[[2]][which(X[, 2] == 1), 2])

## ----contributions_plot-------------------------------------------------------
# Plot matrix of contributions, restricted to observations with positive score
vis_contribs(
  contribs_mat = sono_res2[[3]],
  subset = which(sono_res2[[2]][, 2] > 0),
  scale = "max"
)

## ----eval_funs----------------------------------------------------------------
# Treat observations taking level 1 of the second variable as the outliers;
# this level was generated with probability 0.1 but specified as 0.9 above
outliers <- which(X[, 2] == 1)

# Scores from the misspecified run, reused by every evaluation function below
scores <- sono_res2[[2]][, 2]

# Compute average rank of outliers
avg_rank <- avg_rank_outs(scores = scores, outs = outliers, ties = "min")
cat("Average rank of outliers:", avg_rank, "\n")

# Evaluation grid: 0.01, 0.025, then 0.05 to 1 in steps of 0.05
grid_vals <- c(1, 2.5, seq(5, 100, by = 5)) / 100

recall <- recall_at_k(scores = scores, outs = outliers, grid = grid_vals)
for (i in seq_along(grid_vals)) {
  cat("Recall at", grid_vals[i], ":", recall[i], "\n")
}

roc_auc_vals <- roc_auc(scores = scores, outs = outliers, grid = grid_vals)
for (i in seq_along(grid_vals)) {
  cat("ROC AUC at", grid_vals[i], ":", roc_auc_vals[i], "\n")
}