# Setup ----

# Chunk rendering defaults for the document this script is extracted from.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Turn off the package progress option and fix the RNG seed.
options(bigANNOY.progress = FALSE)
set.seed(20260326)

library(bigANNOY)
library(bigmemory)

# Every index file written below lives in this scratch directory.
tune_dir <- tempfile("bigannoy-tuning-")
dir.create(tune_dir, recursive = TRUE, showWarnings = FALSE)

# Reference and query data ----

# One label per row of `ref_dense`, in row order.
ref_labels <- c(
  "unit_x",
  "double_x",
  "unit_y",
  "tilted_x",
  "unit_z",
  "diag_xy"
)

# Six reference points in three dimensions (one point per row).
ref_dense <- matrix(
  c(
    1.0, 0.0, 0.0,
    2.0, 0.0, 0.0,
    0.0, 1.0, 0.0,
    0.8, 0.2, 0.0,
    0.0, 0.0, 1.0,
    1.0, 1.0, 0.0
  ),
  ncol = 3,
  byrow = TRUE
)

# Two query points in the same three-dimensional space.
query_dense <- matrix(
  c(
    1.0, 0.0, 0.0,
    0.9, 0.1, 0.0
  ),
  ncol = 3,
  byrow = TRUE
)

ref_big <- as.big.matrix(ref_dense)

# Show the reference points next to their labels and row indices.
data.frame(
  index = seq_along(ref_labels),
  label = ref_labels,
  ref_dense,
  row.names = NULL
)

# Metric comparison ----

# Build one index per metric, run both queries against it, and keep the first
# returned neighbour (label and rounded distance) for each query.
metric_table <- do.call(
  rbind,
  lapply(
    c("euclidean", "angular", "manhattan", "dot"),
    function(metric_name) {
      ann_file <- file.path(tune_dir, sprintf("%s.ann", metric_name))

      metric_index <- annoy_build_bigmatrix(
        ref_big,
        path = ann_file,
        metric = metric_name,
        n_trees = 20L,
        seed = 123L,
        load_mode = "eager"
      )

      hits <- annoy_search_bigmatrix(
        metric_index,
        query = query_dense,
        k = 2L,
        search_k = 100L
      )

      data.frame(
        metric = metric_name,
        q1_top1 = ref_labels[hits$index[1, 1]],
        q1_distance = round(hits$distance[1, 1], 3),
        q2_top1 = ref_labels[hits$index[2, 1]],
        q2_distance = round(hits$distance[2, 1], 3),
        stringsAsFactors = FALSE
      )
    }
  )
)

metric_table

# Load modes ----

# Two euclidean indexes that differ in tree count and in `load_mode`.
lazy_index <- annoy_build_bigmatrix(
  ref_big,
  path = file.path(tune_dir, "lazy.ann"),
  metric = "euclidean",
  n_trees = 8L,
  seed = 123L,
  load_mode = "lazy"
)

eager_index <- annoy_build_bigmatrix(
  ref_big,
  path = file.path(tune_dir, "eager.ann"),
  metric = "euclidean",
  n_trees = 25L,
  seed = 123L,
  load_mode = "eager"
)

# Report what `annoy_is_loaded()` says for each index right after building.
c(
  lazy_loaded = annoy_is_loaded(lazy_index),
  eager_loaded = annoy_is_loaded(eager_index)
)

# Reopen the eager index from its file on disk and search through that handle.
reopened <- annoy_open_index(
  eager_index$path,
  prefault = TRUE,
  load_mode = "eager"
)

result <- annoy_search_bigmatrix(
  reopened,
  query = query_dense,
  k = 2L,
  search_k = 100L,
  prefault = TRUE
)

# Recall benchmark ----

# `exact = TRUE` and the `recall_at_k` column are used only when the bigKNN
# package can be found; otherwise the suite runs with `exact = FALSE` and only
# the parameter and timing columns are shown.
has_bigknn <- length(find.package("bigKNN", quiet = TRUE)) > 0L

tuning_suite <- benchmark_annoy_recall_suite(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = 3L,
  n_trees = c(5L, 20L),
  search_k = c(-1L, 50L, 200L),
  metric = "euclidean",
  exact = has_bigknn,
  path_dir = tune_dir
)

summary_cols <- c(
  "n_trees",
  "search_k",
  "build_elapsed",
  "search_elapsed"
)
if (has_bigknn) {
  summary_cols <- c(summary_cols, "recall_at_k")
}

tuning_suite$summary[, summary_cols]

# Package options ----

# Current values of the package options, with the fallback used when unset.
list(
  block_size_default = getOption("bigANNOY.block_size", 1024L),
  progress_default = getOption("bigANNOY.progress", FALSE),
  backend_default = getOption("bigANNOY.backend", "cpp")
)