# Comparison walkthrough: approximate nearest neighbours from bigANNOY versus
# the exact search provided by the optional bigKNN package.
#
# Top-level expressions are left bare on purpose so that their values are
# auto-printed when the script is run through knitr or interactively.

# Setup ----

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(bigANNOY.progress = FALSE)
set.seed(20260326)

library(bigANNOY)
library(bigmemory)

# Number of neighbours requested from every search below; also the
# denominator of the recall computation, so the two cannot drift apart.
k_neighbors <- 5L

# bigKNN is optional. requireNamespace() returns FALSE (instead of erroring)
# when the package is missing or cannot be loaded, so the exact-search
# sections can be skipped cleanly. Evaluated once and reused.
has_bigknn <- requireNamespace("bigKNN", quietly = TRUE)

# Scratch directory for the on-disk index file(s) written by this script.
compare_dir <- tempfile("bigannoy-vs-bigknn-")
dir.create(compare_dir, recursive = TRUE, showWarnings = FALSE)

# Data ----

# 120 reference rows and 15 query rows, 6 columns each, standard normal.
ref_dense <- matrix(rnorm(120 * 6), nrow = 120, ncol = 6)
query_dense <- matrix(rnorm(15 * 6), nrow = 15, ncol = 6)
ref_big <- as.big.matrix(ref_dense)

dim(ref_big)
dim(query_dense)

# Approximate search with bigANNOY ----

annoy_index <- annoy_build_bigmatrix(
  ref_big,
  path = file.path(compare_dir, "ref.ann"),
  metric = "euclidean",
  n_trees = 20L,
  seed = 123L,
  load_mode = "eager"
)

approx_result <- annoy_search_bigmatrix(
  annoy_index,
  query = query_dense,
  k = k_neighbors,
  search_k = 100L
)

# Inspect the approximate result: component names, the `exact` and `backend`
# fields, and the first three rows of neighbour indices and distances.
names(approx_result)
approx_result$exact
approx_result$backend
approx_result$index[1:3, ]
round(approx_result$distance[1:3, ], 3)

# Exact search with bigKNN (optional) ----

if (has_bigknn) {
  # Looked up in the bigKNN namespace at run time rather than attached with
  # library(); presumably this keeps bigKNN a soft dependency -- TODO confirm.
  knn_bigmatrix <- get("knn_bigmatrix", envir = asNamespace("bigKNN"))
  exact_result <- knn_bigmatrix(
    ref_big,
    query = query_dense,
    k = k_neighbors,
    metric = "euclidean",
    block_size = 64L,
    exclude_self = FALSE
  )

  list(
    names = names(exact_result),
    exact = exact_result$exact,
    backend = exact_result$backend,
    index_head = exact_result$index[1:3, ],
    distance_head = round(exact_result$distance[1:3, ], 3)
  )
} else {
  "bigKNN is not installed in this session, so the exact comparison example is skipped."
}

# Recall of the approximate search against the exact one ----

if (has_bigknn) {
  # Per query row: share of the exact neighbour ids that the approximate
  # search also returned; then averaged over all query rows.
  # `exact_result` is reused from the section above instead of recomputed.
  recall_at_5 <- mean(vapply(
    seq_len(nrow(query_dense)),
    function(i) {
      shared <- intersect(approx_result$index[i, ], exact_result$index[i, ])
      length(shared) / k_neighbors
    },
    numeric(1L)
  ))
  recall_at_5
} else {
  "Recall example skipped because bigKNN is not installed."
}

# Benchmark helper ----

# The exact baseline is requested only when bigKNN is usable.
bench <- benchmark_annoy_bigmatrix(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = k_neighbors,
  n_trees = 20L,
  search_k = 100L,
  metric = "euclidean",
  exact = has_bigknn,
  path_dir = compare_dir,
  load_mode = "eager"
)

bench$summary[, c(
  "metric",
  "n_trees",
  "search_k",
  "build_elapsed",
  "search_elapsed",
  "exact_elapsed",
  "recall_at_k"
)]