# Benchmark walkthrough for bigANNOY.
#
# Top-level script (apparently extracted from a knitr document): each bare
# expression below is evaluated for its printed value. All artifacts are
# written under a fresh temporary directory.

# ---- setup ----
# Merge source and output in rendered chunks and prefix output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Turn off the package's progress option and fix the RNG seed so repeated
# runs draw the same random numbers.
options(bigANNOY.progress = FALSE)
set.seed(20260326)

library(bigANNOY)

# ---- scratch directory ----
# Unique directory under tempdir() that receives every benchmark artifact.
bench_dir <- tempfile("bigannoy-benchmark-")
dir.create(bench_dir, recursive = TRUE, showWarnings = FALSE)
bench_dir

# ---- single benchmark run ----
# One run with explicit sizes; `output_path` names the CSV read back below.
single_csv <- file.path(bench_dir, "single.csv")

single <- benchmark_annoy_bigmatrix(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = 3L,
  n_trees = 10L,
  search_k = 50L,
  exact = FALSE,
  path_dir = bench_dir,
  output_path = single_csv,
  load_mode = "eager"
)

single$summary

# Inspect the components of the returned object.
names(single)
single$params
single$exact_available

# Validation outcome and the per-check table.
single$validation$valid
single$validation$checks[, c("check", "passed", "severity")]

# The same run as persisted to disk.
read.csv(single_csv, stringsAsFactors = FALSE)

# ---- external query vs. self search ----
# Two runs that differ only in how the query is specified: an explicit
# `n_query` versus `query = NULL`.
# NOTE(review): `query = NULL` presumably switches to self-search; the
# "self_search" summary column printed below should confirm this.
external_run <- benchmark_annoy_bigmatrix(
  n_ref = 120L,
  n_query = 12L,
  n_dim = 5L,
  k = 3L,
  n_trees = 8L,
  exact = FALSE,
  path_dir = bench_dir
)

self_run <- benchmark_annoy_bigmatrix(
  n_ref = 120L,
  query = NULL,
  n_dim = 5L,
  k = 3L,
  n_trees = 8L,
  exact = FALSE,
  path_dir = bench_dir
)

# Side-by-side view of the shape-related summary columns.
shape_cols <- c("self_search", "n_ref", "n_query", "k")

rbind(
  external = external_run[["summary"]][, shape_cols],
  self = self_run[["summary"]][, shape_cols]
)

# ---- recall suite ----
# Passes several `n_trees` and `search_k` values in one call and writes the
# results to suite.csv.
suite_csv <- file.path(bench_dir, "suite.csv")

suite <- benchmark_annoy_recall_suite(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = 3L,
  n_trees = c(5L, 10L),
  search_k = c(-1L, 50L),
  exact = FALSE,
  path_dir = bench_dir,
  output_path = suite_csv,
  load_mode = "eager"
)

suite$summary
read.csv(suite_csv, stringsAsFactors = FALSE)

# ---- optional exact baseline ----
# Only attempted when the bigKNN package is installed.
if (length(find.package("bigKNN", quiet = TRUE)) > 0L) {
  exact_run <- benchmark_annoy_bigmatrix(
    n_ref = 150L,
    n_query = 15L,
    n_dim = 5L,
    k = 3L,
    n_trees = 10L,
    search_k = 50L,
    metric = "euclidean",
    exact = TRUE,
    path_dir = bench_dir
  )

  # Inside braces only the final expression is auto-printed, so the
  # intermediate value has to be printed explicitly to appear in the output.
  print(exact_run$exact_available)

  exact_run$summary[, c(
    "build_elapsed",
    "search_elapsed",
    "exact_elapsed",
    "recall_at_k"
  )]
} else {
  "Exact baseline example skipped because bigKNN is not installed."
}

# ---- user-supplied data ----
# Benchmark caller-provided dense matrices (80 x 4 reference, 12 x 4 query)
# instead of size parameters, with `filebacked = TRUE`.
ref <- matrix(rnorm(80 * 4), nrow = 80, ncol = 4)
query <- matrix(rnorm(12 * 4), nrow = 12, ncol = 4)

user_run <- benchmark_annoy_bigmatrix(
  x = ref,
  query = query,
  k = 3L,
  n_trees = 12L,
  search_k = 40L,
  exact = FALSE,
  filebacked = TRUE,
  path_dir = bench_dir,
  load_mode = "eager"
)

user_run$summary[, c(
  "filebacked",
  "self_search",
  "n_ref",
  "n_query",
  "n_dim",
  "build_elapsed",
  "search_elapsed"
)]

# ---- comparison run ----
# NOTE(review): going by the function name, this compares bigANNOY with
# RcppAnnoy; the "implementation" column distinguishes the rows.
compare_csv <- file.path(bench_dir, "compare.csv")

compare_run <- benchmark_annoy_vs_rcppannoy(
  n_ref = 200L,
  n_query = 20L,
  n_dim = 6L,
  k = 3L,
  n_trees = 10L,
  search_k = 50L,
  exact = FALSE,
  path_dir = bench_dir,
  output_path = compare_csv,
  load_mode = "eager"
)

compare_run$summary[, c(
  "implementation",
  "reference_storage",
  "n_ref",
  "n_query",
  "n_dim",
  "total_data_bytes",
  "index_bytes",
  "build_elapsed",
  "search_elapsed"
)]

# Byte-size columns as stored in the CSV export.
read.csv(compare_csv, stringsAsFactors = FALSE)[, c(
  "implementation",
  "ref_bytes",
  "query_bytes",
  "index_bytes",
  "metadata_bytes",
  "artifact_bytes"
)]

# ---- volume suite ----
# Passes several `n_ref` and `n_dim` values in one call and writes the
# results to volume.csv.
volume_csv <- file.path(bench_dir, "volume.csv")

volume_run <- benchmark_annoy_volume_suite(
  n_ref = c(200L, 500L),
  n_query = 20L,
  n_dim = c(6L, 12L),
  k = 3L,
  n_trees = 10L,
  search_k = 50L,
  exact = FALSE,
  path_dir = bench_dir,
  output_path = volume_csv,
  load_mode = "eager"
)

volume_run$summary[, c(
  "implementation",
  "n_ref",
  "n_dim",
  "total_data_bytes",
  "index_bytes",
  "build_elapsed",
  "search_elapsed"
)]

# ---- installed benchmark script ----
# Path of the benchmark script shipped with the package ("" if not found).
system.file("benchmarks", "benchmark_annoy.R", package = "bigANNOY")