# Setup ----

# knitr chunk options: collapse source and output into one block and prefix
# printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
# NOTE(review): presumably silences bigANNOY progress reporting -- confirm
# against the package documentation.
options(bigANNOY.progress = FALSE)
set.seed(20260326)

library(bigANNOY)
library(bigmemory)

# Every backing file, descriptor file and the index file created below lives
# in this per-run scratch directory.
workspace_dir <- tempfile("bigannoy-filebacked-")
dir.create(workspace_dir, recursive = TRUE, showWarnings = FALSE)
# `showWarnings = FALSE` would hide a failed creation, so fail fast here
# instead of hitting an obscure backing-file error further down.
stopifnot(dir.exists(workspace_dir))

# Helper ----

# Create a file-backed big.matrix with the same dimensions as `values` and
# copy `values` into it.
#
# Arguments:
#   values      Source data; its nrow()/ncol() give the dimensions.
#   type        Storage type passed to filebacked.big.matrix().
#   backingpath Directory that receives the backing and descriptor files.
#   name        File stem: the files are named "<name>.bin" and "<name>.desc".
#
# Returns the populated big.matrix.
make_filebacked_matrix <- function(values, type, backingpath, name) {
  bm <- filebacked.big.matrix(
    nrow = nrow(values),
    ncol = ncol(values),
    type = type,
    backingfile = sprintf("%s.bin", name),
    descriptorfile = sprintf("%s.desc", name),
    backingpath = backingpath
  )
  bm[, ] <- values
  bm
}

# Reference data ----

# Five 2-column reference rows, filled row by row.
ref_dense <- matrix(
  c(
    0.0, 0.0,
    5.0, 0.0,
    0.0, 5.0,
    5.0, 5.0,
    9.0, 9.0
  ),
  ncol = 2,
  byrow = TRUE
)

ref_fb <- make_filebacked_matrix(
  values = ref_dense,
  type = "double",
  backingpath = workspace_dir,
  name = "ref"
)

# The same reference matrix as an in-memory descriptor object and as the path
# of the descriptor file written by the helper.
ref_desc <- describe(ref_fb)
ref_desc_path <- file.path(workspace_dir, "ref.desc")

file.exists(ref_desc_path)
dim(ref_fb)

# Build the index ----

index_path <- file.path(workspace_dir, "ref.ann")

# The reference data is handed over as a descriptor file path rather than as
# a big.matrix object.
index <- annoy_build_bigmatrix(
  x = ref_desc_path,
  path = index_path,
  n_trees = 25L,
  metric = "euclidean",
  seed = 99L,
  load_mode = "lazy"
)

index

# Query with a file-backed big.matrix ----

query_dense <- matrix(
  c(
    0.2, 0.1,
    4.7, 5.1
  ),
  ncol = 2,
  byrow = TRUE
)

query_fb <- make_filebacked_matrix(
  values = query_dense,
  type = "double",
  backingpath = workspace_dir,
  name = "query"
)

query_result_big <- annoy_search_bigmatrix(
  index,
  query = query_fb,
  k = 2L,
  search_k = 100L
)

query_result_big$index
round(query_result_big$distance, 3)

# Query with a descriptor object and a descriptor path ----

query_desc <- describe(query_fb)
query_desc_path <- file.path(workspace_dir, "query.desc")

query_result_desc <- annoy_search_bigmatrix(
  index,
  query = query_desc,
  k = 2L,
  search_k = 100L
)

query_result_path <- annoy_search_bigmatrix(
  index,
  query = query_desc_path,
  k = 2L,
  search_k = 100L
)

# Compare the three ways of supplying the same query data.
query_result_desc$index
query_result_path$index
identical(query_result_big$index, query_result_desc$index)
identical(query_result_big$index, query_result_path$index)
all.equal(query_result_big$distance, query_result_desc$distance)

# Stream results into file-backed matrices ----

# One row per query row; two columns to match `k = 2L` in the search below.
index_store <- filebacked.big.matrix(
  nrow = nrow(query_dense),
  ncol = 2L,
  type = "integer",
  backingfile = "nn_index.bin",
  descriptorfile = "nn_index.desc",
  backingpath = workspace_dir
)

distance_store <- filebacked.big.matrix(
  nrow = nrow(query_dense),
  ncol = 2L,
  type = "double",
  backingfile = "nn_distance.bin",
  descriptorfile = "nn_distance.desc",
  backingpath = workspace_dir
)

# NOTE(review): xpIndex / xpDistance presumably name the destinations the
# search writes its results into -- confirm against the bigANNOY docs. One is
# given as a descriptor object and the other as a descriptor file path.
streamed_result <- annoy_search_bigmatrix(
  index,
  query = query_desc,
  k = 2L,
  xpIndex = describe(index_store),
  xpDistance = file.path(workspace_dir, "nn_distance.desc")
)

bigmemory::as.matrix(index_store)
round(bigmemory::as.matrix(distance_store), 3)

# Re-attach both stores from their descriptor files and print them again.
index_store_again <- attach.big.matrix(
  file.path(workspace_dir, "nn_index.desc")
)
distance_store_again <- attach.big.matrix(
  file.path(workspace_dir, "nn_distance.desc")
)

bigmemory::as.matrix(index_store_again)
round(bigmemory::as.matrix(distance_store_again), 3)

# Query with a separated-column big.matrix ----

# Same query values, held in a big.matrix created with `separated = TRUE`
# and no backing file.
query_sep <- big.matrix(
  nrow = nrow(query_dense),
  ncol = ncol(query_dense),
  type = "double",
  separated = TRUE
)
query_sep[, ] <- query_dense

sep_result <- annoy_search_bigmatrix(
  index,
  query = describe(query_sep),
  k = 2L,
  search_k = 100L
)

sep_result$index
round(sep_result$distance, 3)
identical(sep_result$index, query_result_big$index)
all.equal(sep_result$distance, query_result_big$distance)