## Sequence-clustering walkthrough.
##
## Builds a transition network from the `human_long` data, clusters it with
## several dissimilarities and linkage methods, compares configurations with
## `cluster_choice()`, inspects the chosen fit with `cluster_diagnostics()`,
## and finally derives per-cluster networks.
##
## NOTE(review): the `## ----label----` lines are knitr chunk headers; this
## file looks like `knitr::purl()` output, so changes made here probably need
## to be mirrored in the source vignette -- confirm.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  fig.alt = "Visualization"
)

## ----data---------------------------------------------------------------------
library(cograph)
library(Nestimate)

data("human_long")
head(human_long)

## -----------------------------------------------------------------------------
net <- build_network(
  human_long,
  method = "tna",
  action = "cluster",
  actor = "session_id",
  time = "timestamp"
)

## ----cluster-basic------------------------------------------------------------
clust <- build_clusters(net, k = 3)
clust

## ----cluster-components-------------------------------------------------------
# Cluster assignments (first 20 sessions)
head(clust$assignments, 20)

# Cluster sizes
clust$sizes

# Silhouette score (clustering quality: higher is better)
clust$silhouette

## ----cluster-plot, fig.alt = "Silhouette plot showing cluster quality"--------
plot(clust, type = "silhouette")

## ----cluster-mds, fig.alt = "MDS plot showing cluster separation"-------------
plot(clust, type = "mds")

## ----cluster-metrics----------------------------------------------------------
# Levenshtein distance (allows insertions/deletions)
clust_lv <- build_clusters(net, k = 3, dissimilarity = "lv")
clust_lv$silhouette

# Longest common subsequence
clust_lcs <- build_clusters(net, k = 3, dissimilarity = "lcs")
clust_lcs$silhouette

## ----cluster-weighted---------------------------------------------------------
# Emphasize earlier positions (higher lambda = faster decay)
clust_weighted <- build_clusters(
  net,
  k = 3,
  dissimilarity = "hamming",
  weighted = TRUE,
  lambda = 0.5
)
clust_weighted$silhouette

## ----cluster-methods----------------------------------------------------------
# Ward's method (minimizes within-cluster variance)
clust_ward <- build_clusters(net, k = 3, method = "ward.D2")
clust_ward$silhouette

# Complete linkage
clust_complete <- build_clusters(net, k = 3, method = "complete")
clust_complete$silhouette

## ----choose-k-----------------------------------------------------------------
ch <- cluster_choice(
  net,
  k = 2:4,
  method = c("pam", "ward.D2", "complete", "average")
)
ch

## ----choose-k-plot, fig.alt = "Silhouette across k for each clustering method"----
plot(ch, type = "lines")

## ----choose-d-----------------------------------------------------------------
ch_d <- cluster_choice(
  net,
  k = 2,
  dissimilarity = c("hamming", "lv", "lcs"),
  method = "ward.D2"
)
ch_d

## ----choose-d-plot, fig.alt = "Silhouette per dissimilarity at k = 2"---------
plot(ch_d, type = "bars", abbrev = TRUE)

## ----choose-tradeoff, fig.alt = "Quality vs cluster-size balance"-------------
plot(ch_d, type = "tradeoff", abbrev = TRUE)

## -----------------------------------------------------------------------------
clust <- build_clusters(net, k = 2, method = "ward.D2", seed = 42)
summary(clust)

## ----cluster-diagnostics------------------------------------------------------
diag <- cluster_diagnostics(clust)
diag

## ----cluster-diagnostics-plot, fig.alt = "Per-observation silhouette by cluster"----
plot(diag, type = "silhouette")

## -----------------------------------------------------------------------------
mmm_fit <- build_mmm(net, k = 2)
summary(mmm_fit)

## ----mmm-diagnostics----------------------------------------------------------
diag_mmm <- cluster_diagnostics(mmm_fit)
diag_mmm

## ----mmm-diagnostics-plot, fig.alt = "Posterior certainty per MMM cluster"----
plot(diag_mmm, type = "posterior")

## ----cluster-networks---------------------------------------------------------
clust <- build_clusters(net, k = 2, method = "ward.D2")
cluster_net <- build_network(clust)
cluster_net

## ----cluster-networks-plot, fig.alt = "Per-cluster transition networks"-------
plot(cluster_net)

## ----cluster-network-shortcut-------------------------------------------------
## `cograph::cluster_network()` also exists with a different signature
## (matrix aggregation); qualify with `Nestimate::` to avoid masking.
grp_dist <- Nestimate::cluster_network(net, k = 2, cluster_by = "ward.D2")
grp_dist

## ----cluster-mmm--------------------------------------------------------------
grp_mmm <- cluster_mmm(net, k = 2)
grp_mmm

## -----------------------------------------------------------------------------
# Access cluster assignments
# head() rather than `[1:10]`: it does not pad with NA when fewer than 10
# assignments exist, and it matches the head(clust$assignments, 20) call above.
head(attr(grp_dist, "clustering")$assignments, 10)
head(attr(grp_mmm, "clustering")$assignments, 10)

# Access individual cluster networks
grp_dist[[1]]$weights[1:3, 1:3]

## -----------------------------------------------------------------------------
comparison <- permutation(grp_dist, iter = 100)

## ----workflow, eval = FALSE---------------------------------------------------
# # 1. Build the network from long-format data
# net <- build_network(human_long, method = "tna",
#                      actor = "session_id",
#                      action = "cluster",
#                      time = "timestamp")
#
# # 2. Sweep the parameter space
# ch <- cluster_choice(net, k = 2:5,
#                      dissimilarity = c("hamming", "lcs", "cosine"),
#                      method = c("pam", "ward.D2"))
# plot(ch, type = "facet", abbrev = TRUE)
#
# # 3. Pick a configuration and fit
# clust <- build_clusters(net, k = 2,
#                         dissimilarity = "hamming",
#                         method = "ward.D2")
#
# # 4. Validate
# diag <- cluster_diagnostics(clust)
# diag
# plot(diag, type = "silhouette")
#
# # 5. Build per-cluster networks
# grp <- build_network(clust)
#
# # 6. Optional: model-based second opinion
# mmm <- build_mmm(net, k = 2)
# plot(cluster_diagnostics(mmm), type = "posterior")