# Emoji analysis walkthrough for YouTube comments, in script form.
#
# The `## ----label, options----` lines are knitr-style chunk markers
# (presumably this file is knitr::purl() output -- confirm before editing by
# hand). Every chunk flagged eval=FALSE is kept commented out, so sourcing
# this file only sets knitr chunk options and never calls the YouTube API.
# Inside a disabled chunk, lines starting with "# #" are explanatory notes.
# has_emoji(), count_emojis(), extract_emojis() and remove_emojis() are not
# defined here; presumably they come from tuber -- verify.

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE,
  fig.path = "figure/",
  fig.width = 8,
  fig.height = 4,
  out.width = "100%"
)

## ----setup, message=FALSE, warning=FALSE, eval=FALSE--------------------------
# library(tuber)
# library(dplyr)
# library(ggplot2)

## ----get-comments, eval=FALSE-------------------------------------------------
# # Placeholders only: supply real credentials and a real video id.
# yt_oauth("your_app_id", "your_app_secret")
#
# comments <- get_all_comments(video_id = "your_video_id", max_results = 500)

## ----basic-analysis, eval=FALSE-----------------------------------------------
# # Adds the has_emoji / emoji_count columns that later chunks reuse.
# comments <- comments |>
#   mutate(
#     has_emoji = has_emoji(textDisplay),
#     emoji_count = count_emojis(textDisplay)
#   )
#
# summary(comments$emoji_count)
#
# emoji_rate <- mean(comments$has_emoji, na.rm = TRUE) * 100
# cat("Comments with emojis:", round(emoji_rate, 1), "%\n")

## ----distribution, eval=FALSE-------------------------------------------------
# # Comments without emojis are dropped before plotting the histogram.
# comments |>
#   filter(emoji_count > 0) |>
#   ggplot(aes(x = emoji_count)) +
#   geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
#   labs(
#     title = "Distribution of Emojis per Comment",
#     x = "Number of Emojis",
#     y = "Number of Comments"
#   ) +
#   theme_minimal()

## ----frequency, eval=FALSE----------------------------------------------------
# # Flatten the per-comment emoji results into one vector, then tabulate.
# all_emojis <- unlist(extract_emojis(comments$textDisplay))
#
# emoji_freq <- as.data.frame(table(all_emojis), stringsAsFactors = FALSE)
# names(emoji_freq) <- c("emoji", "count")
# emoji_freq <- emoji_freq[order(-emoji_freq$count), ]
#
# head(emoji_freq, 15)
#
# emoji_freq |>
#   head(10) |>
#   ggplot(aes(x = reorder(emoji, count), y = count)) +
#   geom_col(fill = "steelblue") +
#   coord_flip() +
#   labs(
#     title = "Top 10 Most Used Emojis",
#     x = "Emoji",
#     y = "Count"
#   ) +
#   theme_minimal()

## ----temporal, eval=FALSE-----------------------------------------------------
# # Uses the has_emoji column created in the basic-analysis chunk.
# comments <- comments |>
#   mutate(
#     date = as.Date(publishedAt),
#     emoji_count = count_emojis(textDisplay)
#   )
#
# daily_emoji <- comments |>
#   group_by(date) |>
#   summarise(
#     total_comments = n(),
#     comments_with_emoji = sum(has_emoji, na.rm = TRUE),
#     total_emojis = sum(emoji_count, na.rm = TRUE),
#     emoji_rate = comments_with_emoji / total_comments * 100,
#     avg_emojis = total_emojis / total_comments
#   )
#
# ggplot(daily_emoji, aes(x = date, y = emoji_rate)) +
#   geom_line(color = "steelblue") +
#   geom_smooth(method = "loess", se = TRUE, alpha = 0.2) +
#   labs(
#     title = "Emoji Usage Rate Over Time",
#     x = "Date",
#     y = "% of Comments with Emojis"
#   ) +
#   theme_minimal()

## ----sentiment, eval=FALSE----------------------------------------------------
# positive_emojis <- c(
#   "\U0001F600", "\U0001F601", "\U0001F602", "\U0001F603", "\U0001F604",
#   "\U0001F605", "\U0001F606", "\U0001F60A", "\U0001F60D", "\U0001F618",
#   "\U0001F44D", "\U0001F44F", "\U00002764", "\U0001F389", "\U0001F38A"
# )
#
# negative_emojis <- c(
#   "\U0001F620", "\U0001F621", "\U0001F622", "\U0001F623", "\U0001F624",
#   "\U0001F625", "\U0001F62D", "\U0001F44E", "\U0001F4A9", "\U0001F61E"
# )
#
# # vapply() pins the result to one integer per comment; sapply() would
# # return list() for zero-row input and break mutate().
# # case_when() takes the first matching branch, so "neutral" is only reached
# # when both counts are equal and non-zero.
# comments <- comments |>
#   mutate(
#     emojis = extract_emojis(textDisplay),
#     pos_emoji = vapply(
#       emojis,
#       function(e) sum(e %in% positive_emojis),
#       integer(1)
#     ),
#     neg_emoji = vapply(
#       emojis,
#       function(e) sum(e %in% negative_emojis),
#       integer(1)
#     ),
#     emoji_sentiment = case_when(
#       pos_emoji > neg_emoji ~ "positive",
#       neg_emoji > pos_emoji ~ "negative",
#       pos_emoji == 0 & neg_emoji == 0 ~ "none",
#       TRUE ~ "neutral"
#     )
#   )
#
# table(comments$emoji_sentiment)

## ----engagement, eval=FALSE---------------------------------------------------
# # NOTE(review): assumes likeCount is numeric -- confirm against the API
# # result before relying on mean()/median().
# engagement_summary <- comments |>
#   group_by(has_emoji) |>
#   summarise(
#     n = n(),
#     mean_likes = mean(likeCount, na.rm = TRUE),
#     median_likes = median(likeCount, na.rm = TRUE)
#   )
#
# print(engagement_summary)
#
# # The +1 shift keeps zero-like comments plottable on the log10 axis.
# ggplot(comments, aes(x = has_emoji, y = likeCount + 1)) +
#   geom_boxplot(fill = "steelblue", alpha = 0.7) +
#   scale_y_log10() +
#   labs(
#     title = "Like Counts: Emoji vs Non-Emoji Comments",
#     x = "Contains Emoji",
#     y = "Likes (log scale)"
#   ) +
#   theme_minimal()

## ----comparison, eval=FALSE---------------------------------------------------
# video_ids <- c("video_id_1", "video_id_2", "video_id_3")
#
# # Fetch each video separately and tag its rows before stacking them.
# all_comments <- lapply(video_ids, function(vid) {
#   vid_comments <- get_all_comments(video_id = vid, max_results = 200)
#   vid_comments$video_id <- vid
#   vid_comments
# })
# all_comments <- bind_rows(all_comments)
#
# # na.rm = TRUE matches the aggregates used in the earlier chunks.
# video_emoji_stats <- all_comments |>
#   mutate(emoji_count = count_emojis(textDisplay)) |>
#   group_by(video_id) |>
#   summarise(
#     total_comments = n(),
#     emoji_rate = mean(emoji_count > 0, na.rm = TRUE) * 100,
#     avg_emojis = mean(emoji_count, na.rm = TRUE)
#   )
#
# print(video_emoji_stats)

## ----clean-text, eval=FALSE---------------------------------------------------
# # Strip emojis, then collapse whitespace runs and trim the ends.
# comments <- comments |>
#   mutate(
#     clean_text = remove_emojis(textDisplay),
#     clean_text = trimws(gsub("\\s+", " ", clean_text))
#   )
#
# head(comments$clean_text[comments$has_emoji], 3)

## ----performance, eval=FALSE--------------------------------------------------
# # Estimate the emoji rate from at most 1000 randomly sampled comments.
# comments_sample <- comments[sample(nrow(comments), min(1000, nrow(comments))), ]
#
# comments_sample <- comments_sample |>
#   mutate(emoji_count = count_emojis(textDisplay))
#
# emoji_rate_estimate <- mean(comments_sample$emoji_count > 0, na.rm = TRUE) * 100