## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = FALSE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
# library(DOPE)

## -----------------------------------------------------------------------------
# library(readxl)
# controlled <- read_excel("../inst/extdata/c_cs_alpha.xlsx")

## ----packages-----------------------------------------------------------------
# library(conflicted)
# suppressMessages(conflict_prefer("filter", "dplyr"))
# suppressPackageStartupMessages(library(dplyr))
# library(stringr) # str_count & str_detect
# library(tidyr) # separate
# library(readr) # write_csv

## -----------------------------------------------------------------------------
# # flag rows whose `Names` value contains an opening parenthesis, or a comma
# # that is immediately followed by a non-whitespace character
# new <- controlled %>%
#   mutate(
#     difficult = str_count(Names, "[(]") > 0 |
#       str_detect(Names, ",(?=\\S)")
#   )

## -----------------------------------------------------------------------------
# # filtered rows where synonyms are "difficult"
# difficult <- new %>%
#   filter(difficult)
#
# # created a text file (CSV) for all rows with "difficult" synonyms
# # difficult %>%
# #   select(-difficult) %>%
# #   write_csv("../inst/extdata/Difficult.csv")
#
# # for the 'difficult' synonyms, I plan to split by semicolon
# # prepped csv file for splitting synonyms via semicolon
# synonyms_edited <- read_csv("../inst/extdata/Difficult_Edited.csv")
#
# # data set of difficult synonyms, all split by semicolon
# # NOTE(review): `extra = "drop"` silently discards anything past the ninth
# # piece; confirm no row in the edited CSV has more than nine synonyms.
# synonyms_difficult <-
#   synonyms_edited %>%
#   # move the semicolon separated names into their own columns
#   separate(
#     Names,
#     into = paste0("n_", 1:9),
#     extra = "drop",
#     fill = "right",
#     sep = "[;]",
#     remove = FALSE
#   ) %>%
#   # remove extra spaces for all the newly created variables
#   mutate(across(starts_with("n_"), str_trim)) %>%
#   # make the dataset long: one row per synonym
#   pivot_longer(
#     cols = starts_with("n_"),
#     values_to = "synonym",
#     values_drop_na = TRUE
#   ) %>%
#   select(-c(name, Names)) %>%
#   # get rid of any blank synonyms
#   filter(synonym != "")
## -----------------------------------------------------------------------------
# # filtered rows where synonyms are NOT "difficult"
# easy <- new %>%
#   filter(difficult %in% c(FALSE, NA))
#
# # made the comma replacements and created a dataset for each type of
# # transformation, with the final result being a comma
#
# # row positions (within `easy`) that need each kind of fix; they are defined
# # once so the "no changes" set below is derived from them and cannot drift
# semi_rows <- c(6, 64, 80, 378)
# and_rows <- c(79, 120, 247, 274, 422, 423)
# ecstasy_rows <- 58
# extra_comma_rows <- 376
# or_rows <- 328
# changed_rows <- c(
#   semi_rows,
#   and_rows,
#   ecstasy_rows,
#   extra_comma_rows,
#   or_rows
# )
#
# # change semicolon to comma
# semi_is_gone <-
#   easy %>%
#   slice(semi_rows) %>%
#   mutate(Names = str_replace_all(Names, ";", ","))
#
# # replace "and" with comma
# and_is_gone <-
#   easy %>%
#   slice(and_rows) %>%
#   mutate(Names = str_replace_all(Names, " and", ","))
#
# # remove the phrase involving ecstasy
# ecstasy_is_gone <-
#   easy %>%
#   slice(ecstasy_rows) %>%
#   mutate(Names = str_remove_all(Names, " has been sold as Ecstasy, i.e."))
#
# # remove comma after synonym
# extra_comma_is_gone <-
#   easy %>%
#   slice(extra_comma_rows) %>%
#   mutate(Names = str_remove_all(Names, ","))
#
# # replace "or" with comma
# or_is_gone <-
#   easy %>%
#   slice(or_rows) %>%
#   mutate(Names = str_replace_all(Names, " or", ","))
#
# # dataset of rows that did NOT require a comma change
# # (i.e. I left them the way they are)
# easy_nochanges <-
#   easy %>%
#   slice(-changed_rows)
#
# # bind rows that required a comma change and rows that didn't
# # now the data is ready to be split by comma
# synonyms_easy_prep <-
#   bind_rows(
#     semi_is_gone,
#     and_is_gone,
#     ecstasy_is_gone,
#     extra_comma_is_gone,
#     or_is_gone,
#     easy_nochanges
#   )
#
# # dataset of easy synonyms, all split by comma
# # NOTE(review): `extra = "drop"` silently discards anything past the ninth
# # piece; confirm no drug has more than nine comma-separated synonyms.
# synonyms_easy <-
#   synonyms_easy_prep %>%
#   # move the comma separated names into their own columns.
#   # nine new columns are enough to hold the drugs with MANY synonyms.
#   separate(
#     Names,
#     into = paste0("n_", 1:9),
#     extra = "drop",
#     fill = "right",
#     sep = "[,]",
#     remove = FALSE
#   ) %>%
#   # remove extra spaces for all the newly created variables
#   mutate(across(starts_with("n_"), str_trim)) %>%
#   # make the dataset long
#   pivot_longer(
#     cols = starts_with("n_"),
#     values_to = "synonym",
#     values_drop_na = TRUE
#   ) %>%
#   select(-c(name, Names, difficult)) %>%
#   # get rid of any blank name columns
#   filter(synonym != "")

## -----------------------------------------------------------------------------
# # stack both synonym sets, strip the "(UK)" qualifier from one synonym, and
# # lower-case the column names before saving the package dataset
# dea_controlled <- bind_rows(synonyms_difficult, synonyms_easy) %>%
#   mutate(synonym = if_else(synonym == "Soneryl (UK)", "Soneryl", synonym)) %>%
#   rename(
#     substance = SUBSTANCE,
#     number = Number,
#     schedule = Schedule,
#     narcotic = Narcotic
#   )
#
# usethis::use_data(dea_controlled, overwrite = TRUE)