laurakwiley · December 4, 2015 21:12
diff --git a/jt_extract_numbers_from_string.R b/jt_extract_numbers_from_string.R
 library(magrittr)
 library(tidyr)
 library(dplyr)

 # Example input: one "LABEL = value" string per row. tibble() keeps Var1 as
 # character on every R version, unlike data.frame() before R 4.0, and avoids
 # the deprecated tbl_df() wrapper.
 data <- tibble(
   Var1 = c(
     "MHSMOCA:MOCATOTAL = 24",
     "MHSMOCA:MOCATOTAL = 24.5",
     "MHSMOCA:MOCA7TOTAL = 24"
   )
 )

 # Option 1: Use a variable splitting function from tidyr
 ## tidyr::separate() takes the column to split (col), the names of the new
 ## columns (into), the separator as a regular expression (sep), and whether
 ## to convert the new columns to better-fitting types afterwards (convert),
 ## so Number comes back as a dbl or int rather than a string.
 ## The separator also swallows the whitespace around "=" so that Text does
 ## not keep a trailing space.
 data %>%
   separate(
     col = Var1,
     into = c("Text", "Number"),
     sep = "\\s*=\\s*",
     convert = TRUE
   )

 # Option 2: Extract Number using regex
 ## The pattern matches an equals sign, optional whitespace, and then
 ## captures one or more digits optionally followed by a period and any
 ## number of further digits. str_match() returns a matrix whose first
 ## column is the full match and whose second column is the capturing group,
 ## hence the "[, 2]".
 ## The captured text is a string, so as.numeric() converts it inside
 ## mutate(); otherwise the number would stay a string.
 ## "[0-9]*" (rather than "[0-9]?") keeps every decimal digit, so a value
 ## such as 26.54654 is not cut down to 26.5.
 library(stringr)
 data %>%
   mutate(
     numeric_val = as.numeric(
       str_match(Var1, "=\\s*([0-9]+\\.?[0-9]*)")[, 2]
     )
   )

 # In case your data really is a matrix
 data <- matrix(
   c(
     "MHSMOCA:MOCATOTAL = 24",
     "MHSMOCA:MOCATOTAL = 24.5",
     "MHSMOCA:MOCA7TOTAL = 24",
     "MsdHSMOCA:MOCA7TOTAL = 26.54654"
   ),
   nrow = 2,
   ncol = 2
 )
 # Note: before R 4.0, as.data.frame() turns every string column into a
 # factor; from R 4.0 onwards the columns stay character.
 data <- data %>%
   as.data.frame() %>%
   as_tibble()
 # Apply as.character() to every column so they are plain strings whichever
 # R version produced the data frame (the result is printed, not stored).
 data %>%
   mutate(across(everything(), as.character))
	library(magrittr)
	library(tidyr)
	library(dplyr)

	# Example input: one "LABEL = value" string per row. tibble() keeps Var1 as
	# character on every R version, unlike data.frame() before R 4.0, and avoids
	# the deprecated tbl_df() wrapper.
	data <- tibble(
	  Var1 = c(
	    "MHSMOCA:MOCATOTAL = 24",
	    "MHSMOCA:MOCATOTAL = 24.5",
	    "MHSMOCA:MOCA7TOTAL = 24"
	  )
	)

	# Option 1: Use a variable splitting function from tidyr
	## tidyr::separate() takes the column to split (col), the names of the new
	## columns (into), the separator as a regular expression (sep), and whether
	## to convert the new columns to better-fitting types afterwards (convert),
	## so Number comes back as a dbl or int rather than a string.
	## The separator also swallows the whitespace around "=" so that Text does
	## not keep a trailing space.
	data %>%
	  separate(
	    col = Var1,
	    into = c("Text", "Number"),
	    sep = "\\s*=\\s*",
	    convert = TRUE
	  )

	# Option 2: Extract Number using regex
	## The pattern matches an equals sign, optional whitespace, and then
	## captures one or more digits optionally followed by a period and any
	## number of further digits. str_match() returns a matrix whose first
	## column is the full match and whose second column is the capturing group,
	## hence the "[, 2]".
	## The captured text is a string, so as.numeric() converts it inside
	## mutate(); otherwise the number would stay a string.
	## "[0-9]*" (rather than "[0-9]?") keeps every decimal digit, so a value
	## such as 26.54654 is not cut down to 26.5.
	library(stringr)
	data %>%
	  mutate(
	    numeric_val = as.numeric(
	      str_match(Var1, "=\\s*([0-9]+\\.?[0-9]*)")[, 2]
	    )
	  )

	# In case your data really is a matrix
	data <- matrix(
	  c(
	    "MHSMOCA:MOCATOTAL = 24",
	    "MHSMOCA:MOCATOTAL = 24.5",
	    "MHSMOCA:MOCA7TOTAL = 24",
	    "MsdHSMOCA:MOCA7TOTAL = 26.54654"
	  ),
	  nrow = 2,
	  ncol = 2
	)
	# Note: before R 4.0, as.data.frame() turns every string column into a
	# factor; from R 4.0 onwards the columns stay character.
	data <- data %>%
	  as.data.frame() %>%
	  as_tibble()
	# Apply as.character() to every column so they are plain strings whichever
	# R version produced the data frame (the result is printed, not stored).
	data %>%
	  mutate(across(everything(), as.character))