Skip to content

Instantly share code, notes, and snippets.

@cengel
Last active August 14, 2020 19:14
Show Gist options
  • Save cengel/0c4b2027180f7a06c2afa3139602901a to your computer and use it in GitHub Desktop.
Save cengel/0c4b2027180f7a06c2afa3139602901a to your computer and use it in GitHub Desktop.
NCCU R Intro Carpentries Workshop Notes
########################################
### Intro to R NCCU, Aug 12-14, 2020 ###
########################################
# This is the workshop script.

# Get the working directory (Ctrl + Enter executes the current line).
getwd()

# Just a test: Shift + Ctrl + C turns a line into a comment
# (more shortcuts under Tools > Shortcuts).
# another line
# and another

# Download data ----

# Create a data directory plus two more: data_output and fig_output.
# showWarnings = FALSE keeps the script quiet when it is re-run and the
# folders already exist (dir.create() would otherwise warn each time).
dir.create("data", showWarnings = FALSE)
dir.create("data_output", showWarnings = FALSE)
dir.create("fig_output", showWarnings = FALSE)

# Download the dataset from figshare into the local data folder.
# Skipped when the file is already there so re-running does not hit the
# network again. mode = "wb" writes the file in binary mode.
if (!file.exists("data/SAFI_clean.csv")) {
  download.file(
    "https://ndownloader.figshare.com/files/11492171",
    "data/SAFI_clean.csv",
    mode = "wb"
  )
}

# Install additional packages (only when tidyverse is not installed yet, so
# the script does not reinstall it on every run).
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", dependencies = TRUE)
}
######
# Creating objects

# R works as a calculator.
3 + 8
12 / 7

# `<-` assigns a value to an object (RStudio shortcut: Alt / Option + -).
area_hectares <- 1.0
# `=` also assigns at the top level; `<-` is the convention used in this script.
area_hectares = 1.0

area_hectares # retrieve the value of the object

# Use the object in a calculation (same hectare-to-acre factor as below).
2.47 * area_hectares

area_hectares <- 2.5
area_hectares

area_acres <- 2.47 * area_hectares
# Reassigning area_hectares does not change area_acres, which was already
# computed from the previous value.
area_hectares <- 50

# From the etherpad exercise
length <- 19
width <- 12
area <- length * width
area
length <- 50 # change the value
area # value not changed: area is not recalculated automatically
area <- length * width # multiply again!
area # value changed
# Functions in R

# Some functions need no input argument at all.
getwd()

# Others take an argument and return a value, which can be stored.
a <- 64
sqrt(a)
b <- sqrt(a)
b

# With a single argument round() rounds to a whole number ...
round(3.141593)
# ... pass `digits` to return 2 digits.
round(3.141593, digits = 2)

# Open the help page to see which arguments round() takes.
help("round")

# Unnamed arguments are matched by position: keep the order as defined.
round(3.141593, 2)
# Named arguments can be given in any order.
round(digits = 2, x = 3.141593)
################################################
# Vectors

# A vector of numbers; typing the name prints what is in the vector.
hh_members <- c(3, 7, 10, 6)
hh_members

# A vector of characters.
respondent_wall_type <- c("muddaub", "burntbricks", "sunbricks")
respondent_wall_type

# How many elements does each vector hold?
length(hh_members)
length(respondent_wall_type)

# What type are the elements?
class(hh_members)
class(respondent_wall_type)

# Structure of the object?
str(hh_members)
str(respondent_wall_type)

# Add values to a vector: c() appends to the end ...
possessions <- c("bicycle", "radio", "television")
possessions <- c(possessions, "mobile_phone")
possessions # it now has 4 elements!

# Object names in R
# The next two examples are syntax errors. They are kept as comments so the
# script can be parsed and run from top to bottom.
# 2x <- 4  # names cannot begin with numbers
# for      # don't use reserved words
# Legal, but avoid using function names for objects:
length <- 4

# ... and c() can also add to the front.
possessions <- c("car", possessions)
possessions
# Other data types

# Logical values
my_logical <- c(TRUE, FALSE, FALSE)
class(my_logical)

# Integers (the L suffix marks an integer literal)
my_integer <- c(2L, 4L, 4L)
class(my_integer)

# From exercise 2: what class is each of these vectors?

# Numbers mixed with a string
num_char <- c(1, 2, 3, "a")
class(num_char) # a character vector
num_char

# Numbers mixed with a logical
num_logical <- c(1, 2, 3, TRUE)
class(num_logical) # a numeric vector
num_logical

# Strings mixed with a logical
char_logical <- c("a", "b", "c", TRUE)
class(char_logical)
char_logical

# Numbers mixed with a number written as a string
tricky <- c(1, 2, 3, "4")
class(tricky)
tricky

# Take-aways:
# 1. A vector can only hold one element type.
# 2. When types are mixed, character overrides numeric, which overrides logical.

# Combining two vectors that were already coerced
num_logical <- c(1, 2, 3, TRUE)
num_logical
char_logical <- c("a", "b", "c", TRUE)
char_logical
combined_logical <- c(num_logical, char_logical)
combined_logical
# Subsetting
respondent_wall_type

# By index:
# extract the 2nd element
respondent_wall_type[2]
# everything except the 2nd element
respondent_wall_type[-2]
# extract the 2nd and 3rd element
respondent_wall_type[c(2, 3)]
# extract all except the 2nd and 3rd element
respondent_wall_type[-c(2, 3)]
# indices may repeat, so the result can be longer than the original
respondent_wall_type[c(1, 2, 3, 1, 3, 3, 3)]

# Subsetting does not modify the vector unless the result is assigned back.
respondent_wall_type
respondent_wall_type <- respondent_wall_type[c(1, 3, 3, 2)]
respondent_wall_type

# Conditional subsetting
hh_members
# a logical vector keeps the elements at the TRUE positions
hh_members[c(TRUE, FALSE, TRUE, TRUE)]

# Example condition: the comparison itself yields a logical vector ...
hh_members > 5
# ... which can be used directly as the index.
hh_members[hh_members > 5]

# | means OR
hh_members[hh_members < 3 | hh_members > 5]
# greater or equal
hh_members >= 7
# equal
hh_members == 7
# not equal
hh_members != 7
# & means AND
hh_members > 3 & hh_members < 10

# The same works for character vectors.
possessions
possessions[possessions == "car"]
possessions[possessions == "car" | possessions == "bicycle"]
# %in% tests each element against a set of values
possessions %in% c("car", "bicycle")
possessions[possessions %in% c("car", "bicycle")]
# From exercise 3a/b

# Extract the two lowest values (several answers from participants)
hh_income <- c(177, 134, 172, 157, 100)
hh_income[hh_income < 135]
length(hh_income)
# the first condition is already covered by the second one
hh_income[hh_income <= 100 | hh_income <= 150]
hh_income[hh_income <= 150]
hh_income[hh_income <= 135]

# Extract "television": by value or by position
possessions <- c("car", "bicycle", "radio", "television", "mobile_phone")
possessions[possessions == "television"]
possessions[4]
possessions[possessions == "television"]
# Missing data
rooms <- c(2, 1, 1, NA, 4)

# Summaries return NA as soon as one element is NA ...
max(rooms)
# ... unless the missing values are dropped with na.rm = TRUE.
max(rooms, na.rm = TRUE)
mean(rooms)
mean(rooms, na.rm = TRUE)

# Which elements are missing?
is.na(rooms)

# Which elements are NOT missing? Use the result to extract them.
!is.na(rooms)
rooms[!is.na(rooms)]

# How many elements are NA? Summing a logical vector counts the TRUEs.
is.na(rooms)
sum(is.na(rooms)) # one element is NA!

# complete.cases() returns a logical vector
complete.cases(rooms)
rooms[complete.cases(rooms)]

# na.omit() returns the object with NA removed
na.omit(rooms)
# From exercise 4
rooms <- c(1, 2, 1, 1, NA, 3, 1, 3, 2, 1, 1, 8, 3, 1, NA, 1)

# Median of the non-missing values: three equivalent ways.
median(rooms[complete.cases(rooms)])
median(rooms, na.rm = TRUE)
median(rooms[!is.na(rooms)])

# na.omit() returns the VALUES with NA removed, not positions, so it must not
# be used as an index: rooms[na.omit(rooms)] would pick elements by room count
# and scramble the data. as.vector() drops the attributes na.omit() attaches.
rooms <- as.vector(na.omit(rooms))
median(rooms)

# Remove NAs (several answers from participants)
roomscomp <- rooms[complete.cases(rooms)]
cleaned_rooms <- rooms[!is.na(rooms)]
rooms <- rooms[!is.na(rooms)]
rooms <- as.vector(na.omit(rooms))
clean_rooms <- rooms[!is.na(rooms)]
clean_rooms

# Households with more than 2 rooms
rooms[rooms > 2]
length(rooms[rooms > 2])
cleaned_rooms <- rooms[!is.na(rooms)]
cleaned_rooms
length(cleaned_rooms[cleaned_rooms > 2])
####### GGPLOT #########
library(tidyverse)

# Download link for interviews_plotting.csv:
# https://drive.google.com/file/d/1AVDcIcgWeQs4pEQVCaxMzQtdziHCMRYt/view?usp=sharing
# Unzip and place in your data_output folder.
interviews_plotting <- read_csv("data_output/interviews_plotting.csv")

# Scatter plots
# A plot is built from three parts:
#   data     - the data frame to draw from
#   mapping  - what to plot (which columns go on which axis)
#   geometry - visual properties, how to display it
ggplot(
  data = interviews_plotting,
  mapping = aes(x = no_membrs, y = number_items)
) +
  geom_point()

# Semi-transparent points
ggplot(
  data = interviews_plotting,
  mapping = aes(x = no_membrs, y = number_items)
) +
  geom_point(alpha = 0.5)

# Jittered points instead of plain points
ggplot(
  data = interviews_plotting,
  mapping = aes(x = no_membrs, y = number_items)
) +
  geom_jitter(alpha = 0.5)

# One fixed color for all points
ggplot(
  data = interviews_plotting,
  mapping = aes(x = no_membrs, y = number_items)
) +
  geom_jitter(alpha = 0.5, color = "blue")

# Color mapped to the village column
ggplot(
  data = interviews_plotting,
  mapping = aes(x = no_membrs, y = number_items)
) +
  geom_jitter(mapping = aes(color = village), alpha = 0.5)

# The data + mapping part can be stored and reused with different geoms.
interviews_plot <- ggplot(
  data = interviews_plotting,
  mapping = aes(x = no_membrs, y = number_items)
)
interviews_plot +
  geom_jitter()
# Bar plots

# Number of cases per wall type
ggplot(
  data = interviews_plotting,
  mapping = aes(x = respondent_wall_type)
) +
  geom_bar()

# Number of cases, bars filled by village
ggplot(
  data = interviews_plotting,
  mapping = aes(x = respondent_wall_type)
) +
  geom_bar(mapping = aes(fill = village))

# Same counts, with the villages placed side by side
ggplot(
  data = interviews_plotting,
  mapping = aes(x = respondent_wall_type)
) +
  geom_bar(mapping = aes(fill = village), position = "dodge")

# Percent of each wall type within a village, "cement" rows excluded.
# group_by(village) makes sum(n) the per-village total.
# geom_col() plots the values in the data (instead of counts).
interviews_plotting %>%
  filter(respondent_wall_type != "cement") %>%
  count(village, respondent_wall_type) %>%
  group_by(village) %>%
  mutate(percent = n / sum(n) * 100) %>%
  ggplot(mapping = aes(x = village, y = percent, fill = respondent_wall_type)) +
  geom_col(position = "dodge") +
  labs(title = " my great plot", x = "Village", y = "Percent") +
  theme_bw() +
  theme(text = element_text(size = 16))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment