Last active
August 14, 2020 19:14
-
-
Save cengel/0c4b2027180f7a06c2afa3139602901a to your computer and use it in GitHub Desktop.
NCCU R Intro Carpentries Workshop Notes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################
### Intro to R NCCU, Aug 12-14, 2020 ###
########################################
# This is the workshop script.

# Get the working directory:
getwd() # Ctrl + Enter executes the current line in RStudio

# Shift + Ctrl + C toggles a comment on the selected lines
# (more shortcuts are listed in RStudio's Tools menu).

# Download data ----

# Create the project directories: one for raw data, one for processed data,
# and one for figures. Existing directories are skipped so the script can be
# re-run without warnings.
for (dir_name in c("data", "data_output", "fig_output")) {
  if (!dir.exists(dir_name)) {
    dir.create(dir_name)
  }
}

# Download the dataset from figshare into the local data folder.
# mode = "wb" writes the file in binary mode.
# The download is skipped when the file is already present.
if (!file.exists("data/SAFI_clean.csv")) {
  download.file(
    "https://ndownloader.figshare.com/files/11492171",
    "data/SAFI_clean.csv",
    mode = "wb"
  )
}

# Install additional packages (only when tidyverse is not yet installed).
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse", dependencies = TRUE)
}
# Creating objects ----

# R as a calculator: results are printed, not stored.
3 + 8
12 / 7

area_hectares <- 1.0 # `<-` assigns a value to an object
area_hectares = 1.0  # `=` also assigns, but `<-` is the R convention
# RStudio shortcut for `<-`: Alt + - (Option + - on macOS)

area_hectares # typing the name retrieves the value of the object

# Use the object in a calculation.
2.47 * area_hectares

area_hectares <- 2.5 # assigning again overwrites the previous value
area_hectares

area_acres <- 2.47 * area_hectares
# Changing area_hectares afterwards does NOT update area_acres:
# area_acres keeps the value computed on the line above.
area_hectares <- 50

# From the etherpad
length <- 19
width <- 12
area <- length * width
area

length <- 50 # change one of the inputs
area         # value not changed: `area` stored a number, not a formula

area <- length * width # multiply again!
area                   # value changed
# Functions in R ----

getwd() # a function call that needs no input argument

a <- 64
sqrt(a)      # the result is printed but not kept
b <- sqrt(a) # store the result in an object
b

round(3.141593) # with no `digits` given, rounds to a whole number

# Return 2 digits.
round(3.141593, digits = 2)

?round # open the help page to see the arguments and their order

round(3.141593, 2)              # unnamed: keep the order of arguments as defined
round(digits = 2, x = 3.141593) # named: arguments can be given in any order
################################################
# Vectors ----

# A vector of numbers.
hh_members <- c(3, 7, 10, 6)
hh_members # what is in the vector?

# A vector of characters.
respondent_wall_type <- c("muddaub", "burntbricks", "sunbricks")
respondent_wall_type

# How many elements?
length(hh_members)
length(respondent_wall_type)

# What type are the elements?
class(hh_members)
class(respondent_wall_type)

# Structure of the object?
str(hh_members)
str(respondent_wall_type)

# Add values to a vector.
possessions <- c("bicycle", "radio", "television")
possessions <- c(possessions, "mobile_phone") # append at the end
possessions # it now has 4 elements!

# Object names in R
# The next two lines are syntax errors, so they are kept as comments;
# left uncommented they would stop the whole script from parsing.
# 2x <- 4  # names cannot begin with a number
# for <- 4 # don't use reserved words
length <- 4 # allowed, but avoid using function names for objects

possessions <- c("car", possessions) # prepend at the beginning
possessions
# Other data types ----

# logical
my_logical <- c(TRUE, FALSE, FALSE)
class(my_logical)

# integer (the L suffix makes the literal an integer instead of a double)
my_integer <- c(2L, 4L, 4L)
class(my_integer)

# From exercise 2: what class are those?
num_char <- c(1, 2, 3, "a")
class(num_char) # a character vector
num_char

num_logical <- c(1, 2, 3, TRUE)
class(num_logical) # a numeric vector: TRUE became 1
num_logical

char_logical <- c("a", "b", "c", TRUE)
class(char_logical) # a character vector: TRUE became "TRUE"
char_logical

tricky <- c(1, 2, 3, "4")
class(tricky) # a character vector: the quoted "4" forces all to character
tricky

# 1. A vector can only hold one element type.
# 2. When types are mixed, everything is converted to the most general one:
#    character overrides numeric, and numeric overrides logical.

# Combining the two vectors from above converts again: num_logical is already
# numeric (1, 2, 3, 1), so its last element becomes "1", not "TRUE".
combined_logical <- c(num_logical, char_logical)
combined_logical
# Subsetting ----
respondent_wall_type

# By index:
respondent_wall_type[2]         # extract the 2nd element
respondent_wall_type[-2]        # everything except the 2nd element
respondent_wall_type[c(2, 3)]   # extract the 2nd and 3rd element
respondent_wall_type[-c(2, 3)]  # everything except the 2nd and 3rd element

# Indices can repeat and come in any order; the result follows the index vector.
respondent_wall_type[c(1, 2, 3, 1, 3, 3, 3)]
respondent_wall_type # subsetting alone does not change the original object

# Assign the subset back to the name to change the object itself.
respondent_wall_type <- respondent_wall_type[c(1, 3, 3, 2)]
respondent_wall_type

# Conditional subsetting ----
hh_members
hh_members[c(TRUE, FALSE, TRUE, TRUE)] # keeps the elements where TRUE

# Example condition: a comparison returns one TRUE/FALSE per element ...
hh_members > 5
# ... which can be used directly as the subset.
hh_members[hh_members > 5]
hh_members[hh_members < 3 | hh_members > 5] # | means OR

hh_members >= 7                   # greater or equal
hh_members == 7                   # equal
hh_members != 7                   # not equal
hh_members > 3 & hh_members < 10  # & means AND

possessions
possessions[possessions == "car"]
possessions[possessions == "car" | possessions == "bicycle"]

# %in% tests each element against a set of values; shorter than chaining ==
# with |.
possessions %in% c("car", "bicycle")
possessions[possessions %in% c("car", "bicycle")]
# From exercise 3a/b ----

# (a) Extract the two lowest values.
hh_income <- c(177, 134, 172, 157, 100)
length(hh_income)

# Any threshold between the 2nd and 3rd lowest value (134 and 157) works:
hh_income[hh_income < 135]
hh_income[hh_income <= 135]
# `hh_income <= 100 | hh_income <= 150` gives the same result; its first
# clause is redundant because every value <= 100 is also <= 150.
hh_income[hh_income <= 150]

# Without picking a threshold by eye: sort, then take the first two.
sort(hh_income)[1:2]

# (b) Extract "television".
possessions <- c("car", "bicycle", "radio", "television", "mobile_phone")
possessions[4]                            # by position
possessions[possessions == "television"]  # by value
# Missing data ----
rooms <- c(2, 1, 1, NA, 4)

# A single NA makes the result NA unless it is removed with na.rm = TRUE.
max(rooms)
max(rooms, na.rm = TRUE)
mean(rooms)
mean(rooms, na.rm = TRUE)

# Which elements are missing?
is.na(rooms)

# Extract the elements that are NOT missing.
!is.na(rooms)
rooms[!is.na(rooms)]

# How many elements are NA? TRUE counts as 1 when summed.
is.na(rooms)
sum(is.na(rooms)) # one element is NA!

complete.cases(rooms)         # returns a logical vector
rooms[complete.cases(rooms)]
na.omit(rooms)                # returns the object with NAs removed
# From exercise 4 ----
rooms <- c(1, 2, 1, 1, NA, 3, 1, 3, 2, 1, 1, 8, 3, 1, NA, 1)

# Median, ignoring the NAs: four equivalent ways.
median(rooms, na.rm = TRUE)
median(rooms[!is.na(rooms)])
median(rooms[complete.cases(rooms)])
median(na.omit(rooms))

# Remove NAs: three equivalent ways.
roomscomp <- rooms[complete.cases(rooms)]
cleaned_rooms <- rooms[!is.na(rooms)]
# na.omit() already returns the values with NAs removed. Do NOT use it as an
# index (`rooms[na.omit(rooms)]`): that treats the room counts as positions
# and returns the wrong elements. as.vector() drops the bookkeeping attributes
# na.omit() attaches.
clean_rooms <- as.vector(na.omit(rooms))
clean_rooms

# Overwrite the original with the cleaned version.
rooms <- rooms[!is.na(rooms)]
median(rooms)

# Households with more than 2 rooms.
rooms[rooms > 2]
length(rooms[rooms > 2])
length(cleaned_rooms[cleaned_rooms > 2])
sum(cleaned_rooms > 2) # same count: TRUE counts as 1 when summed
####### GGPLOT #########
library(tidyverse)

# Download link for interviews_plotting.csv:
# https://drive.google.com/file/d/1AVDcIcgWeQs4pEQVCaxMzQtdziHCMRYt/view?usp=sharing
# Unzip and place in your data_output folder.
interviews_plotting <- read_csv("data_output/interviews_plotting.csv")

# Scatter plots ----

# A ggplot is built from three parts, combined with `+`:
ggplot(
  interviews_plotting,                  # data
  aes(x = no_membrs, y = number_items)  # mapping - what to plot?
) +
  geom_point()                          # geometry - how to display it?

# alpha makes the points semi-transparent.
ggplot(interviews_plotting, aes(x = no_membrs, y = number_items)) +
  geom_point(alpha = 0.5)

# geom_jitter() is used in place of geom_point() for the same mapping.
ggplot(interviews_plotting, aes(x = no_membrs, y = number_items)) +
  geom_jitter(alpha = 0.5)

# A fixed color for all points is set outside aes().
ggplot(interviews_plotting, aes(x = no_membrs, y = number_items)) +
  geom_jitter(alpha = 0.5, color = "blue")

# Color mapped to a column of the data goes inside aes().
ggplot(interviews_plotting, aes(x = no_membrs, y = number_items)) +
  geom_jitter(aes(color = village), alpha = 0.5)

# The data and mapping can be stored in an object, with geoms added later.
interviews_plot <- ggplot(
  interviews_plotting,
  aes(x = no_membrs, y = number_items)
)
interviews_plot +
  geom_jitter()
# Barplots ----

# geom_bar() counts the number of cases per category.
ggplot(interviews_plotting, aes(x = respondent_wall_type)) +
  geom_bar()

# Number of cases colored by village (bars are stacked by default).
ggplot(interviews_plotting, aes(x = respondent_wall_type)) +
  geom_bar(aes(fill = village))

# Same, with position = "dodge".
ggplot(interviews_plotting, aes(x = respondent_wall_type)) +
  geom_bar(aes(fill = village), position = "dodge")

# Percent of each wall type within a village, excluding "cement".
interviews_plotting %>%
  filter(respondent_wall_type != "cement") %>%
  count(village, respondent_wall_type) %>%
  group_by(village) %>%
  mutate(percent = n / sum(n) * 100) %>% # sum(n) is taken per village
  ungroup() %>%                          # drop the grouping once it is used
  ggplot(aes(x = village, y = percent, fill = respondent_wall_type)) +
  geom_col(position = "dodge") + # plots values in the data (instead of counts)
  labs(title = " my great plot", x = "Village", y = "Percent") +
  theme_bw() +
  theme(text = element_text(size = 16))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment