Last active
February 12, 2021 17:01
-
-
Save loreabad6/59b4aed4cf0e04e4e96b09db1a7214ee to your computer and use it in GitHub Desktop.
Diferencia entre @yakuperezg y @LassoGuillermo, tweets de @angiegomeza de los resultados del CNE.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rtweet) | |
library(tidyverse) | |
library(lubridate) | |
library(gganimate) | |
# Search Twitter for the count-update tweets (original note: tweets by
# Andrea Gomez). Retweets are excluded from the results.
reconteo <- search_tweets(
  q = "angiegomeza AND Actualización AND Diferencia",
  include_rts = FALSE
)
# Return the first element of `lines` that matches `pattern`, or NA when no
# element matches. The result always has length one, which is what map_chr()
# requires; tweets with zero or several matching lines no longer abort the
# whole pipeline.
first_matching_line <- function(lines, pattern) {
  lines[which(str_detect(lines, pattern))][1]
}

# Parse the raw tweets into one numeric row per count update.
reconteo_tidy <- reconteo %>%
  # Keep only the columns of interest
  select(created_at, text) %>%
  # Convert the timestamp to the Ecuador time zone
  mutate(hora_ec = with_tz(created_at, tzone = "America/Bogota")) %>%
  # Split the tweet text at each line break (at most 5 pieces)
  mutate(tmp_chunks = str_split(text, fixed("\n"), n = 5)) %>%
  # Build one column per line of interest
  mutate(
    yaku = map_chr(tmp_chunks, first_matching_line, "Yaku"),
    lasso = map_chr(tmp_chunks, first_matching_line, "Lasso"),
    diff = map_chr(tmp_chunks, first_matching_line, "Diferencia"),
    votos_por_comp = map_chr(tmp_chunks, first_matching_line, "Votos")
  ) %>%
  # First piece of text found between parentheses
  mutate(actas_por_comp = str_extract(text, "(?<=\\().+?(?=\\))")) %>%
  # Keep only the parsed columns
  select(hora_ec, yaku, lasso, diff, actas_por_comp, votos_por_comp) %>%
  # Drop every "." thousands separator. str_replace() would only remove the
  # first one, leaving values such as "1.234.567" to be read as 1234.567.
  mutate(across(
    c(diff, actas_por_comp, votos_por_comp),
    ~ str_replace_all(.x, "\\.", "")
  )) %>%
  # Keep only the first numeric value of each text column
  mutate(across(where(is.character), ~ str_extract(.x, "\\d+\\.*\\d*"))) %>%
  # Convert the extracted text to numbers
  mutate(across(where(is.character), as.numeric)) %>%
  # Keep rows where at least one candidate value exceeds 10 (original note:
  # this drops the first two tweets, which report millions of votes rather
  # than percentages)
  filter(yaku > 10 | lasso > 10)

# Open the data viewer only in interactive sessions, so the script can also
# run non-interactively.
if (interactive()) {
  View(reconteo_tidy)
}
# Time anchors used to place elements along the x axis of the plots.
min_x_position <- min(reconteo_tidy$hora_ec)
max_x_position <- max(reconteo_tidy$hora_ec)
# Start, midpoint and end of the interval annotated in the plots, expressed
# as offsets in seconds from the earliest timestamp (9 h, 12 h and 15 h,
# each plus 550 s).
susp_x_start <- min_x_position + (9 * 3600 + 550)
susp_x_mean <- min_x_position + (12 * 3600 + 550)
susp_x_end <- min_x_position + (15 * 3600 + 550)
# Trick to keep the text visible in the last frame of the animation: repeat
# the values of the first row 1200 seconds after the latest timestamp.
# NOTE(review): assumes row 1 holds the most recent update - confirm the
# ordering of the search results.
last_lasso <- reconteo_tidy$lasso[1]
last_yaku <- reconteo_tidy$yaku[1]
last_diff <- reconteo_tidy$diff[1]
# `reconteo_tidy` has no `por_comp` column; the figure shown in the label
# ("Votos aproximados por computar") is stored in `votos_por_comp`.
last_por_comp <- reconteo_tidy$votos_por_comp[1]
reconteo_tidy2 <- reconteo_tidy %>%
  add_row(
    hora_ec = max_x_position + 1200,
    yaku = last_yaku,
    lasso = last_lasso,
    diff = last_diff,
    votos_por_comp = last_por_comp,
    .before = 1
  )

# Animated plot of the vote percentage of each candidate over time
g <- reconteo_tidy2 %>%
  mutate(label_diff = paste(
    "Diferencia:", diff,
    "\nVotos aproximados\npor computar:\n", votos_por_comp
  )) %>%
  # Reshape only the two candidate columns; every other column stays as is
  pivot_longer(
    cols = c(yaku, lasso),
    names_to = "candidato",
    values_to = "porcentaje"
  ) %>%
  ggplot(aes(x = hora_ec)) +
  # Vertical lines at the start and end of the annotated interval
  geom_vline(xintercept = susp_x_start, color = "grey", size = 2) +
  geom_vline(xintercept = susp_x_end, color = "grey", size = 2) +
  geom_line(
    aes(color = candidato, y = porcentaje),
    show.legend = TRUE, size = 1.5
  ) +
  # Shaded band between both candidates (uses the wide data)
  geom_ribbon(
    data = reconteo_tidy2,
    aes(ymin = lasso, ymax = yaku),
    fill = "gray", alpha = 0.5
  ) +
  # Text with the current difference and the votes still to be counted
  geom_text(
    aes(label = label_diff), y = 19.95, x = max_x_position - 1800,
    size = 5, nudge_x = -5000, check_overlap = TRUE
  ) +
  # Percentage labels that follow each line
  geom_label(
    data = reconteo_tidy2,
    aes(label = paste0(yaku, "%"), y = yaku),
    size = 4.5, nudge_x = -500
  ) +
  geom_label(
    data = reconteo_tidy2,
    aes(label = paste0(lasso, "%"), y = lasso),
    size = 4.5, nudge_x = -500
  ) +
  scale_x_datetime(
    date_breaks = "4 hours", expand = expansion(mult = c(0, 0.15), add = 0),
    labels = scales::date_format("%d-%m\n%H:%M", tz = "America/Bogota")
  ) +
  scale_y_continuous(
    labels = scales::percent_format(scale = 1, accuracy = 0.1)
  ) +
  # Colors and labels are matched to the values of `candidato` in
  # alphabetical order ("lasso", then "yaku")
  scale_color_manual(
    "",
    values = c("deepskyblue3", "purple"),
    labels = c("Guillermo Lasso", "Yaku Pérez")
  ) +
  annotate(
    "text",
    y = 19.9, x = susp_x_mean, label = "Suspención\ndel conteo"
  ) +
  labs(
    # title = "La carrera por la segunda vuelta...",
    caption = "Fuente: CNE, @angiegomeza. Visualización: @loreabad6"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "top",
    axis.title = element_blank(),
    text = element_text(size = 16)
  ) +
  # Reveal the data along time, restricted to the original time span
  transition_reveal(
    hora_ec,
    range = range(min_x_position, max_x_position),
    keep_last = FALSE
  )

# Render the animation and write it to a GIF file
anim_save(
  "conteo_votos.gif",
  animation = g,
  end_pause = 25,
  duration = 15,
  rewind = FALSE,
  width = 650, height = 400
)
# Bar chart of the tally sheets ("actas") processed per hour
g2 <- reconteo_tidy %>%
  # Rows without a pending-sheets value cannot be differenced; dropping them
  # keeps one missing value from turning a whole hour into NA
  filter(!is.na(actas_por_comp)) %>%
  # Sheets processed between consecutive tweets: value in the next row minus
  # the value in this row.
  # NOTE(review): assumes rows are ordered newest first, so the next row is
  # the previous update - confirm.
  mutate(actas_computadas = lead(actas_por_comp) - actas_por_comp) %>%
  # lead() has no next row for the last tweet, so its difference is always
  # NA; drop it instead of letting sum() return NA for that hour
  filter(!is.na(actas_computadas)) %>%
  group_by(hora = floor_date(hora_ec, "1 hour")) %>%
  summarize(actas_por_hora = sum(actas_computadas)) %>%
  # Keep only the hours inside the window shown in the subtitle
  filter(
    hora >= as.POSIXct("2021-02-09 17:00:00", tz = "America/Bogota") &
      hora <= as.POSIXct("2021-02-11 00:00:00", tz = "America/Bogota")
  ) %>%
  ggplot(aes(x = hora, y = actas_por_hora)) +
  # Shaded band for the annotated interval, shifted by 1800 seconds
  geom_rect(
    xmin = susp_x_start - 1800, ymin = -Inf,
    xmax = susp_x_end - 1800, ymax = Inf,
    fill = "grey90", alpha = 0.05
  ) +
  annotate(
    "text",
    y = 300, x = susp_x_mean - 1800, label = "Suspención\ndel conteo"
  ) +
  geom_col(
    aes(fill = actas_por_hora),
    show.legend = FALSE
  ) +
  # Value printed slightly above each bar
  geom_text(aes(label = actas_por_hora), nudge_y = 15) +
  scale_x_datetime(
    date_breaks = "2 hours", date_minor_breaks = "1 hour",
    expand = c(0.01, 0.1),
    labels = scales::date_format("%d-%m\n%H:%M", tz = "America/Bogota")
  ) +
  scale_fill_viridis_c(option = "D", direction = -1) +
  labs(
    title = "Actas computadas por hora",
    subtitle = "Desde 09-02 17:00 hasta 11-02 00:00",
    caption = "Fuente: CNE, @angiegomeza. Visualización: @loreabad6"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    plot.subtitle = element_text(hjust = 0.5, size = 13),
    legend.position = "top",
    axis.title = element_blank(),
    text = element_text(size = 13)
  )
# Line-and-point chart of the tally sheets still to be processed over time
g3 <- ggplot(
  data = reconteo_tidy,
  mapping = aes(x = hora_ec, y = actas_por_comp)
) +
  geom_line(color = "grey30", size = 0.5, show.legend = FALSE) +
  # Shaded band and caption for the first annotated interval
  geom_rect(
    xmin = susp_x_start, xmax = susp_x_end,
    ymin = -Inf, ymax = Inf,
    fill = "grey90", color = NA, alpha = 0.05
  ) +
  annotate(
    "text",
    x = susp_x_mean, y = 300,
    label = "Suspención\ndel conteo"
  ) +
  # Shaded band and caption for the second annotated interval
  geom_rect(
    xmin = as.POSIXct("2021-02-11 01:00:00", tz = "America/Bogota"),
    xmax = as.POSIXct("2021-02-11 07:00:00", tz = "America/Bogota"),
    ymin = -Inf, ymax = Inf,
    fill = "grey90", color = NA, alpha = 0.05
  ) +
  annotate(
    "text",
    x = as.POSIXct("2021-02-11 04:00:00", tz = "America/Bogota"),
    y = 300,
    label = "Sin actividad\nen Twitter"
  ) +
  # One point per tweet, colored by the number of pending sheets
  geom_point(
    mapping = aes(color = actas_por_comp),
    size = 3,
    show.legend = FALSE
  ) +
  scale_x_datetime(
    labels = scales::date_format("%d-%m\n%H:%M", tz = "America/Bogota"),
    date_breaks = "3 hours",
    date_minor_breaks = "1 hour",
    expand = c(0.01, 0.1)
  ) +
  scale_color_viridis_c(option = "D", direction = -1) +
  # Start the y axis at zero and let the upper limit follow the data
  scale_y_continuous(limits = c(0, NA)) +
  labs(
    title = "Actas por procesar",
    caption = "Fuente: CNE, @angiegomeza. Visualización: @loreabad6"
  ) +
  theme_light() +
  theme(
    text = element_text(size = 12),
    plot.title = element_text(hjust = 0.5, size = 13),
    axis.title = element_blank(),
    legend.position = "top"
  )
# Write both static plots to PNG files, 22 x 14 cm each
ggsave(
  filename = "actas_por_hora.png",
  plot = g2,
  width = 22, height = 14, units = "cm"
)
ggsave(
  filename = "actas_por_procesar.png",
  plot = g3,
  width = 22, height = 14, units = "cm"
)
Author
loreabad6
commented
Feb 11, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment