Created
February 13, 2012 15:34
-
-
Save ariddell/1817639 to your computer and use it in GitHub Desktop.
random sample of English novels, 1800-1836 (Garside et al. bibliography)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2)
library(arm)
###############################################################################
## load data
###############################################################################
# Sample of novels, one row per title. Columns:
#   status           - scan availability category (four distinct values below)
#   year             - publication year (1800-1836 in this sample)
#   author           - author string as given in the source table
#   further_editions - 0/1 flag (presumably: did the novel see further
#                      editions -- confirm against the bibliography)
# Fields are tab-separated because `status` and `author` themselves contain
# spaces and commas. The tabs are written as explicit `\t` escapes so the
# delimiters cannot be silently lost to whitespace normalisation.
# NOTE(review): underscores inside a few author names (e.g. "L_onard",
# "STA_L") look like placeholders for accented characters lost upstream;
# left untouched because `author` is not used by the analysis -- confirm.
data_raw = "status\tyear\tauthor\tfurther_editions
private/for-profit scan\t1822\tIsabel HILL\t0
public scan\t1827\tSarah Wilmot WELLS\t0
public scan\t1808\tEllen Rebecca WARNER\t0
public scan\t1831\tREYNOLDS, Frederick\t0
public scan\t1823\tGeorge JONES\t0
public scan\t1833\t[BULWER LYTTON, Edward George]\t1
public scan\t1828\tThomas Henry LISTER\t1
public scan\t1822\tJean Charles L_onard SIMONDE DE SISMONDI\t1
public scan\t1819\tAdelaide O'KEEFFE\t1
public scan\t1824\tHannah Maria JONES\t0
public scan\t1826\tSir Walter SCOTT\t1
private/for-profit scan\t1819\tElizabeth BENNETT\t1
public scan\t1823\tLady Caroline LAMB\t1
public scan\t1835\tCAUNTER, {J}[ohn] Hobart\t1
public scan\t1836\t[HOOK, Theodore Edward]\t1
private/for-profit scan\t1801\tANON\t0
public scan\t1835\t[DEACON, William Frederick]\t1
private/for-profit scan\t1806\tANON\t0
public scan\t1835\t[SULLIVAN, Arabella Jane]; DACRE, Lady [Barbarina] (editor)\t1
no scan, copies survive\t1808\tANON\t0
private/for-profit scan\t1800\tSusannah GUNNING\t1
no scan, copies survive\t1801\tMary CHARLTON\t1
public scan\t1833\t[TONNA], Charlotte Elizabeth\t1
public scan\t1805\tIsaac D'ISRAELI\t1
no scan, no copy survives\t1826\tFriedrich August SCHULZE\t0
no scan, copies survive\t1825\tANON\t0
public scan\t1835\t[BANIM, John and Michael; and MARTIN, Harriet Letitia]\t1
no scan, copies survive\t1813\tAnne Louise Germaine de STA_L-HOLSTEIN\t0
public scan\t1829\tCatharine HEAD\t1
public scan\t1821\tPierce EGAN\t1
public scan\t1814\tANON\t1
public scan\t1804\tElisabeth GU_NARD\t0
public scan\t1802\tAugust Heinrich Julius LAFONTAINE\t0
public scan\t1832\t[JAMES, George Payne Rainsford]\t1
public scan\t1817\tAnne Julia Kemble HATTON\t0
public scan\t1832\tST. LEDGER, [Francis] Barry [Boyle]\t0
no scan, copies survive\t1803\tRobert COUPER\t0
no scan, copies survive\t1805\tAugust Heinrich Julius LAFONTAINE\t0
no scan, copies survive\t1823\tANON\t0
no scan, copies survive\t1800\tANON\t0
no scan, copies survive\t1818\tANON\t1
no scan, copies survive\t1808\tAugusta Amelia STUART\t0
public scan\t1832\t[CHAMIER, Frederick]\t1
public scan\t1826\tMrs N. W. OLIVER\t1
public scan\t1803\tSydney OWENSON [afterwards MORGAN, Lady Sydney]\t1
public scan\t1816\tThomas Love PEACOCK\t1
no scan, copies survive\t1801\tAnn WINGROVE\t0
no scan, copies survive\t1830\tCOATES, Mr [H\t0
no scan, copies survive\t1810\tMary HOUGHTON\t1
public scan\t1833\t[GORE, Catherine Grace Frances]\t0
no scan, copies survive\t1819\tM. SMITH\t0
public scan\t1806\tThomas Pike LATHY\t0
public scan\t1831\t[TAYLOR, Isaac]\t0
public scan\t1801\tANON\t0
public scan\t1823\tGrace KENNEDY\t1
no scan, copies survive\t1812\tANON\t0
public scan\t1829\tTimothy EAST\t0
public scan\t1830\tCOOPER, James Fenimore\t1
no scan, copies survive\t1816\tANON\t0
no scan, copies survive\t1802\tMarian MOORE\t0
private/for-profit scan\t1803\tJ.-J.-M. DUPERCHE\t0
no scan, copies survive\t1807\tANON\t0
no scan, copies survive\t1813\tMiriam MALDEN\t0
no scan, copies survive\t1804\tMary TUCK\t0
public scan\t1829\tCatharine HEAD\t1
public scan\t1834\t[MARRYAT, Frederick]\t1
no scan, copies survive\t1830\tANON\t0
private/for-profit scan\t1812\tAmelia BEAUCLERC\t0
no scan, copies survive\t1811\tANON\t0
no scan, copies survive\t1815\tAdrien de SARRAZIN\t1
no scan, copies survive\t1808\tMrs A. DUNCOMBE\t1
public scan\t1828\tElizabeth Caroline GREY\t1
no scan, copies survive\t1802\tAnna MILLIKIN\t0
no scan, copies survive\t1812\tANON\t0
public scan\t1835\tHOWITT, William\t0
no scan, copies survive\t1836\tANON\t0
public scan\t1828\tBenjamin, Earl of Beaconsfield DISRAELI\t1
public scan\t1817\tThomas Love PEACOCK\t1
public scan\t1801\tElizabeth HELME\t1
public scan\t1835\t[SHEE, Sir Martin Archer]\t1
public scan\t1806\tANON\t0
public scan\t1834\tJONES, Hannah Maria\t1
public scan\t1832\t[GALT, John]\t1"
# `text =` lets read.table() open and close its own text connection, so no
# connection is left dangling the way a bare textConnection() would be.
# quote = "" keeps apostrophes in names such as O'KEEFFE as literal characters.
# stringsAsFactors = TRUE makes `status` a factor on every R version; the
# relevel() calls in the bar-chart section require a factor.
df = read.table(text = data_raw, header = TRUE, sep = "\t", quote = "",
                stringsAsFactors = TRUE)
###############################################################################
## make bar chart showing states
###############################################################################
# Draws a single horizontal bar, filled to 100% and split by scan status, and
# writes it to scan-status.png.

# reorder levels: after both calls "public scan" is first and
# "private/for-profit scan" second, with the remaining statuses after them.
# factor() makes relevel() work when `status` was read as a character column
# (the read.table() default since R 4.0); relevel() errors on non-factors.
df$status = relevel(factor(df$status), "private/for-profit scan")
df$status = relevel(df$status, "public scan")

# make plot
# x is one dummy category, so position = "fill" yields a single bar whose
# segments are each status's share of all rows.
# `labels = scales::percent` replaces the old `formatter = "percent"`
# argument, which current ggplot2 releases no longer accept. scales is
# installed as a dependency of ggplot2 and is referenced via `::` only.
g = ggplot(data = df, aes(x = factor(""), fill = status)) +
  geom_bar(position = "fill") +
  scale_y_continuous("", labels = scales::percent) +
  scale_x_discrete("") +
  coord_flip()

# 8in wide at 300 dpi; height is the width divided by the golden ratio.
png("scan-status.png", units = 'in', res = 300, width = 8,
    height = 8 * 2 / (1 + sqrt(5)))
print(g)
dev.off()
###############################################################################
## logistic model
###############################################################################
# create the response variable: TRUE only for rows whose status is exactly
# 'public scan'; every other status counts as not scanned.
df$scanned = (df$status == 'public scan')

# check for NA values in each variable the models use (one check per variable;
# an NA in `status` would surface as an NA in `scanned`)
stopifnot(!is.na(df$year))
stopifnot(!is.na(df$scanned))
stopifnot(!is.na(df$further_editions))

# fit_0: intercept-only baseline
fit_0 = glm(scanned ~ 1, family = binomial(link = "logit"), data = df)
display(fit_0)

# fit_1: adds year, shifted so the intercept refers to the year 1800
fit_1 = glm(scanned ~ I(year - 1800), family = binomial(link = "logit"),
            data = df)
display(fit_1)

# fit_2: adds the further_editions indicator
fit_2 = glm(scanned ~ I(year - 1800) + further_editions,
            family = binomial(link = "logit"), data = df)
display(fit_2)

# sequential analysis-of-deviance table for fit_2 with chi-squared tests
anova(fit_2, test = "Chisq")
###############################################################################
## visualizations
###############################################################################
### basic plots using R
# Plots the fitted probability of a public scan against year from fit_2, one
# curve per value of further_editions, and writes it to scan-model.png.
fit = fit_2

# 8in wide at 300 dpi; height is the width divided by the golden ratio.
png("scan-model.png", units = 'in', res = 300, width = 8,
    height = 8 * 2 / (1 + sqrt(5)))

# predict(..., type = "response") applies the model's own formula and inverse
# link, so the curves cannot drift from the fit the way a hand-rebuilt
# intercept + slope expression can. curve() supplies `x` (the year grid).
curve(predict(fit, newdata = data.frame(year = x, further_editions = 0),
              type = "response"),
      1800, 1836,
      ylim = c(0, 1),
      ylab = "Proportion of novels with publicly accessible scans",
      xlab = "Year",
      lty = 1)
curve(predict(fit, newdata = data.frame(year = x, further_editions = 1),
              type = "response"),
      1800, 1836,
      col = 2,
      lty = 2,
      add = TRUE)

# With a keyword position such as "topleft", legend() ignores `y`, so the
# stray positional 1.5 has been dropped and the labels are passed by name.
legend("topleft",
       legend = c("no further edition", "further edition(s)"),
       col = 1:2, lty = c(1, 2), cex = 1.1, bty = "n")
dev.off()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment