# Create a crouton chroot in any directory you want, e.g. on removable media.
# NOTE(review): crouton's -p is documented as an install prefix, so the chroots
# may end up in a "chroots" subdirectory of the path below — confirm that it
# matches the -c path used when entering.
sudo sh ~/Downloads/crouton -r trusty -t x11 -p "/media/removable/USB Drive/crouton/chroots/"
# Enter the chroot stored on the removable media.
sudo enter-chroot -c "/media/removable/USB Drive/crouton/chroots/"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# bootstrap a vagrant ubuntu guest to have the following: | |
# openssh server | |
# x2go server | |
# ufw | |
# fail2ban | |
# user args: STDIN is TSV of "username\tpubkey", one per line | |
# NB: | |
# run as root |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Print the syllable count of the text read from STDIN, using TextStatistics.
// https://github.com/DaveChild/Text-Statistics

// Load every source file of the library. The glob is relative to the current
// working directory, so run this script from the directory that contains
// DaveChild/. glob() can return false on error, hence the empty-array fallback.
foreach (glob("DaveChild/TextStatistics/*.php") ?: [] as $filename)
{
    // require_once stops immediately on an unreadable file instead of failing
    // later with an undefined-class error, and never loads a file twice.
    require_once $filename;
}

$file = file_get_contents("php://stdin");
if ($file === false) {
    fwrite(STDERR, "could not read STDIN\n");
    exit(1);
}

// NOTE(review): syllableCount() may be the per-word counter of the library;
// for multi-word input a whole-text method may be intended — confirm.
echo DaveChild\TextStatistics\Syllables::syllableCount($file);
?>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Insert your preferred key mappings here.

# Free "d" and "u" from their default bindings and scroll by page with
# Ctrl-f / Ctrl-b instead.
unmap d
map <c-f> scrollPageDown
unmap u
map <c-b> scrollPageUp

# Reuse the freed keys for tab management. They are already unmapped above,
# so no second unmap is needed.
map d removeTab
map u restoreTab
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For every agency listed in /tmp/agencies, emit: name, ISO date, cost.
# Cost is units * unit price ($5 * $6); rows whose cost is not above 1 USD are
# dropped.
# tawk is a helper defined outside this snippet (presumably awk with tab
# field separators — confirm).
in=eVA_healthyRecords_2015-07-12.tsv
# IFS= and -r keep each agency name exactly as written in the list.
while IFS= read -r agency; do
  # The agency is handed to awk with -v instead of being spliced into the
  # program text, so a "/" or quote in a name cannot break the script.
  # Column 2 must contain the agency name wrapped in double quotes.
  tawk -v agency="$agency" \
    '$2 ~ ("\"" agency "\"") && $5 * $6 > 1 { print $2, $3, $6 * $5 }' < "$in" \
    | sed -e 's:"::g' \
      -e 's:\t\([0-9]\{2\}\)\/\([0-9]\{2\}\)\/\([0-9]\{4\}\)\t:\t\3-\1-\2\t:g'
  # The first sed expression strips the double quotes; the second rewrites a
  # tab-delimited NN/NN/NNNN date as NNNN-NN-NN (third group first).
done < /tmp/agencies \
  | sed '1 i\agency\tdate\tamount' > /tmp/eVA_costByDateByAgency.txt
# Next, load the data into a database so it can be grouped by date.
# NOTE(review): this loads /tmp/eVA_rva.tsv, not the
# /tmp/eVA_costByDateByAgency.txt written above — confirm which is intended.
~/git/aiddata-utils/etl/txt2pgsql.pl -i /tmp/eVA_rva.tsv -d "\t" -t "TEXT" -p del | sh
# Sum the costs per (agency, date) and export them as CSV with a header row.
# The quoted here-doc delimiter keeps the SQL literal, so no escaping is needed.
psql del > /tmp/eVA_rva.csv <<'SQL'
copy ( select agency,date,sum(amount::numeric) as cost from "eVA_rva" group by agency,date ) to stdout with csv header;
SQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# bash magic - passing an arbitrary number of sed commands to eval on a file to remove stopword list we built based on 1-gram frequency and lack of vowels | |
# first we get a list of stopwords based on short strings, esp. w/o vowels, that are the most common 1-grams | |
# then we build sed commands around these stopwords, using word boundaries (eg \bRd\b) | |
# and we ask for a case-insensitive match (eg sed 's/pattern//I') | |
# then we string them together with pipes | |
# and cat the file of interest and eval all the sed commands! | |
# this removes the stopword list we build, with case-insensitive match, and using word boundaries | |
# advantage over sed OR (eg sed 's/Rd\|St//I') is that a single alternation can only handle so many patterns at a time | |
# would be more efficient of course to batch these into groups of *n* |
#identifying stopwords using ngrams and vowels
- we have: street names in Richmond, VA
- we want to: match street names to Confederate generals and Civil Rights leaders
- first we must: remove small pesky elements of names that are irrelevant to the match
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download every file listed under the TIGER 2014 ROADS directory.
# lftp's `find` prints one path per line; awk prefixes each with the base URI
# to form the URL list that wget reads via -i. Passing the URI with -v avoids
# hand-escaping it for use inside a sed replacement. -c resumes partial files.
wget -ci <(
  baseuri="ftp://ftp2.census.gov/geo/tiger/TIGER2014/ROADS/"
  lftp -e 'find;exit' "$baseuri" | awk -v base="$baseuri" '{ print base $0 }'
)
# Build a single text file of FULLNAME road names.
# find_ext and sortfreq are helpers defined outside this snippet (presumably
# "list files by extension" and "sort by frequency" — confirm).
# Per archive: extract only its .dbf into a private temp dir, dump the
# FULLNAME field without blank lines, then remove the temp dir. {/.} is GNU
# parallel's "basename without extension" of the input path.
find_ext zip \
  | parallel -j2 'tmp=$(mktemp -d); unzip -d "$tmp" {} {/.}.dbf; dbfdump --fields FULLNAME "$tmp"/*.dbf | grep -vE "^\s*$"; rm -r -- "$tmp"' \
  | grep -vE "(Archive|inflating)[:]" \
  | sed '1 i\FULLNAME' \
  | sortfreq > ~/fullname_uniq.txt
# The grep above drops unzip's "Archive:" / "inflating:" progress lines, which
# share stdout with the dbfdump output.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Box plot of property value per street-name category, written to /tmp/foo.png
# (9x6 in, 600 dpi). Rio is an external helper; the R code below expects the
# tab-separated input as data frame `df` with columns `type` and `total_value`.
# - The value axis is logarithmic via scale_y_log10; the former coord_trans()
#   call was replaced by coord_flip() anyway and has been dropped.
# - labs(title=...) sets the plot title; the previous `titles=` did not.
# - aes() refers to columns by bare name rather than df$column.
# The R code is kept on one line, as in the original invocation.
Rio -d'\t' -ge 'png("/tmp/foo.png",width=9,height=6,res=600,units="in"); library(scales); df$type <- factor(df$type, levels=c("civil rights leader","either","confederate")); ggplot(df,aes(x=type,y=total_value)) + coord_flip() + geom_boxplot(aes(fill=type),outlier.shape = 1, outlier.size = 1) + labs(x="street named after",y="property value, USD 2015 (log scale)") + theme(axis.text.y = element_text(size=15,angle=90,hjust=.5),legend.position = "none") + scale_fill_manual(values=c("#0066CC", "#70B359", "#999999")) + scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x)) + labs(title="Value of Properties by Streets\nNamed After Confederates or Civil Rights Leaders\nRichmond VA 2015"); dev.off()' < totalValue_by_streetType.txt
NewerOlder