Last active
August 29, 2015 14:02
-
-
Save btihen/f2cef87050bc29a83d2d to your computer and use it in GitHub Desktop.
Return Analysis - needs colors, line graph (box plots too)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ----------------
# returns analysis
# Start in Bash
# METHOD 1
# This section is shell, not R: it writes the two CSV files that the R code
# in the next section reads.

# Fetch the inventory export. The URL is quoted because '?' is a shell glob
# character. -k (skip TLS certificate verification) is kept from the original.
curl -k 'https://inventory.las.ch/serial.csv?scope=computers' > laptop-inventory.csv

# Field 44 of the ';'-separated export holds the quoted return timestamp.
# Keep the 2014 entries and strip the surrounding double quotes.
cut -d';' -f44 laptop-inventory.csv | grep 2014 | cut -d'"' -f2 > return-data.csv

# Returns per hour: truncate "HH:MM:SS" to "HH:00", keep May 2014, drop the
# "2014-05-" prefix so each line reads "DD HH:00".
# The last grep is anchored with '^' so it selects days 20-29 only; unanchored,
# '2[0-9]' also matched hours 20-23 and let through rows from any day of May.
echo "day hour" > return-intensive-by-hour.csv
sed 's/:[0-9][0-9]:[0-9][0-9]$/:00/' return-data.csv | grep '2014-05' | sed 's/2014-05-//g' | grep '^2[0-9]' >> return-intensive-by-hour.csv

# Returns per ten minutes: replace the last minute digit and the seconds with
# "0" ("HH:MM:SS" -> "HH:M0"), then filter exactly as above. The anchor matters
# here too: unanchored, minutes 20-29 matched on every day.
echo "day time" > return-intensive-by-tenmin.csv
sed 's/[0-9]:[0-9][0-9]$/0/' return-data.csv | grep '2014-05' | sed 's/2014-05-//g' | grep '^2[0-9]' >> return-intensive-by-tenmin.csv
# switch to R
# Reads the two files written by the shell section above
# ("day hour" and "day time", space separated), then tabulates and plots them.
library(vcd)

# Draw one box per time bin (spread of the per-day counts) and overlay one
# line per day, so each day can be compared against the overall distribution.
#   day_by_bin: counts with one row per day and one column per time bin
#   bin_by_day: the same counts transposed (one row per bin, one column per day)
#   main, xlab: plot title and x-axis label
plot_returns_by_bin <- function(day_by_bin, bin_by_day, main, xlab) {
  # matlines() colours by column, i.e. by day here. The original passed the
  # raw per-row day values as colours, which does not map colours to days.
  day_cols <- seq_len(ncol(bin_by_day))
  boxplot(day_by_bin, main = main, xlab = xlab, ylab = "Returned Computers")
  # matlines() only adds to the current plot; the title and axis labels are
  # set by boxplot() above, so they are not repeated here.
  matlines(bin_by_day, lty = 1, col = day_cols)
  legend("topright", legend = colnames(bin_by_day), col = day_cols, lty = 1,
         title = "Day")
  invisible(NULL)
}

# ---- per hour ----
interest_hr <- read.csv("return-intensive-by-hour.csv", sep = " ")
table(interest_hr)
structable(table(interest_hr))

# barplot() needs counts per day; the raw day column would draw one bar per
# returned computer with the day number as its height.
returns_per_day <- table(interest_hr$day)
hour_day <- as.data.frame.matrix(table(interest_hr$hour, interest_hr$day))
day_hour <- as.data.frame.matrix(table(interest_hr$day, interest_hr$hour))

pdf(file = "./Returns_Each_Day.pdf")
barplot(returns_per_day, main = "Computer Returns by Day", xlab = "Day",
        ylab = "Returned Computers")
dev.off()

pdf(file = "./Returns_Each_Hour.pdf")
plot_returns_by_bin(day_hour, hour_day, main = "Returns by hour", xlab = "Hour")
dev.off()

# Same plots again on the active (screen) device.
plot_returns_by_bin(day_hour, hour_day, main = "Returns each hour",
                    xlab = "Hour")
barplot(returns_per_day, main = "Computer Returns by Day", xlab = "Day",
        ylab = "Returned Computers")

# ---- per ten minutes ----
interest_ten <- read.csv("return-intensive-by-tenmin.csv", sep = " ")
table(interest_ten)
structable(table(interest_ten))

time_day_ten <- as.data.frame.matrix(table(interest_ten$time, interest_ten$day))
day_time_ten <- as.data.frame.matrix(table(interest_ten$day, interest_ten$time))

pdf(file = "./Returns_Each_10mins.pdf")
plot_returns_by_bin(day_time_ten, time_day_ten, main = "Returns each 10 min",
                    xlab = "Time")
dev.off()

# Same plot again on the active (screen) device.
plot_returns_by_bin(day_time_ten, time_day_ten, main = "Returns each 10 min",
                    xlab = "Time")
#--------------
# METHOD 2 - everything in R
# The original version was off by a day and by an hour or more:
#   * round(x, "day") rounds to the NEAREST midnight, so afternoon returns
#     were counted on the following day;
#   * hour bins were rebuilt from epoch seconds against a CET origin and
#     quarter-hour bins against a UTC origin, while the timestamps had been
#     parsed in the session time zone, so the clock was shifted and the two
#     binnings disagreed with each other.
# Start in R
library(vcd)

# get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")
# load data into R
returns <- read.csv("./computer-inventory.csv", sep = ";")
#returns=read.csv("returns-laptops-2014wk24.csv",sep=";")
#head(returns)
#str(returns)
#names(returns)

# get return information - separate from all other info
ret_fac <- subset(returns$X44.usr_return_at,
                  grepl("2014", returns$X44.usr_return_at))

# Parse and format in ONE fixed zone so the day and clock time come out
# exactly as written in the export. UTC is used only because it has no
# daylight-saving jumps; the timestamps are treated as plain wall-clock values.
clock_tz <- "UTC"
ret_dt <- as.POSIXct(as.character(ret_fac), tz = clock_tz)

# get days of interest
interest <- ret_dt[!is.na(ret_dt) &
                     ret_dt > as.POSIXct("2014-05-21", tz = clock_tz) &
                     ret_dt < as.POSIXct("2014-05-29", tz = clock_tz)]

# calendar day of each return (no rounding)
just_day <- format(interest, "%Y-%m-%d")

# Round timestamps to the nearest multiple of bin_secs seconds and return the
# clock time as "HH:MM", in the same zone that was used for parsing.
clock_bin <- function(x, bin_secs) {
  binned <- round(as.numeric(x) / bin_secs) * bin_secs
  format(as.POSIXct(binned, origin = "1970-01-01", tz = clock_tz), "%H:%M")
}

# nearest hour and nearest quarter hour
just_hour <- clock_bin(interest, 3600)
by_quarter_hr <- clock_bin(interest, 900)
interest_df <- data.frame(just_day, just_hour)

# count interesting day frequencies
count_by_hr <- table(just_day, just_hour)
count_by_quarter <- table(just_day, by_quarter_hr)

# now how to get box charts and line charts out of this table (colorized by day)
pdf(file = "./returns_by_hour.pdf")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour", xlab = "hour",
        ylab = "return/count")
dev.off()

pdf(file = "./returns_by_quarter_hour.pdf")
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
dev.off()

pdf(file = "./returns_by_day.pdf")
barplot(table(just_day), main = "Computers returned per day", xlab = "date",
        ylab = "computers returned")
dev.off()

# Same plots again on the active (screen) device.
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour", xlab = "hour",
        ylab = "return/count")
barplot(table(just_day), main = "Computers returned per day", xlab = "date",
        ylab = "computers returned")

# The original called structable(counts), but no object named `counts` is
# defined in this section; the day-by-hour table is used instead.
structable(count_by_hr)
#----------
# METHOD 3? (unfinished draft)
# start in R
# get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")
# load data into R
computers <- read.csv("./computer-inventory.csv", sep = ";")
# get laptops
# Filter the frame that was just loaded; the original filtered `returns`,
# which this section never defines.
laptops <- subset(computers, grepl("laptop", computers$X30.device_type))
# get return time-dates (from 20th to 29th may 2014 -- 2014-05-2x)
returns <- subset(laptops$X44.usr_return_at,
                  grepl("2014-05-2", laptops$X44.usr_return_at))
chars <- as.character(returns)
# Strip the year-month prefix (the original line had a stray ")" and did not
# parse).
# NOTE(review): whatever follows the day in the timestamp is still attached;
# splitting day from time was left unfinished in the line below.
day <- gsub("2014-05-", "", chars)
#time = gsub("^2014-05-[0-9][0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9]$", )
# ----------------
# returns analysis (single day: 2014-05-28)
# Same pipeline as METHOD 2, restricted to one day. Day and clock time are
# taken directly from the parsed timestamps: the original rounded to the
# nearest day (afternoon returns moved to the next day) and rebuilt the hour
# and quarter-hour bins against two different time-zone origins.

# get data from inventory
fileURL <- "https://inventory.las.ch/serial.csv?scope=computers"
download.file(fileURL, destfile = "./computer-inventory.csv", method = "curl")
# load data into R
returns <- read.csv("./computer-inventory.csv", sep = ";")
#returns=read.csv("returns-laptops-2014wk24.csv",sep=";")
#head(returns)
#str(returns)
#names(returns)

# get return information - separate from all other info
ret_fac <- subset(returns$X44.usr_return_at,
                  grepl("2014", returns$X44.usr_return_at))

# Parse and format in ONE fixed zone so the day and clock time come out
# exactly as written in the export. UTC is used only because it has no
# daylight-saving jumps; the timestamps are treated as plain wall-clock values.
clock_tz <- "UTC"
ret_dt <- as.POSIXct(as.character(ret_fac), tz = clock_tz)

# get days of interest
interest <- ret_dt[!is.na(ret_dt) &
                     ret_dt > as.POSIXct("2014-05-28", tz = clock_tz) &
                     ret_dt < as.POSIXct("2014-05-29", tz = clock_tz)]

# calendar day of each return (no rounding)
just_day <- format(interest, "%Y-%m-%d")

# Round timestamps to the nearest multiple of bin_secs seconds and return the
# clock time as "HH:MM", in the same zone that was used for parsing.
clock_bin <- function(x, bin_secs) {
  binned <- round(as.numeric(x) / bin_secs) * bin_secs
  format(as.POSIXct(binned, origin = "1970-01-01", tz = clock_tz), "%H:%M")
}

# nearest hour and nearest quarter hour
just_hour <- clock_bin(interest, 3600)
by_quarter_hr <- clock_bin(interest, 900)
interest_df <- data.frame(just_day, just_hour)

# count interesting day frequencies
count_by_hr <- table(just_day, just_hour)
count_by_quarter <- table(just_day, by_quarter_hr)

# now how to get box charts and line charts out of this table (colorized by day)
pdf(file = "./returns_by_hour.pdf")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour", xlab = "hour",
        ylab = "return/count")
dev.off()

pdf(file = "./returns_by_quarter_hour.pdf")
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
dev.off()

pdf(file = "./returns_by_day.pdf")
barplot(table(just_day), main = "Computers returned per day", xlab = "date",
        ylab = "computers returned")
dev.off()

# Same plots again on the active (screen) device.
barplot(count_by_quarter, beside = TRUE, main = "Returns per Quarter Hour",
        xlab = "hour", ylab = "return/count")
barplot(count_by_hr, beside = TRUE, main = "Returns per Hour", xlab = "hour",
        ylab = "return/count")
barplot(table(just_day), main = "Computers returned per day", xlab = "date",
        ylab = "computers returned")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment