Created
June 2, 2015 11:59
-
-
Save priyankajayaswal1/af7bfecd11076b409817 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(sqldf) | |
x<-read.table("//home//innovator//Documents//Internship//Data//household_power_consumption.txt",header=TRUE,sep=';',dec = '.',stringsAsFactors=FALSE) | |
x$Date<- as.Date(x$Date,"%d/%m/%Y") | |
x$Time = format(as.POSIXlt(x$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
x$Global_active_power = as.numeric(x$Global_active_power) | |
x$Global_reactive_power = as.numeric(x$Global_reactive_power) | |
x$KVA = sqrt(x$Global_active_power*x$Global_active_power+x$Global_reactive_power*x$Global_reactive_power) | |
# Creating sample pools for six month data | |
first<- as.Date('16-12-2008',format = '%d-%m-%Y') | |
second <- as.Date('30-06-2009',format = '%d-%m-%Y') | |
y = fn$sqldf("select * from x where Date>=$first and Date<=$second") | |
# Creating sample pools of data at time 0, 15min, 30min, 45min | |
z = fn$sqldf("select * from y where substr(Time,4,2)='00' or substr(Time,4,2)='15' or substr(Time,4,2)='30' or substr(Time,4,2)='45'") | |
n = as.numeric(sqldf("select count(distinct Time) from z")) | |
# Checking for which central tendency is apt | |
t = sqldf("select distinct Time from z") | |
t$Time = format(as.POSIXlt(t$Time, format = "%H:%M:%S"), format="%H:%M:%S") | |
tn = as.numeric(sqldf("select count(*) from t")) | |
Row_no = KVA = seq(1,tn) | |
t= cbind(t,Row_no,KVA) | |
rep_data = data.frame() | |
for (j in 1:n) | |
{ | |
e = fn$sqldf("select * from z where Time=(select Time from t limit $j-1,1)") | |
for (i in 1:7) | |
{ | |
f = fn$sqldf("select * from e where Date%7=$i-1") | |
plot(f$Date,f$KVA,xlab='Day No.',ylab='KVA', main = "Graphical representation of KVA value in 6 months for particular time.") | |
lines(f$Date,f$KVA) | |
# Finding the best optimized central tendency for the dataset | |
#(forparticular weekday and particular time) | |
nn = as.numeric(sqldf("select count(*) from f")) | |
hippo =d = NULL | |
k=0 | |
for (i1 in seq(1,nn)) | |
{ | |
d=NULL | |
d1 = as.numeric(fn$sqldf("select KVA from f limit $i1-1,1")) | |
for (j1 in 1:nn) | |
{ | |
d2 = as.numeric(fn$sqldf("select KVA from f limit $j1-1,1")) | |
d[j1]=(d2-d1)*.16 | |
} | |
hippo[i1]=sum(d) | |
k = k+1 | |
f$hippo[k]=hippo[i1] | |
print(paste0("Hippo: for ", d1, " and ", d2," = ", hippo[i1])) | |
} | |
rep_data=sqldf("select * from f where hippo=(select max(hippo)from f) limit 1") | |
repkva = as.numeric((sqldf("select KVA from rep_data"))) | |
reptime = as.character(sqldf("select Time from rep_data")) | |
r = as.numeric(fn$sqldf("select Row_no from t where Time=(select Time from rep_data)")) | |
t$KVA[r]=repkva | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment