Created
April 24, 2014 19:04
-
-
Save adamhaile/11265756 to your computer and use it in GitHub Desktop.
TriNUG F# / Data Analytics Twitter Analysis using idiomatic functional style
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace NewCo.TwitterAnalysis.Idiomatic

open System

/// One tweet as a (timestamp, integer count, text) triple.
/// The providers in this file put the retweet count in the middle slot.
type tweet = DateTime * int * string

/// One stock observation as a (timestamp, traded volume) pair.
type stockVolume = DateTime * int

/// Given a stock symbol, produces the tweets to analyse for it.
type tweetProvider = string -> seq<tweet>

/// Given a stock symbol, produces the volume observations for it.
type stockVolumeProvider = string -> seq<stockVolume>
module Analysis =
    open MathNet.Numerics.Statistics

    /// Correlates daily tweet counts with daily stock volumes.
    ///
    /// Tweets are counted per calendar day; stock volumes are keyed by calendar
    /// day as well (the time-of-day part of both timestamps is dropped so the
    /// two sides can actually match). Only days present in both inputs take
    /// part. If several stock rows fall on the same day, the last one wins
    /// (`Map.ofSeq` semantics).
    ///
    /// Returns the result of `Correlation.Pearson` over the two aligned series.
    /// NOTE(review): behaviour when there are fewer than two common days is
    /// whatever `Correlation.Pearson` does for such input - confirm.
    let correlate (tweets : tweet seq) (stockVolumes : stockVolume seq) =
        // Each input sequence is walked exactly once to build its calendar.
        let tweetCalendar =
            tweets
            |> Seq.countBy (fun (d, _, _) -> d.Date)
            |> Map.ofSeq
        let stockCalendar =
            stockVolumes
            |> Seq.map (fun (d, volume) -> d.Date, volume)
            |> Map.ofSeq
        // Map.toSeq yields keys in ascending order, so both series below are
        // aligned day-for-day.
        let commonDays =
            tweetCalendar
            |> Map.toSeq
            |> Seq.map fst
            |> Seq.filter (fun day -> Map.containsKey day stockCalendar)
            |> Seq.toArray
        let tweetDayVolumes = commonDays |> Array.map (fun day -> double tweetCalendar.[day])
        let stockDayVolumes = commonDays |> Array.map (fun day -> double stockCalendar.[day])
        Correlation.Pearson(tweetDayVolumes, stockDayVolumes)

    /// Fetches tweets and stock volumes for `stock` from the two providers
    /// and correlates them with `correlate`.
    let correlateProviders tweetProvider stockVolumeProvider stock =
        correlate (tweetProvider stock) (stockVolumeProvider stock)
module TwitterProvider =
    open System.Configuration
    open Tweetinvi

    /// Searches Twitter for `stock` and maps every hit to a
    /// (CreatedAt, RetweetCount, Text) triple.
    ///
    /// The four credential values are read from the application's
    /// `AppSettings` and handed to Tweetinvi on every call, before searching.
    let getTweets : tweetProvider = fun stock ->
        let settings = ConfigurationManager.AppSettings
        TwitterCredentials.SetCredentials(
            settings.["accessToken"],
            settings.["accessTokenSecret"],
            settings.["consumerKey"],
            settings.["consumerSecret"])
        let results = Search.SearchTweets stock
        results
        |> Seq.map (fun status -> status.CreatedAt, status.RetweetCount, status.Text)
module YahooProvider =
    open FSharp.Data

    /// Typed view of Yahoo's historical-prices CSV, using the MSFT table as
    /// the schema sample.
    type YahooStocksCsv = CsvProvider<"http://ichart.finance.yahoo.com/table.csv?s=MSFT">

    /// Downloads the historical table for `stock` and returns one
    /// (Date, Volume) pair per row.
    ///
    /// The symbol is URL-escaped before being placed in the query string, so
    /// symbols such as "^GSPC", or input containing '&' or '#', cannot corrupt
    /// or extend the request. Plain alphanumeric symbols are sent unchanged.
    /// A null symbol raises ArgumentNullException from the escaping call.
    /// NOTE(review): confirm this Yahoo endpoint is still being served.
    let getStockVolumes : stockVolumeProvider = fun stock ->
        let url =
            "http://ichart.finance.yahoo.com/table.csv?s=" + System.Uri.EscapeDataString(stock)
        let data = YahooStocksCsv.Load(url)
        data.Rows |> Seq.map (fun d -> d.Date, d.Volume)
module FileProvider =
    open System.IO
    open System.Reflection
    open FSharp.Data

    /// Typed view of the tweet CSV, using TweetData.csv as the schema sample.
    type TweetsCsv = CsvProvider<"TweetData.csv">

    /// Typed view of the stock CSV, using StockData.csv as the schema sample.
    type StockVolumesCsv = CsvProvider<"StockData.csv">

    /// Returns `file` untouched when it is already rooted; otherwise resolves
    /// it against the directory of the executing assembly.
    let private absolute file =
        match Path.IsPathRooted file with
        | true -> file
        | false ->
            // use reflection to get the CodeBase of the executing assembly - .Location doesn't work in NUnit
            let assemblyUri = new Uri(Assembly.GetExecutingAssembly().CodeBase)
            let assemblyDir = Path.GetDirectoryName(assemblyUri.LocalPath)
            Path.Combine(assemblyDir, file)

    /// Loads tweets from the CSV at `file` as (Date, Retweets, Text) triples.
    /// The stock-symbol argument is accepted only to satisfy `tweetProvider`;
    /// it is not used.
    let getTweets : string -> tweetProvider = fun file _ ->
        let rows = TweetsCsv.Load(absolute file).Rows
        rows |> Seq.map (fun row -> row.Date, row.Retweets, row.Text)

    /// Loads stock volumes from the CSV at `file` as (Date, Volume) pairs.
    /// The stock-symbol argument is accepted only to satisfy
    /// `stockVolumeProvider`; it is not used.
    let getStockVolumes : string -> stockVolumeProvider = fun file _ ->
        let rows = StockVolumesCsv.Load(absolute file).Rows
        rows |> Seq.map (fun row -> row.Date, row.Volume)
module InMemoryProvider =

    /// Fixed tweet fixture, independent of the requested symbol.
    ///
    /// Covers 15-20 April 2014: the k-th day (k = 1..6) carries k tweets named
    /// "Test<k>.<n>" with n = 1..k, 21 tweets in all, each with a count of 0.
    let getTweets : tweetProvider = fun _ ->
        [ for day in 1 .. 6 do
            for n in 1 .. day do
                yield (new DateTime(2014, 4, 14 + day), 0, sprintf "Test%d.%d" day n) ]
        |> Seq.ofList

    /// Fixed stock-volume fixture, independent of the requested symbol.
    ///
    /// Covers the same six days, with the k-th day's volume equal to 10000 * k.
    let getStockVolumes : stockVolumeProvider = fun _ ->
        [ for day in 1 .. 6 -> (new DateTime(2014, 4, 14 + day), 10000 * day) ]
        |> Seq.ofList
module Tests =
    open NUnit.Framework
    open FsUnit

    /// Checks of the correlation pipeline, once against the in-memory
    /// fixtures and once against the live Twitter and Yahoo providers.
    [<TestFixture>]
    type ``Analysis Tests`` () =

        [<Test>]
        member x.``Analysis.correlateProviders using InMemoryProviders should return 1`` () =
            let correlateInMemory =
                Analysis.correlateProviders InMemoryProvider.getTweets InMemoryProvider.getStockVolumes
            // The coefficient is a floating-point result, so compare with a
            // tolerance instead of demanding a bit-exact 1.0.
            correlateInMemory "IBM" |> should (equalWithin 1e-9) 1.0

        // Smoke test only: calls the live providers and discards the result.
        [<Test>]
        member x.``Analysis.correlateProviders using live Twitter and Yahoo on IBM should not throw an exception`` () =
            let correlateLive =
                Analysis.correlateProviders TwitterProvider.getTweets YahooProvider.getStockVolumes
            correlateLive "IBM" |> ignore

    /// Checks against the live Twitter provider.
    [<TestFixture>]
    type ``TwitterProvider Tests`` () =

        [<Test>]
        member x.``TwitterProvider.getTweets when called with IBM should return non-null`` () =
            TwitterProvider.getTweets "IBM" |> should not' (equal null)

    /// Checks against the live Yahoo provider.
    [<TestFixture>]
    type ``YahooProvider Tests`` () =

        // Name corrected: the assertion requires a non-null result.
        [<Test>]
        member x.``YahooProvider.getStockVolumes when called with IBM should return non-null`` () =
            YahooProvider.getStockVolumes "IBM" |> should not' (equal null)

    /// Checks of the CSV-backed provider against the two sample files.
    [<TestFixture>]
    type ``FileProvider Tests`` () =

        [<Test>]
        member x.``FileProvider.getTweets returns expected items`` () =
            FileProvider.getTweets "TweetData.csv" "IBM" |> Seq.length |> should equal 2

        [<Test>]
        member x.``FileProvider.getStockVolumes returns expected items`` () =
            FileProvider.getStockVolumes "StockData.csv" "IBM" |> Seq.length |> should equal 2

    /// Checks that the in-memory fixtures have the expected sizes.
    [<TestFixture>]
    type ``InMemoryProvider Tests`` () =

        [<Test>]
        member x.``InMemoryProvider.getTweets returns expected items`` () =
            InMemoryProvider.getTweets "IBM" |> Seq.length |> should equal 21

        [<Test>]
        member x.``InMemoryProvider.getStockVolumes returns expected items`` () =
            InMemoryProvider.getStockVolumes "IBM" |> Seq.length |> should equal 6
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Love it!