Skip to content

Instantly share code, notes, and snippets.

@randyzwitch
Created September 10, 2014 20:42

Revisions

  1. randyzwitch created this gist Sep 10, 2014.
    40 changes: 40 additions & 0 deletions rsitecatalyst-sankey-one-page.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    library("RSiteCatalyst")
    library("d3Network")

    #### Authentication ----
    # Register the API credentials with RSiteCatalyst before any report is queued.
    # "key" and "secret" are placeholder strings; substitute real credentials.
    SCAuth(key = "key", secret = "secret")

    #### Pathing data ----
    # Two-step path pattern: a single fixed page followed by the "::anything::"
    # placeholder, so the report lists whatever page came next.
    pathpattern <- c(
      "http://randyzwitch.com/big-data-hadoop-amazon-ec2-cloudera-part-1",
      "::anything::"
    )

    # Queue the pathing report for report suite "zwitchdev" over
    # 2014-01-01 .. 2014-08-31, measured in pageviews on the page element,
    # requesting up to 50000 rows.
    next_page <- QueuePathing(
      "zwitchdev", "2014-01-01", "2014-08-31",
      metric = "pageviews", element = "page",
      pathpattern,
      top = 50000
    )

    # Optional step: clean the page-name URLs by removing the domain for clarity.
    # The pattern is anchored with "^" and its dots are escaped so that only a
    # literal, leading "http://randyzwitch.com/" is stripped: an unescaped "."
    # matches any character, and an unanchored pattern could match mid-string.
    domain_prefix <- "^http://randyzwitch\\.com/"
    next_page$step.1 <- sub(domain_prefix, "", next_page$step.1,
                            ignore.case = TRUE)
    next_page$step.2 <- sub(domain_prefix, "", next_page$step.2,
                            ignore.case = TRUE)

    # Nodes table: one row per distinct page name appearing in either path step
    nodes <- setNames(
      as.data.frame(unique(c(next_page$step.1, next_page$step.2))),
      "name"
    )

    # Zero-based node index: row 1 -> 0, row 2 -> 1, ...
    nodes$nodevalue <- seq_len(nrow(nodes)) - 1

    # Convert the page-name strings of each step into numeric node ids.
    #
    # merge() returns the key column first, then the remaining columns of x,
    # then those of y. Renaming the whole result positionally after the second
    # merge (keyed on step.2) therefore swaps the step.1/step.2 labels, so
    # columns are renamed by name and put back in a fixed order at the end.

    # Look up the id of the first page -> "source"
    links <- merge(next_page, nodes, by.x = "step.1", by.y = "name")
    # Assumes the metric is the third column of next_page (step.1, step.2,
    # metric), as the original positional rename did -- TODO confirm.
    names(links)[3] <- "value"
    names(links)[names(links) == "nodevalue"] <- "source"

    # Look up the id of the second page -> "target"
    links <- merge(links, nodes, by.x = "step.2", by.y = "name")
    names(links)[names(links) == "nodevalue"] <- "target"

    # Restore the documented column order with correctly labelled steps
    links <- links[, c("step.1", "step.2", "value", "source", "target")]

    # Create the next-page Sankey chart and write it to the file at d3output.
    # NOTE(review): the output path is specific to one machine; adjust it
    # before running this script elsewhere.
    d3output <- "C:/Users/rzwitc200/Desktop/sankey.html"

    # links supplies the edges (source/target ids plus value); nodes supplies
    # the labels through its "name" column.
    d3Sankey(
      Links = links, Nodes = nodes,
      Source = "source", Target = "target", Value = "value", NodeID = "name",
      fontsize = 12, nodeWidth = 100,
      file = d3output, width = 750, height = 600
    )