Skip to content

Instantly share code, notes, and snippets.

@bennylope
Forked from kennym/blogspot_to_jekyll.rb
Created October 5, 2011 15:23

Revisions

  1. bennylope revised this gist Oct 5, 2011. 1 changed file with 8 additions and 2 deletions.
    10 changes: 8 additions & 2 deletions blogspot_to_jekyll.rb
    100755 → 100644
    Original file line number Diff line number Diff line change
    @@ -22,8 +22,11 @@
    # * Make sure Blogger shows full output of article in feeds.
    # * Commenting on migrated articles will be set to false by default.

    # Add rubygems so that Ruby can find the installed gems
    require 'rubygems'
    require 'feedzirra'
    require 'date'
    require 'time'
    require 'optparse'


    @@ -32,7 +35,10 @@ def parse_post_entries(feed, verbose)
    feed.entries.each do |post|
    obj = Hash.new
    created_datetime = post.last_modified
    creation_date = Date.strptime(created_datetime.to_s, "%Y-%m-%d")
    # The previous line, immediately below, threw an ArgumentError, "invalid date"
    # and just using a time object instead solved the error
    #creation_date = Date.strptime(created_datetime.to_s, "%Y-%m-%d")
    creation_date = Date.parse(created_datetime.strftime("%Y-%m-%d"))
    title = post.title
    file_name = creation_date.to_s + "-" + title.split(/ */).join("-").delete('\/') + ".html"
    content = post.content
    @@ -108,4 +114,4 @@ def main
    puts "Done!"
    end

    main()
    main()
  2. @kennym kennym revised this gist Jul 30, 2011. 1 changed file with 9 additions and 3 deletions.
    12 changes: 9 additions & 3 deletions blogspot_to_jekyll.rb
    Original file line number Diff line number Diff line change
    @@ -5,7 +5,7 @@
    # Basic Usage
    # -----------
    #
    # ruby blogger_to_jekyll.rb feed_url
    # ./blogger_to_jekyll.rb feed_url
    #
    # where `feed_url` can have the following format:
    #
    @@ -16,6 +16,11 @@
    #
    # * feedzirra: https://github.com/pauldix/feedzirra
    #
    # Notes
    # -----
    #
    # * Make sure Blogger shows full output of article in feeds.
    # * Commenting on migrated articles will be set to false by default.

    require 'feedzirra'
    require 'date'
    @@ -36,6 +41,7 @@ def parse_post_entries(feed, verbose)
    obj["title"] = title
    obj["creation_datetime"] = created_datetime
    obj["content"] = content
    obj["categories"] = post.categories.join(" ")
    posts.push(obj)
    end
    return posts
    @@ -52,7 +58,7 @@ def write_posts(posts, verbose)
    title: #{post["title"]}
    date: #{post["creation_datetime"]}
    comments: false
    categories:
    categories: #{post["categories"]}
    ---
    }
    @@ -72,7 +78,7 @@ def write_posts(posts, verbose)
    def main
    options = {}
    opt_parser = OptionParser.new do |opt|
    opt.banner = "Usage: blogger_to_jekyll.rb FEED_URL [OPTIONS]"
    opt.banner = "Usage: ./blogger_to_jekyll.rb FEED_URL [OPTIONS]"
    opt.separator ""
    opt.separator "Options"

  3. @kennym kennym revised this gist Jul 30, 2011. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions blogspot_to_jekyll.rb
    Original file line number Diff line number Diff line change
    @@ -30,11 +30,12 @@ def parse_post_entries(feed, verbose)
    creation_date = Date.strptime(created_datetime.to_s, "%Y-%m-%d")
    title = post.title
    file_name = creation_date.to_s + "-" + title.split(/ */).join("-").delete('\/') + ".html"

    content = post.content

    obj["file_name"] = file_name
    obj["title"] = title
    obj["creation_datetime"] = created_datetime
    obj["content"] = post.content
    obj["content"] = content
    posts.push(obj)
    end
    return posts
    @@ -54,7 +55,7 @@ def write_posts(posts, verbose)
    categories:
    ---
    %}
    }
    File.open(file_name, "w+") {|f|
    f.write(header)
    f.write(post["content"])
  4. @kennym kennym revised this gist Jul 30, 2011. 1 changed file with 0 additions and 7 deletions.
    7 changes: 0 additions & 7 deletions blogspot_to_jekyll.rb
    Original file line number Diff line number Diff line change
    @@ -11,13 +11,6 @@
    #
    # http://{your_blog_name}.blogspot.com/feeds/posts/default
    #
    # Documentation
    # -------------
    #
    # Command-line arguments:
    # -v
    #
    #
    # Requirements
    # ------------
    #
  5. @kennym kennym revised this gist Jul 30, 2011. 1 changed file with 47 additions and 26 deletions.
    73 changes: 47 additions & 26 deletions blogspot_to_jekyll.rb
    100644 → 100755
    Original file line number Diff line number Diff line change
    @@ -1,47 +1,42 @@
    #!/usr/bin/env ruby
    #
    # Convert blogger (blogspot) posts to jekyll posts
    #
    # How to use
    # ----------
    # Basic Usage
    # -----------
    #
    # ruby blogger_to_jekyll.rb [feed_url]
    # ruby blogger_to_jekyll.rb feed_url
    #
    # What it does
    # ------------
    # where `feed_url` can have the following format:
    #
    # 1) Fetches the blog's feed
    # 2) For each post create a file with name
    # "YYYY-MM-DD-{post-title}.html", with the following structure:
    # http://{your_blog_name}.blogspot.com/feeds/posts/default
    #
    # ---
    # layout: post
    # title: #{post-title}
    # date: #{YYYY-mm-dd HH:MM}
    # comments: false
    # categories:
    # ---
    # Documentation
    # -------------
    #
    # #{blog_post_content_in_html_format}
    # Command-line arguments:
    # -v
    #
    # 3) Write each file to a directory named `_posts`
    #
    # Requirements
    # ------------
    #
    #
    # * feedzirra: https://github.com/pauldix/feedzirra
    #

    require 'feedzirra'
    require 'date'
    require 'optparse'


    def parse_post_entries(feed)
    def parse_post_entries(feed, verbose)
    posts = []
    feed.entries.each do |post|
    obj = Hash.new
    created_datetime = post.last_modified
    creation_date = Date.strptime(created_datetime.to_s, "%Y-%m-%d")
    title = post.title
    file_name = creation_date.to_s + "-" + title.split(/ */).join("-").delete('\/') + ".markdown"
    file_name = creation_date.to_s + "-" + title.split(/ */).join("-").delete('\/') + ".html"

    obj["file_name"] = file_name
    obj["title"] = title
    @@ -52,9 +47,10 @@ def parse_post_entries(feed)
    return posts
    end

    def write_posts(posts)
    def write_posts(posts, verbose)
    Dir.mkdir("_posts") unless File.directory?("_posts")

    total = posts.length, i = 1
    posts.each do |post|
    file_name = "_posts/".concat(post["file_name"])
    header = %{---
    @@ -71,20 +67,45 @@ def write_posts(posts)
    f.write(post["content"])
    f.close
    }

    if verbose
    puts " [#{i}/#{total[0]}] Written post #{file_name}"
    i += 1
    end
    end
    end

    def main
    feed_url = ARGV.first
    options = {}
    opt_parser = OptionParser.new do |opt|
    opt.banner = "Usage: blogger_to_jekyll.rb FEED_URL [OPTIONS]"
    opt.separator ""
    opt.separator "Options"

    opt.on("-v", "--verbose", "Print out all.") do
    options[:verbose] = true
    end
    end

    opt_parser.parse!

    puts "Fetching feed..."
    if ARGV[0]
    feed_url = ARGV.first
    else
    puts opt_parser
    exit()
    end

    puts "Fetching feed #{feed_url}..."
    feed = Feedzirra::Feed.fetch_and_parse(feed_url)

    puts "Parsing feed..."
    posts = parse_post_entries(feed)
    posts = parse_post_entries(feed, options[:verbose])

    puts "Writing posts..."
    write_posts(posts)
    puts "Writing posts to _posts/..."
    write_posts(posts, options[:verbose])

    puts "Done!"
    end

    main()
  6. @kennym kennym revised this gist Jul 30, 2011. 1 changed file with 13 additions and 8 deletions.
    21 changes: 13 additions & 8 deletions blogspot_to_jekyll.rb
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,10 @@
    # Convert blogger (blogspot) posts to jekyll posts
    #
    # How to use
    # ----------
    #
    # ruby blogger_to_jekyll.rb [feed_url]
    #
    # What it does
    # ------------
    #
    @@ -9,29 +14,26 @@
    #
    # ---
    # layout: post
    # title: {post-title}
    # date: {YYYY-mm-dd HH:MM}
    # title: #{post-title}
    # date: #{YYYY-mm-dd HH:MM}
    # comments: false
    # categories:
    # ---
    #
    # #{blog_post_content_in_html_format}
    #
    # 3) Write each file to a directory named `_posts`
    #
    # Requirements
    # ------------
    #
    # * feedzirra: https://github.com/pauldix/feedzirra
    #
    # How to use
    # ----------
    #
    # ruby blogger_to_jekyll.rb [feed_url]
    #

    require 'feedzirra'
    require 'date'


    def parse_post_entries(feed)
    posts = []
    feed.entries.each do |post|
    @@ -72,12 +74,15 @@ def write_posts(posts)
    end
    end

    def main(feed_url="http://feeds.feedburner.com/Kennys/dev/null?format=xml")
    def main
    feed_url = ARGV.first

    puts "Fetching feed..."
    feed = Feedzirra::Feed.fetch_and_parse(feed_url)

    puts "Parsing feed..."
    posts = parse_post_entries(feed)

    puts "Writing posts..."
    write_posts(posts)
    end
  7. @kennym kennym revised this gist Jul 30, 2011. 1 changed file with 24 additions and 10 deletions.
    34 changes: 24 additions & 10 deletions blogspot_to_jekyll.rb
    Original file line number Diff line number Diff line change
    @@ -1,19 +1,33 @@
    # Short overview:
    # 1) Fetch blogger feed
    # 2) For each post in feed
    # 1) create a file with name `YYYY-MM-DD-post-title.markdown`, with
    # the following structure:
    # Convert blogger (blogspot) posts to jekyll posts
    #
    # What it does
    # ------------
    #
    # 1) Fetches the blog's feed
    # 2) For each post create a file with name
    # "YYYY-MM-DD-{post-title}.html", with the following structure:
    #
    # ---
    # layout: post
    # title: `post-title`
    # date: 2011-07-30 10:44
    # comments: true
    # title: {post-title}
    # date: {YYYY-mm-dd HH:MM}
    # comments: false
    # categories:
    # ---
    #
    # #{blog_post_content_in_markdown_format}
    # 3) Write each file to a directory `_posts`
    # #{blog_post_content_in_html_format}
    # 3) Write each file to a directory named `_posts`
    #
    # Requirements
    # ------------
    #
    # * feedzirra: https://github.com/pauldix/feedzirra
    #
    # How to use
    # ----------
    #
    # ruby blogger_to_jekyll.rb [feed_url]
    #

    require 'feedzirra'
    require 'date'
  8. @kennym kennym created this gist Jul 30, 2011.
    71 changes: 71 additions & 0 deletions blogspot_to_jekyll.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,71 @@
    # Short overview:
    # 1) Fetch blogger feed
    # 2) For each post in feed
    # 1) create a file with name `YYYY-MM-DD-post-title.markdown`, with
    # the following structure:
    #
    # ---
    # layout: post
    # title: `post-title`
    # date: 2011-07-30 10:44
    # comments: false
    # categories:
    # ---
    #
    # #{blog_post_content_in_markdown_format}
    # 3) Write each file to a directory `_posts`

    require 'feedzirra'
    require 'date'

    # Convert the entries of a parsed feed into plain hashes that describe the
    # post files to be written.
    #
    # feed - object responding to `entries`. Each entry must respond to
    #        `last_modified`, `title` and `content`.
    #        Assumes `last_modified` is Time-like (responds to `strftime`).
    #
    # Returns an Array of Hashes with the String keys "file_name", "title",
    # "creation_datetime" and "content".
    def parse_post_entries(feed)
      feed.entries.map do |post|
        created_datetime = post.last_modified
        # Format the timestamp directly instead of parsing its to_s output
        # with Date.strptime: the to_s text is not guaranteed to start with
        # YYYY-MM-DD, which made strptime raise "invalid date".
        creation_date = created_datetime.strftime("%Y-%m-%d")
        title = post.title
        # Split on runs of whitespace. The former pattern / */ also matches
        # the empty string, so it broke the title into single characters and
        # produced names such as "H-e-l-l-o". Slashes are removed because they
        # would be read as directory separators in the file name.
        slug = title.split.join("-").delete("/")

        {
          "file_name" => "#{creation_date}-#{slug}.markdown",
          "title" => title,
          "creation_datetime" => created_datetime,
          "content" => post.content
        }
      end
    end

    # Write every post to its own file inside the `_posts` directory, which is
    # created in the current working directory when it does not exist yet.
    #
    # posts - Array of Hashes with the String keys "file_name", "title",
    #         "creation_datetime" and "content".
    #
    # Returns the posts Array that was passed in.
    def write_posts(posts)
      Dir.mkdir("_posts") unless File.directory?("_posts")

      posts.each do |post|
        file_name = File.join("_posts", post["file_name"])
        # Build the front matter line by line so that every line starts in
        # column 0 regardless of how this source file is indented, and so that
        # no stray "%" is emitted after the closing "---" (the former
        # `%{ ... %}` literal ended at "}", leaving the "%" in the output).
        header = [
          "---",
          "layout: post",
          "title: #{post["title"]}",
          "date: #{post["creation_datetime"]}",
          "comments: false",
          "categories:",
          "---",
          ""
        ].join("\n")

        # The block form of File.open closes the file on exit, so no explicit
        # close is needed.
        File.open(file_name, "w") do |f|
          f.write(header)
          f.write(post["content"])
        end
      end
    end

    # Script entry point: fetch the feed at `feed_url`, turn its entries into
    # post hashes and write them out, printing a progress line before each of
    # the three steps.
    #
    # feed_url - String URL of the feed to convert (defaults to the author's
    #            own feed).
    def main(feed_url="http://feeds.feedburner.com/Kennys/dev/null?format=xml")
      puts "Fetching feed..."
      parsed_feed = Feedzirra::Feed.fetch_and_parse(feed_url)

      puts "Parsing feed..."
      post_hashes = parse_post_entries(parsed_feed)

      puts "Writing posts..."
      write_posts(post_hashes)
    end

    # Run the conversion with the default feed URL as soon as the file is loaded.
    main()