Skip to content

Instantly share code, notes, and snippets.

@TGSmith
Last active December 19, 2015 08:09
Show Gist options
  • Save TGSmith/8f528186536d9d1bfa15 to your computer and use it in GitHub Desktop.
Save TGSmith/8f528186536d9d1bfa15 to your computer and use it in GitHub Desktop.
Solution for Scraping HN 1: Building Objects
#Solution for Challenge: Scraping HN 1: Building Objects. Started 2013-07-03T22:33:29+00:00
require 'nokogiri'
require 'pry'
# A scraped post together with the comments that belong to it.
#
# Comments come from two places: the ones Parser#create_comments builds from
# the saved page, and the ones appended through #add_comment.
class Post
  # Attributes of the stand-in comment that #add_comment builds when it is
  # called without an argument.
  PLACEHOLDER_COMMENT = {
    username: 'users name',
    url: 'the damn url',
    comment: 'this is a detailed comment .. yo'
  }.freeze

  attr_accessor :title, :url, :points, :comments_array

  # @param args [Hash] post attributes, read from the :title, :url and
  #   :points keys
  def initialize(args)
    @title = args[:title]
    @url = args[:url]
    @points = args[:points]
    @comments_array = []
    @comments_loaded = false
  end

  # Returns every comment on the post.
  #
  # The scraped comments are fetched from a fresh Parser on the first call
  # only. Re-parsing on every call would replace the list and silently drop
  # whatever #add_comment had appended. Comments added before the first call
  # are kept and placed after the scraped ones.
  #
  # @return [Array] scraped comments followed by manually added ones
  def comments
    unless @comments_loaded
      @comments_array = Parser.new.create_comments + @comments_array
      @comments_loaded = true
    end
    @comments_array
  end

  # Appends a comment to the post.
  #
  # @param comment [Object, nil] the comment to append; when omitted, a
  #   Comment built from PLACEHOLDER_COMMENT is appended instead
  # @return [Array] the updated comment list
  def add_comment(comment = nil)
    @comments_array << (comment || Comment.new(PLACEHOLDER_COMMENT.dup))
  end
end
# One comment on a post.
#
# Every attribute is copied verbatim from the hash given to the constructor.
# The comment text can only be read; the other three attributes can also be
# reassigned after construction.
class Comment
  attr_reader :username, :url, :date_to_posted, :comment
  attr_writer :username, :url, :date_to_posted

  # @param args [Hash] values looked up under the :username, :url,
  #   :date_to_posted and :comment keys; an absent key leaves the attribute
  #   at whatever the hash returns for it (nil for a plain Hash)
  def initialize(args)
    [:username, :url, :date_to_posted, :comment].each do |field|
      instance_variable_set("@#{field}", args[field])
    end
  end
end
# Reads a saved Hacker News page and turns its contents into Post and
# Comment objects.
class Parser
  # File parsed when #initialize is called without a path.
  DEFAULT_SOURCE = 'hackernews.html'.freeze

  attr_accessor :title, :url, :points, :args, :doc

  # @param path [String] location of the saved HTML page
  def initialize(path = DEFAULT_SOURCE)
    # Block form: the handle is closed as soon as the document is built,
    # instead of staying open until garbage collection.
    @doc = File.open(path) { |file| Nokogiri::HTML(file) }
  end

  # Builds the Post described by the page.
  #
  # @return [Post] post whose :title, :url and :points are strings taken
  #   from the page; :points contains digits only
  def create_post
    attributes = {
      title: text_at('td .title > a'),
      # Keep only characters that can appear in a host name. Digits and
      # hyphens are allowed so "37signals.com" is not cut to "signals.com".
      url: text_at('td .title > .comhead').gsub(/[^a-z0-9.-]+/, ''),
      points: text_at('td .subtext span').gsub(/[^\d]/, '')
    }
    Post.new(attributes)
  end

  # Builds one Comment per `td.default` cell in the page.
  #
  # The first link in a cell supplies the username and the second link's
  # href supplies the url. A cell with fewer links yields nil for the
  # missing values instead of raising NoMethodError.
  #
  # @return [Array<Comment>] comments in document order
  def create_comments
    @doc.search('td.default').map do |cell|
      links = cell.css('a')
      author_link = links[0]
      permalink = links[1]
      attributes = {
        username: (author_link.inner_text if author_link),
        url: (permalink['href'] if permalink),
        date_to_posted: age_digits(cell.css('span.comhead').text),
        comment: cell.search('span.comment').inner_text
      }
      Comment.new(attributes)
    end
  end

  private

  # Concatenated text of every node matching the selector.
  def text_at(selector)
    @doc.search(selector).inner_text
  end

  # Extracts the number from a "<n> <unit> ago" phrase in the comment header.
  # Stripping every non-digit from the whole header would also pick up digits
  # in the username ("bob42 3 hours ago" would give "423"). When no such
  # phrase is present, falls back to all digits found in the header.
  def age_digits(header)
    header[/(\d+)\s+\w+\s+ago/, 1] || header.gsub(/\D/, '')
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment