reservoirinvest · November 27, 2017 12:55
diff --git a/scrape_trom_url.py b/scrape_trom_url.py
 ########################################################
 ####  Scrape a table with a known index into pandas
 ########################################################

 import pandas as pd

 # Pull one table out of a web page by its position among the page's tables.
 symlotmarginurl = "https://www.5paisa.com/5pit/spma.asp"
 _page_tables = pd.read_html(symlotmarginurl)  # list with one DataFrame per parsed <table>
 symlotmargin = _page_tables[1]  # index 1 -> the second table on the page

 ########################################################
 ####  Scrape a table with an id into pandas
 ########################################################

 import requests
 # Build the NSE option-chain URL for one underlying symbol and one expiry date.
 scrip = 'PFC'
 exp_date = '28DEC2017'
 url = (
     "https://www.nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?&instrument=OPTSTK&symbol="
     + scrip + "&date=" + exp_date
 )

 # Options table
 html = requests.get(url).content
 # Select the table whose id is 'octable', using row 1 as the column header.
 # [-1] keeps the last matching table and [:-1] discards its final row
 # (presumably a totals/footer row -- confirm against the live page).
 # The two chart columns are dropped by keyword because the positional form
 # drop(labels, 1) is rejected by pandas >= 2.0.
 nsetable = (
     pd.read_html(html, attrs={'id': 'octable'}, header=1)[-1][:-1]
     .drop(columns=['Chart', 'Chart.1'])
 )

 ########################################################
 #### Scrape a value from JSON generated URL
 ########################################################

 import requests
 from bs4 import BeautifulSoup
 import json

 # The quote page carries its payload as JSON text inside the element with
 # id "responseDiv": fetch the page, pull that text out, then decode it.
 url = "https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=PFC&instrument=FUTSTK"
 html = requests.get(url).text
 soup = BeautifulSoup(html, 'html.parser')
 _payload_node = soup.find(id="responseDiv")
 data = _payload_node.text.strip()

 d1 = json.loads(data)
 _first_record = d1['data'][0]  # first entry of the decoded 'data' list
 d2 = float(_first_record['annualisedVolatility'])
 d2  # bare name: echoes the value when run in a notebook/REPL

 #########################################################
 #### Scrape dividend from Google Finance page
 #########################################################

 from lxml import etree, html
 import requests

 url = "https://finance.google.com/finance?q=NSE:PFC"

 page = requests.get(url)
 root = html.fromstring(page.content)
 # Work on the third <table> in the document: take its text, break it into
 # lines, and read the part of the third line that precedes the first '/'.
 _tables = root.findall('.//table')
 _table_text = _tables[2].text_content().strip()
 _third_line = _table_text.split("\n")[2]
 dividend = float(_third_line.partition('/')[0])
 dividend  # bare name: echoes the value when run in a notebook/REPL

 ########################################################
 ####  Scrape a value from an element tree
 ########################################################

 import requests
 import lxml.html

 # Fetch a page, parse it into an lxml element tree, and inspect its <title>.
 response = requests.get('http://www.un.org/en/sc/documents/resolutions/2016.shtml')
 # lxml.html provides fromstring() for parsing a markup string; the HTML()
 # factory belongs to lxml.etree, not lxml.html.
 tree = lxml.html.fromstring(response.text)
 title_elem = tree.xpath('//title')[0]
 # NOTE: cssselect() relies on the separately installed "cssselect" package.
 title_elem = tree.cssselect('title')[0]  # equivalent to previous XPath
 print("title tag:", title_elem.tag)
 print("title text:", title_elem.text_content())
 print("title html:", lxml.html.tostring(title_elem))
 print("title tag:", title_elem.tag)
 print("title's parent's tag:", title_elem.getparent().tag)
	########################################################
	#### Scrape a table with a known index into pandas
	########################################################

	import pandas as pd

	# Pull one table out of a web page by its position among the page's tables.
	symlotmarginurl = "https://www.5paisa.com/5pit/spma.asp"
	_page_tables = pd.read_html(symlotmarginurl)  # list with one DataFrame per parsed <table>
	symlotmargin = _page_tables[1]  # index 1 -> the second table on the page

	########################################################
	#### Scrape a table with an id into pandas
	########################################################

	import requests
	# Build the NSE option-chain URL for one underlying symbol and one expiry date.
	scrip = 'PFC'
	exp_date = '28DEC2017'
	url = (
	    "https://www.nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?&instrument=OPTSTK&symbol="
	    + scrip + "&date=" + exp_date
	)

	# Options table
	html = requests.get(url).content
	# Select the table whose id is 'octable', using row 1 as the column header.
	# [-1] keeps the last matching table and [:-1] discards its final row
	# (presumably a totals/footer row -- confirm against the live page).
	# The two chart columns are dropped by keyword because the positional form
	# drop(labels, 1) is rejected by pandas >= 2.0.
	nsetable = (
	    pd.read_html(html, attrs={'id': 'octable'}, header=1)[-1][:-1]
	    .drop(columns=['Chart', 'Chart.1'])
	)

	########################################################
	#### Scrape a value from JSON generated URL
	########################################################

	import requests
	from bs4 import BeautifulSoup
	import json

	# The quote page carries its payload as JSON text inside the element with
	# id "responseDiv": fetch the page, pull that text out, then decode it.
	url = "https://www.nseindia.com/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=PFC&instrument=FUTSTK"
	html = requests.get(url).text
	soup = BeautifulSoup(html, 'html.parser')
	_payload_node = soup.find(id="responseDiv")
	data = _payload_node.text.strip()

	d1 = json.loads(data)
	_first_record = d1['data'][0]  # first entry of the decoded 'data' list
	d2 = float(_first_record['annualisedVolatility'])
	d2  # bare name: echoes the value when run in a notebook/REPL

	#########################################################
	#### Scrape dividend from Google Finance page
	#########################################################

	from lxml import etree, html
	import requests

	url = "https://finance.google.com/finance?q=NSE:PFC"

	page = requests.get(url)
	root = html.fromstring(page.content)
	# Work on the third <table> in the document: take its text, break it into
	# lines, and read the part of the third line that precedes the first '/'.
	_tables = root.findall('.//table')
	_table_text = _tables[2].text_content().strip()
	_third_line = _table_text.split("\n")[2]
	dividend = float(_third_line.partition('/')[0])
	dividend  # bare name: echoes the value when run in a notebook/REPL

	########################################################
	#### Scrape a value from an element tree
	########################################################

	import requests
	import lxml.html

	# Fetch a page, parse it into an lxml element tree, and inspect its <title>.
	response = requests.get('http://www.un.org/en/sc/documents/resolutions/2016.shtml')
	# lxml.html provides fromstring() for parsing a markup string; the HTML()
	# factory belongs to lxml.etree, not lxml.html.
	tree = lxml.html.fromstring(response.text)
	title_elem = tree.xpath('//title')[0]
	# NOTE: cssselect() relies on the separately installed "cssselect" package.
	title_elem = tree.cssselect('title')[0]  # equivalent to previous XPath
	print("title tag:", title_elem.tag)
	print("title text:", title_elem.text_content())
	print("title html:", lxml.html.tostring(title_elem))
	print("title tag:", title_elem.tag)
	print("title's parent's tag:", title_elem.getparent().tag)