Created
October 8, 2017 12:55
-
-
Save dhaeb/f4e99c1c8e70b3b7c5ebfb8d920ed649 to your computer and use it in GitHub Desktop.
Scraping Bundestagswahlergebnis Ost/West
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Grapes( | |
@Grab(group='net.sourceforge.htmlunit', module='htmlunit', version='2.27') | |
) | |
import com.gargoylesoftware.htmlunit.WebClient | |
import com.gargoylesoftware.htmlunit.html.HtmlPage | |
// Maps a federal-state name to the label "Osten" when it is one of the six
// names listed below, and to "Westen" for any other value.
def isOsten = { bl ->
    def easternStates = [
        "Sachsen",
        "Thüringen",
        "Sachsen-Anhalt",
        "Mecklenburg-Vorpommern",
        "Berlin",
        "Brandenburg"
    ]
    easternStates.contains(bl) ? "Osten" : "Westen"
}
// Downloads the 2017 Bundestag election result page of each of the 16 federal
// states (land-1 .. land-16) and collects one row per state:
//   [state name, AfD second votes, "Osten"/"Westen" label, AfD second-vote percentage]
final WebClient webClient = new WebClient()
// JavaScript is switched off for all requests; setting it once is enough
// because the option lives on the client, not on the individual request.
webClient.getOptions().setJavaScriptEnabled(false)

// Returns the first node matching `xpath` on `page`, or fails with a message
// naming the XPath and URL instead of a bare NullPointerException later on.
def firstNode = { HtmlPage page, String xpath, String url ->
    def nodes = page.getByXPath(xpath)
    if (!nodes) {
        throw new IllegalStateException("No node matches XPath '${xpath}' on ${url}")
    }
    nodes[0]
}

def data
try {
    data = (1..16).collect { land ->
        println(land)
        String url = "https://www.bundeswahlleiter.de/bundestagswahlen/2017/ergebnisse/bund-99/land-${land}.html"
        final HtmlPage page = webClient.getPage(url)

        // XPath originally copied from the browser: //[@id="table-8391"]/tbody[2]/tr[4]/td[2]/span
        // Per the original author's notes, row 4 of the second tbody holds the AfD:
        // td[5] is the second-vote count, td[6] the second-vote percentage in that state.
        // NOTE(review): these positions are hard-coded; verify if the page layout changes.
        def afdVotesCell = firstNode(page, '//table/tbody[2]/tr[4]/td[5]', url)
        def afdPercentCell = firstNode(page, '//table/tbody[2]/tr[4]/td[6]', url)

        // Counts use "." as thousands separator and percentages use "," as decimal
        // separator; normalise both before parsing. trim() guards against padding
        // whitespace in the cell text.
        def afdStimmen = Integer.parseInt(afdVotesCell.asText().trim().replaceAll("\\.", ""))
        def afdPercent = Double.parseDouble(afdPercentCell.asText().trim().replaceAll(",", "."))

        // The state name is read from the navigation button on the page.
        def blNode = firstNode(page, '//*[@id="main"]/nav/ul/li[2]/div[1]/div/div/button/span', url)
        def bl = blNode.asText()

        [bl, afdStimmen, isOsten(bl), afdPercent]
    }
} finally {
    // Release the client's windows and connections even when a page fails to load or parse.
    webClient.close()
}
print(data)
// Totals the vote counts (row index 1) per region label (row index 2).
// The resulting map (label -> summed votes) is the script's final expression.
def rowsByRegion = data.groupBy { row -> row[2] }
rowsByRegion.collectEntries { region, rows ->
    [(region): rows.sum { it[1] }]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment