Created
May 4, 2020 17:41
-
-
Save markoshorro/b7e693f9e2df278a78ea006b87dfe5c8 to your computer and use it in GitHub Desktop.
ParserQuiénEstáDelanteDeTi - MECD Premio Nacional Fin de Carrera
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from typing import Iterable, Tuple

import PyPDF2
# Path of the PDF to analyse (placeholder value kept from the original script).
path_file = "..."
# Threshold compared against the third in-range number of each row
# (presumably the candidate's weighted average grade -- confirm).
media_pond = 11.23
# Added to media_pond when comparing against the sum of the third and fourth
# in-range numbers of a row (presumably the merit score -- confirm).
meritos = 0.9

# Zero-based page range [FIRST_PAGE, LAST_PAGE) scanned in the PDF.
FIRST_PAGE = 5
LAST_PAGE = 49

# Marker line that starts a row of interest once spaces have been stripped.
ROW_HEADER = "INGENIERÍAY"

# ASCII letters are removed from a token before it is parsed as a float.
_LETTERS = re.compile(r'[A-Za-z]')


def count_page(
    lines: Iterable[str], media_pond: float, meritos: float
) -> Tuple[int, int]:
    """Count the rows of one page that exceed the given thresholds.

    A row starts at a line equal to ``ROW_HEADER``.  From there, every token
    that parses as a float strictly between 0 and 15 advances a counter.
    The third such number is compared against ``media_pond``; the fourth is
    added to the third and compared against ``media_pond + meritos``.

    Args:
        lines: Text lines of a single page, with spaces already removed.
        media_pond: Threshold for the third in-range number of a row.
        meritos: Extra margin added to ``media_pond`` for the combined check.

    Returns:
        A ``(above, ahead)`` tuple: how many times the third number exceeded
        ``media_pond``, and how many times third + fourth exceeded
        ``media_pond + meritos``.
    """
    above = 0
    ahead = 0
    in_range_seen = -1  # -1 means "not inside a row of interest"
    third_value = 0.0

    for token in lines:
        if token == ROW_HEADER:
            in_range_seen = 0
            third_value = 0.0
            continue
        if in_range_seen == -1:
            continue

        try:
            value = float(_LETTERS.sub('', token.replace(",", ".")))
        except ValueError:
            # Not a number: the extracted text is irregular, so skip it.
            continue

        if 0.0 < value < 15.0:
            in_range_seen += 1

        if in_range_seen == 4:
            if third_value + value > media_pond + meritos:
                ahead += 1
            in_range_seen = -1  # row finished

        # NOTE(review): a parsed number outside (0, 15) leaves the counter at
        # 3, so this branch runs again with that number.  Kept as in the
        # original script -- confirm whether that is intended.
        if in_range_seen == 3:
            if value > media_pond:
                above += 1
                third_value = value
            else:
                in_range_seen = -1  # below the threshold: drop the row

    return above, ahead


def main() -> None:
    """Scan the configured pages of the PDF and print both totals."""
    above = 0
    ahead = 0
    # The context manager closes the PDF even if parsing fails midway.
    with open(path_file, "rb") as pdf_file:
        # NOTE(review): PdfFileReader/getPage/extractText are the legacy
        # PyPDF2 names; confirm they exist in the installed version.
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)
        for page_index in range(FIRST_PAGE, LAST_PAGE):
            text = pdf_reader.getPage(page_index).extractText()
            lines = text.replace(" ", "").split("\n")
            page_above, page_ahead = count_page(lines, media_pond, meritos)
            above += page_above
            ahead += page_ahead

    print("total = " + str(above))
    print("total fucks = " + str(ahead))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This file is meant to parse this document [1]. The text extracted from the PDF is not consistent across rows, which is why a counter tracks whether two floats between 0 and 15 have already been found in the same row.
[1] http://www.educacionyfp.gob.es/dam/jcr:33f0241f-f886-4c25-9186-6b45bdb181b4/resolucionprovisional.pdf