Created
December 15, 2015 16:14
-
-
Save tinproject/bb1538a61083d8e4cb21 to your computer and use it in GitHub Desktop.
Caliair: process air quality data published by the Ayuntamiento de Madrid (Madrid City Council).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
from itertools import count
from typing import Any, Callable, Iterable, Generator, List, Optional, Tuple, Dict
def identity(value: Any) -> Any:
    """
    Identity function: hand back the argument untouched.

    :param value: any object
    :return: the very same object that was passed in
    """
    return value
def bool_test(something: Any) -> bool:
    """
    Report the truthiness of a value.

    :param something: any object
    :return: ``bool(something)``, so empty strings and containers, zero and
        ``None`` give ``False``
    """
    return bool(something)
def remove_commas(line_gen: Iterable[str]) -> Generator[str, None, None]:
    """
    Lazily strip every comma from each string of an iterable.

    :param line_gen: iterable that yields strings
    :return: a generator yielding each input string with all ',' removed
    """
    for line in line_gen:
        yield line.replace(',', '')
def slice_str(start: int, stop: Optional[int]) -> Callable[[str], str]:
    """
    Build a function that extracts a fixed slice from a string.

    :param start: index of the first character to keep
    :param stop: index one past the last character to keep, or ``None`` to
        keep everything up to the end of the string
    :return: a callable taking a string and returning ``s[start:stop]``
    """
    def extract(s: str) -> str:
        # Slicing with a stop of None runs to the end of the string, so one
        # closure covers both the bounded and the open-ended case.
        return s[start:stop]

    return extract
def strip(string: str, size: int) -> Generator[Tuple[int, str], None, None]:
    """
    Cut a string into consecutive chunks of ``size`` characters.

    Only complete chunks are produced: trailing characters that do not fill
    a whole chunk are dropped.

    :param string: the text to cut
    :param size: length of every chunk, must be positive
    :return: a generator of ``(index, chunk)`` pairs, index starting at 0
    :raises ValueError: when iteration starts, if ``size`` is not positive
        (a non-positive size would otherwise never terminate)
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    # Start offsets of every chunk that fits entirely inside the string.
    # The generator simply ends when the range is exhausted; raising
    # StopIteration by hand would surface as RuntimeError (PEP 479).
    starts = range(0, len(string) - size + 1, size)
    for index, start in enumerate(starts):
        yield index, string[start:start + size]
# Real-time hourly data carry the year with four digits, historical data with two.
def str_yymmdd_to_date(s: str) -> datetime.date:
    """
    Parse a ``yymmdd`` string (two-digit year) into a date.

    :param s: date text such as ``"151215"``
    :return: the corresponding ``datetime.date``
    :raises ValueError: if ``s`` does not match the ``%y%m%d`` format
    """
    return datetime.datetime.strptime(s, "%y%m%d").date()
def str_yyyymmdd_to_date(s: str) -> datetime.date:
    """
    Parse a ``yyyymmdd`` string (four-digit year) into a date.

    :param s: date text such as ``"20151215"``
    :return: the corresponding ``datetime.date``
    :raises ValueError: if ``s`` does not match the ``%Y%m%d`` format
    """
    return datetime.datetime.strptime(s, "%Y%m%d").date()
################################################################################ | |
# ---------- Classes for fields and registers | |
class Field:
    """
    A named value that is extracted from a record string.
    """

    def __init__(self, name: str,
                 extract: Callable[[str], str],
                 validate: Callable[[str], bool] = bool_test,
                 transform: Callable[[str], Any] = identity) -> None:
        """
        Field object

        :param name: name of the field
        :param extract: extract the field from the original string
        :param validate: validates the value of the field, can be used for logging
        :param transform: transforms the string extracted to the correct type
        """
        self.name = name
        self.extract = extract
        self.validate = validate
        self.transform = transform

    def to_key_value(self, record: str) -> Optional[Tuple[str, Any]]:
        """
        Extract, validate and transform this field from a record.

        :param record: the full record string
        :return: a ``(name, transformed value)`` pair, or ``None`` when the
            extracted text does not pass ``validate``; callers must be ready
            to skip a ``None`` result
        """
        raw = self.extract(record)
        if not self.validate(raw):
            return None
        return self.name, self.transform(raw)
class RepeatableField:
    """
    A group of fields that occurs several times, back to back, in a record.
    """

    def __init__(self,
                 extract: Callable[[str], str],
                 size: int,
                 fields: List[Field],
                 index_label: str,
                 index_transform: Callable[[int], str] = str) -> None:
        """
        Repeatable field object, holds some Fields repeated within a record

        :param extract: function to extract the repeated area from a record (string)
        :param size: the size of the repeated part
        :param fields: list of fields that form the repeated part
        :param index_label: label for the index of the repeated portion; a
            falsy label leaves the index out of the result
        :param index_transform: function to adapt the index of the repeated part
        """
        self.extract = extract
        self.size = size
        self.fields = fields
        self.index_label = index_label
        self.index_transform = index_transform

    def to_key_value(self, record: str) -> Generator[Dict[str, Any], None, None]:
        """
        Yield one dict per repetition found in the record.

        :param record: the full record string
        :return: a generator of dicts mapping field names to values, plus the
            transformed repetition index under ``index_label`` when that
            label is set
        """
        repeatable_field = self.extract(record)
        for index, rep_record in strip(repeatable_field, self.size):
            pairs = (field.to_key_value(rep_record) for field in self.fields)
            # A field that fails validation reports None; leave it out
            # instead of letting dict() fail on a non-pair element.
            result = dict(pair for pair in pairs if pair is not None)
            if self.index_label:
                result[self.index_label] = self.index_transform(index)
            yield result
class RepeatableRegister:
    """
    A record layout made of fixed fields followed by a repeated group.
    """

    def __init__(self, fixed_fields: List[Field], repeatable_field: RepeatableField) -> None:
        """
        :param fixed_fields: fields that appear once per record
        :param repeatable_field: description of the repeated part of the record
        """
        self.fixed_fields = fixed_fields
        self.repeatable_field = repeatable_field

    def str_to_dict_gen(self, gen: Iterable[str]) -> Generator[Dict[str, Any], None, None]:
        """
        Turn record strings into flat dicts, one per repetition.

        :param gen: iterable of record strings
        :return: a generator of dicts holding the values of one repetition
            merged with the fixed fields of its record; on a name clash the
            fixed field wins
        """
        for record in gen:
            pairs = (field.to_key_value(record) for field in self.fixed_fields)
            # A field that fails validation reports None; leave it out
            # instead of letting dict() fail on a non-pair element.
            fixed = dict(pair for pair in pairs if pair is not None)
            for repeated in self.repeatable_field.to_key_value(record):
                repeated.update(fixed)
                yield repeated
def filter_by(field_gen: Iterable[Dict[str, Any]], **kwargs) -> Generator[Dict[str, Any], None, None]:
    """
    Filter an iterable of dicts by exact key/value matches.

    :param field_gen: iterable of dicts
    :param kwargs: ``key=value`` conditions; a dict passes only if it has
        every key and each stored value equals the requested one. With no
        conditions every dict passes.
    :return: a generator over the dicts that satisfy all conditions
    """
    for record in field_gen:
        matches = all(
            key in record and record[key] == wanted
            for key, wanted in kwargs.items()
        )
        if matches:
            yield record
################################################################################ | |
# ---------- Definition of the air pollution fields.
# Fields that appear once, at fixed character positions, in every record.
fixed_fields = [
    Field('codigo_estacion', slice_str(0, 8)),
    Field('magnitud_medida', slice_str(8, 10)),
    Field('tecnica_analitica', slice_str(10, 12)),
    Field('periodo', slice_str(12, 14)),
    Field('fecha', slice_str(14, 22), transform=str_yyyymmdd_to_date),
]

# Layout of one repetition: 5 characters of value plus 1 of validity flag.
repeated_fields = [
    Field('valor', slice_str(0, 5)),
    Field('validez', slice_str(5, 6)),
]

# Everything from position 22 onwards is read as back-to-back 6-character
# repetitions; the index is exposed 1-based, as a string, under 'intervalo'.
repeatable_field = RepeatableField(extract=slice_str(22, None),
                                   size=6,
                                   fields=repeated_fields,
                                   index_label='intervalo',
                                   index_transform=lambda x: str(x+1))

tiempo_real = RepeatableRegister(fixed_fields, repeatable_field)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lookup table from two-digit code (as a string) to its Spanish description.
# NOTE(review): presumably keyed by the value of the 'magnitud_medida'
# record field (measured quantity) - confirm against the data source.
magnitud_medida = {
    "01": "Dióxido de Azufre",
    "06": "Monóxido de Carbono",
    "07": "Monóxido de Nitrógeno",
    "08": "Dióxido de Nitrógeno",
    "09": "Partículas < 2.5 μm",
    "10": "Partículas < 10 μm",
    "12": "Óxidos de Nitrógeno",
    "14": "Ozono",
    "20": "Tolueno",
    "30": "Benceno",
    "35": "Etilbenceno",
    "37": "Metaxileno",
    "38": "Paraxileno",
    "39": "Ortoxileno",
    "42": "Hidrocarburos totales (hexano)",
    "43": "Hidrocarburos (metano)",
    "44": "Hidrocarburos no metánicos (hexano)",
    "80": "Radiación ultravioleta",
    "81": "Velocidad del viento",
    "82": "Dirección del viento",
    "83": "Temperatura",
    "86": "Humedad relativa",
    "87": "Presión",
    "88": "Radiación solar",
    "89": "Precipitación",
    "92": "Lluvia ácida",
}
# Lookup table from two-digit code (as a string) to its Spanish description.
# NOTE(review): presumably keyed by the value of the 'tecnica_analitica'
# record field (analysis technique) - confirm against the data source.
tecnica_analitica = {
    "38": "Fluorescencia ultravioleta",
    "48": "Absorción infrarroja",
    "08": "Quimioluminiscencia",
    "47": "Microbalanza",
    "06": "Absorción ultravioleta",
    "59": "Cromatografía de gases",
    "02": "Ionización de llama",
    "98": "Sensores meteorológicos",
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment