tinproject · December 15, 2015 16:14
diff --git a/caliair.py b/caliair.py
 from typing import Any, Callable, Iterable, Generator, List, Optional, Tuple, Dict
 import datetime
 from itertools import count


 def identity(value: Any) -> Any:
    """
    Hand the argument back untouched.

    Handy as a no-op default wherever a transformation callable is expected.
    """
    return value


 def bool_test(something: Any) -> bool:
    """
    Report the truthiness of ``something`` as a real ``bool``.
    """
    return True if something else False


 def remove_commas(line_gen: Iterable[str]) -> Generator[str, Any, Any]:
    """
    Lazily delete every comma from each string of an iterable.
    :param line_gen: iterable that yields strings
    :return: generator yielding each string with its ',' characters removed
    """
    yield from (text.replace(',', '') for text in line_gen)


 def slice_str(start: int, stop: Optional[int]) -> Callable[[str], str]:
    """
    Build a function that cuts a fixed window out of a string.
    :param start: index of the first character to keep
    :param stop: index just past the last character to keep, or None to keep
                 everything up to the end of the string
    :return: callable mapping a string to its [start:stop] slice
    """
    window = slice(start, stop)

    def extract(s):
        return s[window]

    return extract


 def strip(string: str, size: int) -> Generator[Tuple[int, str], None, None]:
    """
    Cut a string into consecutive chunks of exactly ``size`` characters.

    A trailing remainder shorter than ``size`` is dropped.
    :param string: text to split
    :param size: length of every chunk, must be positive
    :return: generator of (chunk index starting at 0, chunk) pairs
    :raises ValueError: if size is not positive (raised on first iteration)
    """
    if size <= 0:
        # A non-positive size would keep yielding forever.
        raise ValueError("size must be a positive integer, got %r" % (size,))
    # End by running out of chunk starts rather than by raising StopIteration,
    # which PEP 479 turns into RuntimeError inside a generator.
    last_start = len(string) - size
    for index, start in enumerate(range(0, last_start + 1, size)):
        yield index, string[start:start + size]


 # Real-time hourly data carries a four-digit year; historical data uses two digits.
 def str_yymmdd_to_date(s):
    """Parse a ``yymmdd`` string (two-digit year) into a ``datetime.date``."""
    return datetime.datetime.strptime(s, "%y%m%d").date()


 def str_yyyymmdd_to_date(s):
    """Parse a ``yyyymmdd`` string (four-digit year) into a ``datetime.date``."""
    return datetime.datetime.strptime(s, "%Y%m%d").date()


 ################################################################################
 # ---------- Classes for fields and registers

 class Field:
    """
    Named field of a text record.

    Bundles how to cut the field out of a record, how to validate the raw
    text and how to convert it into its final value.
    """

    def __init__(self, name: str,
                 extract: Callable[[str], str],
                 validate: Callable[[str], bool]=bool_test,
                 transform: Callable[[str], Any]=identity) -> None:
        """
        Field object
        :param name: name of the field, used as the key of the produced pair
        :param extract: extracts the field text from the original string
        :param validate: validates the extracted text, can be used for logging
        :param transform: transforms the extracted text to the correct type
        """
        self.name = name
        self.extract = extract
        self.validate = validate
        self.transform = transform

    def to_key_value(self, record: str) -> Optional[Tuple[str, Any]]:
        """
        Extract, validate and transform this field from a record.
        :param record: the full record string
        :return: (name, transformed value), or None when validation fails
        """
        raw = self.extract(record)
        if not self.validate(raw):
            # Callers must be ready for a missing field.
            return None
        return self.name, self.transform(raw)


 class RepeatableField:
    """
    Group of fields that repeats several times inside one record.
    """

    def __init__(self,
                 extract: Callable[[str], str],
                 size: int,
                 fields: List[Field],
                 index_label: str,
                 index_transform: Callable[[int], str]=lambda x: str(x)) -> None:
        """
        Repeatable field object, holds some Fields repeated within a record
        :param extract: function to extract the repeated part from a record (string)
        :param size: the size of each repetition
        :param fields: list of fields that form one repetition
        :param index_label: key under which the repetition index is stored;
                            a falsy label leaves the index out
        :param index_transform: function to adapt the 0-based repetition index
        """
        self.extract = extract
        self.size = size
        self.fields = fields
        self.index_label = index_label
        self.index_transform = index_transform

    def to_key_value(self, record: str) -> Generator[Dict[str, Any], Any, Any]:
        """
        Yield one dict per repetition found in the record.
        :param record: the full record string
        :return: generator of dicts mapping field names to values, plus the
                 transformed index under ``index_label`` when that is set
        """
        repeated_part = self.extract(record)
        for index, chunk in strip(repeated_part, self.size):
            pairs = (field.to_key_value(chunk) for field in self.fields)
            # A field that fails validation gives None instead of a pair;
            # dict() would raise TypeError on it, so leave that field out.
            result = dict(pair for pair in pairs if pair is not None)
            if self.index_label:
                result[self.index_label] = self.index_transform(index)
            yield result


 class RepeatableRegister:
    """
    Record layout made of fixed fields plus one repeatable field.
    """

    def __init__(self, fixed_fields: List['Field'],
                 repeatable_field: 'RepeatableField') -> None:
        """
        :param fixed_fields: fields that appear once per record
        :param repeatable_field: description of the repeated part of the record
        """
        self.fixed_fields = fixed_fields
        self.repeatable_field = repeatable_field

    def str_to_dict_gen(self, gen: Iterable) -> Generator[Dict[str, Any], Any, Any]:
        """
        Turn every record into one dict per repetition it contains.
        :param gen: iterable of record strings
        :return: generator of dicts holding the repeated values merged with
                 the fixed values (fixed values win on a key clash)
        """
        for record in gen:
            pairs = (field.to_key_value(record) for field in self.fixed_fields)
            # A field that fails validation gives None instead of a pair
            # (e.g. on a blank or short record); dict() would raise TypeError
            # on it, so leave that field out.
            fixed = dict(pair for pair in pairs if pair is not None)
            for repeated in self.repeatable_field.to_key_value(record):
                repeated.update(fixed)
                yield repeated


 def filter_by(field_gen: Iterable[Dict[str, Any]], **kwargs) -> Generator[Dict[str, Any], Any, Any]:
    """
    Keep only the dicts whose entries match every given keyword.
    :param field_gen: iterable of dicts
    :param kwargs: key=value pairs a dict must contain to be kept
    :return: generator over the dicts that match all the pairs
    """
    for candidate in field_gen:
        for key, expected in kwargs.items():
            if key not in candidate or not (candidate[key] == expected):
                break
        else:
            # No criterion rejected the dict (or none was given).
            yield candidate


 ################################################################################
 # ---------- Field definitions for the air-pollution records.

 # Fields that appear once per record, cut from characters 0-21.
 fixed_fields = [
     Field(name='codigo_estacion', extract=slice_str(0, 8)),
     Field(name='magnitud_medida', extract=slice_str(8, 10)),
     Field(name='tecnica_analitica', extract=slice_str(10, 12)),
     Field(name='periodo', extract=slice_str(12, 14)),
     Field(name='fecha', extract=slice_str(14, 22), transform=str_yyyymmdd_to_date),
 ]

 # Layout of one repetition: a 5-character 'valor' plus a 1-character 'validez'.
 repeated_fields = [
     Field(name='valor', extract=slice_str(0, 5)),
     Field(name='validez', extract=slice_str(5, 6)),
 ]

 # Everything from character 22 onwards is read as 6-character repetitions,
 # each labelled with a 1-based 'intervalo' index.
 repeatable_field = RepeatableField(
     extract=slice_str(22, None),
     size=6,
     fields=repeated_fields,
     index_label='intervalo',
     index_transform=lambda x: str(x+1),
 )

 tiempo_real = RepeatableRegister(fixed_fields=fixed_fields,
                                  repeatable_field=repeatable_field)
diff --git a/datos.py b/datos.py
 # Lookup table from two-character code strings to the Spanish display name of
 # a measured magnitude.
 # NOTE(review): presumably keyed by the 'magnitud_medida' record field — confirm.
 magnitud_medida = {
    "01": "Dióxido de Azufre",
    "06": "Monóxido de Carbono",
    "07": "Monóxido de Nitrógeno",
    "08": "Dióxido de Nitrógeno",
    "09": "Partículas < 2.5 μm",
    "10": "Partículas < 10 μm",
    "12": "Óxidos de Nitrógeno",
    "14": "Ozono",
    "20": "Tolueno",
    "30": "Benceno",
    "35": "Etilbenceno",
    "37": "Metaxileno",
    "38": "Paraxileno",
    "39": "Ortoxileno",
    "42": "Hidrocarburos totales (hexano)",
    "43": "Hidrocarburos (metano)",
    "44": "Hidrocarburos no metánicos (hexano)",
    "80": "Radiación ultravioleta",
    "81": "Velocidad del viento",
    "82": "Dirección del viento",
    "83": "Temperatura",
    "86": "Humedad relativa",
    "87": "Presión",
    "88": "Radiación solar",
    "89": "Precipitación",
    "92": "Lluvia ácida",
 }

 # Lookup table from two-character code strings to the Spanish display name of
 # an analytical technique.
 # NOTE(review): presumably keyed by the 'tecnica_analitica' record field — confirm.
 tecnica_analitica = {
    "38": "Fluorescencia ultravioleta",
    "48": "Absorción infrarroja",
    "08": "Quimioluminiscencia",
    "47": "Microbalanza",
    "06": "Absorción ultravioleta",
    "59": "Cromatografía de gases",
    "02": "Ionización de llama",
    "98": "Sensores meteorológicos",
 }
diff --git a/Use case.ipynb b/Use case.ipynb
	from typing import Any, Callable, Iterable, Generator, List, Optional, Tuple, Dict
	import datetime
	from itertools import count


	def identity(value: Any) -> Any:
	    """
	    Hand the argument back untouched.

	    Handy as a no-op default wherever a transformation callable is expected.
	    """
	    return value


	def bool_test(something: Any) -> bool:
	    """
	    Report the truthiness of ``something`` as a real ``bool``.
	    """
	    return True if something else False


	def remove_commas(line_gen: Iterable[str]) -> Generator[str, Any, Any]:
	    """
	    Lazily delete every comma from each string of an iterable.
	    :param line_gen: iterable that yields strings
	    :return: generator yielding each string with its ',' characters removed
	    """
	    yield from (text.replace(',', '') for text in line_gen)


	def slice_str(start: int, stop: Optional[int]) -> Callable[[str], str]:
	    """
	    Build a function that cuts a fixed window out of a string.
	    :param start: index of the first character to keep
	    :param stop: index just past the last character to keep, or None to keep
	                 everything up to the end of the string
	    :return: callable mapping a string to its [start:stop] slice
	    """
	    window = slice(start, stop)

	    def extract(s):
	        return s[window]

	    return extract


	def strip(string: str, size: int) -> Generator[Tuple[int, str], None, None]:
	    """
	    Cut a string into consecutive chunks of exactly ``size`` characters.

	    A trailing remainder shorter than ``size`` is dropped.
	    :param string: text to split
	    :param size: length of every chunk, must be positive
	    :return: generator of (chunk index starting at 0, chunk) pairs
	    :raises ValueError: if size is not positive (raised on first iteration)
	    """
	    if size <= 0:
	        # A non-positive size would keep yielding forever.
	        raise ValueError("size must be a positive integer, got %r" % (size,))
	    # End by running out of chunk starts rather than by raising StopIteration,
	    # which PEP 479 turns into RuntimeError inside a generator.
	    last_start = len(string) - size
	    for index, start in enumerate(range(0, last_start + 1, size)):
	        yield index, string[start:start + size]


	# Real-time hourly data carries a four-digit year; historical data uses two digits.
	def str_yymmdd_to_date(s):
	    """Parse a ``yymmdd`` string (two-digit year) into a ``datetime.date``."""
	    return datetime.datetime.strptime(s, "%y%m%d").date()


	def str_yyyymmdd_to_date(s):
	    """Parse a ``yyyymmdd`` string (four-digit year) into a ``datetime.date``."""
	    return datetime.datetime.strptime(s, "%Y%m%d").date()


	################################################################################
	# ---------- Classes for fields and registers

	class Field:
	    """
	    Named field of a text record.

	    Bundles how to cut the field out of a record, how to validate the raw
	    text and how to convert it into its final value.
	    """

	    def __init__(self, name: str,
	                 extract: Callable[[str], str],
	                 validate: Callable[[str], bool]=bool_test,
	                 transform: Callable[[str], Any]=identity) -> None:
	        """
	        Field object
	        :param name: name of the field, used as the key of the produced pair
	        :param extract: extracts the field text from the original string
	        :param validate: validates the extracted text, can be used for logging
	        :param transform: transforms the extracted text to the correct type
	        """
	        self.name = name
	        self.extract = extract
	        self.validate = validate
	        self.transform = transform

	    def to_key_value(self, record: str) -> Optional[Tuple[str, Any]]:
	        """
	        Extract, validate and transform this field from a record.
	        :param record: the full record string
	        :return: (name, transformed value), or None when validation fails
	        """
	        raw = self.extract(record)
	        if not self.validate(raw):
	            # Callers must be ready for a missing field.
	            return None
	        return self.name, self.transform(raw)


	class RepeatableField:
	    """
	    Group of fields that repeats several times inside one record.
	    """

	    def __init__(self,
	                 extract: Callable[[str], str],
	                 size: int,
	                 fields: List[Field],
	                 index_label: str,
	                 index_transform: Callable[[int], str]=lambda x: str(x)) -> None:
	        """
	        Repeatable field object, holds some Fields repeated within a record
	        :param extract: function to extract the repeated part from a record (string)
	        :param size: the size of each repetition
	        :param fields: list of fields that form one repetition
	        :param index_label: key under which the repetition index is stored;
	                            a falsy label leaves the index out
	        :param index_transform: function to adapt the 0-based repetition index
	        """
	        self.extract = extract
	        self.size = size
	        self.fields = fields
	        self.index_label = index_label
	        self.index_transform = index_transform

	    def to_key_value(self, record: str) -> Generator[Dict[str, Any], Any, Any]:
	        """
	        Yield one dict per repetition found in the record.
	        :param record: the full record string
	        :return: generator of dicts mapping field names to values, plus the
	                 transformed index under ``index_label`` when that is set
	        """
	        repeated_part = self.extract(record)
	        for index, chunk in strip(repeated_part, self.size):
	            pairs = (field.to_key_value(chunk) for field in self.fields)
	            # A field that fails validation gives None instead of a pair;
	            # dict() would raise TypeError on it, so leave that field out.
	            result = dict(pair for pair in pairs if pair is not None)
	            if self.index_label:
	                result[self.index_label] = self.index_transform(index)
	            yield result


	class RepeatableRegister:
	    """
	    Record layout made of fixed fields plus one repeatable field.
	    """

	    def __init__(self, fixed_fields: List['Field'],
	                 repeatable_field: 'RepeatableField') -> None:
	        """
	        :param fixed_fields: fields that appear once per record
	        :param repeatable_field: description of the repeated part of the record
	        """
	        self.fixed_fields = fixed_fields
	        self.repeatable_field = repeatable_field

	    def str_to_dict_gen(self, gen: Iterable) -> Generator[Dict[str, Any], Any, Any]:
	        """
	        Turn every record into one dict per repetition it contains.
	        :param gen: iterable of record strings
	        :return: generator of dicts holding the repeated values merged with
	                 the fixed values (fixed values win on a key clash)
	        """
	        for record in gen:
	            pairs = (field.to_key_value(record) for field in self.fixed_fields)
	            # A field that fails validation gives None instead of a pair
	            # (e.g. on a blank or short record); dict() would raise TypeError
	            # on it, so leave that field out.
	            fixed = dict(pair for pair in pairs if pair is not None)
	            for repeated in self.repeatable_field.to_key_value(record):
	                repeated.update(fixed)
	                yield repeated


	def filter_by(field_gen: Iterable[Dict[str, Any]], **kwargs) -> Generator[Dict[str, Any], Any, Any]:
	    """
	    Keep only the dicts whose entries match every given keyword.
	    :param field_gen: iterable of dicts
	    :param kwargs: key=value pairs a dict must contain to be kept
	    :return: generator over the dicts that match all the pairs
	    """
	    for candidate in field_gen:
	        for key, expected in kwargs.items():
	            if key not in candidate or not (candidate[key] == expected):
	                break
	        else:
	            # No criterion rejected the dict (or none was given).
	            yield candidate


	################################################################################
	# ---------- Field definitions for the air-pollution records.

	# Fields that appear once per record, cut from characters 0-21.
	fixed_fields = [
	    Field(name='codigo_estacion', extract=slice_str(0, 8)),
	    Field(name='magnitud_medida', extract=slice_str(8, 10)),
	    Field(name='tecnica_analitica', extract=slice_str(10, 12)),
	    Field(name='periodo', extract=slice_str(12, 14)),
	    Field(name='fecha', extract=slice_str(14, 22), transform=str_yyyymmdd_to_date),
	]

	# Layout of one repetition: a 5-character 'valor' plus a 1-character 'validez'.
	repeated_fields = [
	    Field(name='valor', extract=slice_str(0, 5)),
	    Field(name='validez', extract=slice_str(5, 6)),
	]

	# Everything from character 22 onwards is read as 6-character repetitions,
	# each labelled with a 1-based 'intervalo' index.
	repeatable_field = RepeatableField(
	    extract=slice_str(22, None),
	    size=6,
	    fields=repeated_fields,
	    index_label='intervalo',
	    index_transform=lambda x: str(x+1),
	)

	tiempo_real = RepeatableRegister(fixed_fields=fixed_fields,
	                                 repeatable_field=repeatable_field)
	# Lookup table from two-character code strings to the Spanish display name of
	# a measured magnitude.
	# NOTE(review): presumably keyed by the 'magnitud_medida' record field — confirm.
	magnitud_medida = {
	"01": "Dióxido de Azufre",
	"06": "Monóxido de Carbono",
	"07": "Monóxido de Nitrógeno",
	"08": "Dióxido de Nitrógeno",
	"09": "Partículas < 2.5 μm",
	"10": "Partículas < 10 μm",
	"12": "Óxidos de Nitrógeno",
	"14": "Ozono",
	"20": "Tolueno",
	"30": "Benceno",
	"35": "Etilbenceno",
	"37": "Metaxileno",
	"38": "Paraxileno",
	"39": "Ortoxileno",
	"42": "Hidrocarburos totales (hexano)",
	"43": "Hidrocarburos (metano)",
	"44": "Hidrocarburos no metánicos (hexano)",
	"80": "Radiación ultravioleta",
	"81": "Velocidad del viento",
	"82": "Dirección del viento",
	"83": "Temperatura",
	"86": "Humedad relativa",
	"87": "Presión",
	"88": "Radiación solar",
	"89": "Precipitación",
	"92": "Lluvia ácida",
	}

	# Lookup table from two-character code strings to the Spanish display name of
	# an analytical technique.
	# NOTE(review): presumably keyed by the 'tecnica_analitica' record field — confirm.
	tecnica_analitica = {
	"38": "Fluorescencia ultravioleta",
	"48": "Absorción infrarroja",
	"08": "Quimioluminiscencia",
	"47": "Microbalanza",
	"06": "Absorción ultravioleta",
	"59": "Cromatografía de gases",
	"02": "Ionización de llama",
	"98": "Sensores meteorológicos",
	}