# Source code for basin3d.core.translate

"""

.. currentmodule:: basin3d.core.translate

:synopsis: The BASIN-3D Translate functionality
:module author: Danielle Svehla Christianson <dschristianson@lbl.gov>

.. contents:: Contents
    :local:
    :backlinks: top

"""

from itertools import product, repeat
from typing import List, Optional, Union

from basin3d.core import monitor
from basin3d.core.schema.enum import MAPPING_DELIMITER, NO_MAPPING_TEXT
from basin3d.core.schema.query import QueryBase, QueryMeasurementTimeseriesTVP, QueryMonitoringFeature


logger = monitor.get_logger(__name__)


def _clean_query(translated_query: QueryBase) -> QueryBase:
    """
    Remove any translations equal to NO_MAPPING_TEXT from the mapped fields of a translated query.

    For list values, the NO_MAPPING_TEXT entries are dropped and duplicates are removed, keeping the first
    occurrence of each value so the resulting order is the same from run to run.
    A single value equal to NO_MAPPING_TEXT is replaced with None.
    Empty / None values are left untouched. The query is modified in place.

    :param translated_query: the translated query that may have NO_MAPPING_TEXT translation results
    :return: the same query object, with all NO_MAPPING_TEXT translations removed
    """
    for attr in translated_query.mapped_fields:
        attr_value = getattr(translated_query, attr)
        if not attr_value:
            continue

        if isinstance(attr_value, list):
            supported_values = [val for val in attr_value if val != NO_MAPPING_TEXT]
            # dict.fromkeys de-duplicates while keeping insertion order; a set would return the values
            # in hash order, which changes between interpreter runs for strings.
            setattr(translated_query, attr, list(dict.fromkeys(supported_values)))
        elif attr_value == NO_MAPPING_TEXT:
            setattr(translated_query, attr, None)

    return translated_query


def _get_attr_types_in_compound_mappings(plugin_access) -> list:
    """
    Collect the compound attribute types defined for a datasource, if any exist.

    An attribute type counts as compound when it contains MAPPING_DELIMITER.

    :param plugin_access: plugin access; its ``get_attribute_mappings()`` provides the mappings to scan
    :return: list of the distinct compound attribute type strings (empty if none). The order is not defined.
    """
    all_mappings = plugin_access.get_attribute_mappings()

    # a set collapses the many mappings that share the same compound attribute type
    compound_types = {
        mapping.attr_type
        for mapping in all_mappings
        if MAPPING_DELIMITER in mapping.attr_type
    }

    return list(compound_types)


def _get_attr_type_if_compound_mapping(plugin_access, attr_type: str) -> Optional[str]:
    """
    Return the compound attr_type str if the specified attr_type is part of a compound mapping.

    Only the first attribute mapping returned for the (upper-cased) attr_type is inspected.

    :param plugin_access: plugin access; queried via ``get_attribute_mappings(attr_type=...)``
    :param attr_type: the attribute type to look up (case-insensitive)
    :return: the compound attribute type str (i.e., one containing MAPPING_DELIMITER), or None if the first
             mapping is not compound or no mapping was returned
    """
    mappings = plugin_access.get_attribute_mappings(attr_type=attr_type.upper())

    # the loop body runs at most once: it returns on the first mapping, if there is one
    for first_mapping in mappings:
        if MAPPING_DELIMITER in first_mapping.attr_type:
            return first_mapping.attr_type
        return None

    # nothing was returned for this attr_type
    return None


def _get_single_attr_types_in_compound_mappings(plugin_access, attr_type: str, include_specified_type: bool = False) -> list:
    """
    Return the individual attribute types of the compound mapping that attr_type belongs to.

    :param plugin_access: plugin access, used to look up the compound mapping
    :param attr_type: the attribute type (case-insensitive)
    :param include_specified_type: bool, True = include the specified attr_type in the return.
        False = return only the other attribute types that are part of the compound mapping.
    :return: list of attribute types in the compound mapping, in the order they appear in the compound
        attribute type str; empty list if attr_type is not part of a compound mapping
    """
    compound_type = _get_attr_type_if_compound_mapping(plugin_access, attr_type)

    # not part of a compound mapping: nothing to split
    if not compound_type:
        return []

    specified_type = attr_type.upper()
    return [
        single_type
        for single_type in compound_type.split(MAPPING_DELIMITER)
        if include_specified_type or single_type != specified_type
    ]


def _is_translated_query_valid(datasource_id, query, translated_query) -> Optional[bool]:
    """
    Determine if the translated query is valid. A valid translated query has at least one datasource vocabulary
    for every query field that was specified (i.e., with a BASIN-3D vocab).

    Both the mapped fields and the prefixed fields of the query are checked. A warning is logged whenever
    the result is not True.

    :param datasource_id: the datasource id (used in log messages only)
    :param query: the original query
    :param translated_query: the translated query
    :return: boolean (True = valid translated query, False = invalid translated query)
             or None (translated query could not be assessed because a translated value is neither list nor str)
    """
    field_groups = (('mapped', query.mapped_fields), ('prefixed', query.prefixed_fields))

    for field_type, field_names in field_groups:
        describe_vocab = field_type == 'mapped'

        for attr in field_names:
            translated_value = getattr(translated_query, attr)
            original_value = getattr(query, attr)

            # only mapped fields report the BASIN-3D vocabulary that could not be translated
            if describe_vocab:
                if isinstance(original_value, list):
                    original_value = ', '.join(original_value)
                msg_prefix = f'No vocabulary found for attribute {attr} with values: {original_value}.'
            else:
                msg_prefix = ''

            # unspecified (empty / None) fields cannot make the query invalid
            if not translated_value:
                continue

            if isinstance(translated_value, list):
                # a list is unsupported only if every element is NO_MAPPING_TEXT
                nothing_supported = all(x == NO_MAPPING_TEXT for x in translated_value)
            elif isinstance(translated_value, str):
                nothing_supported = translated_value == NO_MAPPING_TEXT
            else:
                logger.warning(
                    f'Translated query for datasource {datasource_id} cannot be assessed. Translated value for {attr} is not expected type.')
                return None

            if nothing_supported:
                logger.warning(f'Translated query for datasource {datasource_id} is invalid.{msg_prefix}')
                return False

    return True


def _order_mapped_fields(plugin_access, query_mapped_fields):
    """
    Order the mapped fields according to the order of the attributes in any compound mappings,
    followed by the rest of the mapped fields in their original order.

    The order only matters relative to an individual compound mapping; the order in which the
    compound mappings themselves are visited is not defined.

    The supplied list is never modified; a new list is always returned.

    :param plugin_access: plugin access, used to look up the datasource's compound mappings
    :param query_mapped_fields: list of the query's mapped field names
    :return: new list containing the same field names, reordered
    """
    # get list of compound mappings if any
    compound_mappings = _get_attr_types_in_compound_mappings(plugin_access)

    # work on a copy so the caller's list is left untouched
    remaining_fields = list(query_mapped_fields)

    # if there are no compound mappings, then the order doesn't matter
    if not compound_mappings:
        return remaining_fields

    ordered_fields = []
    for compound_mapping in compound_mappings:
        # Split up the compound mapping, preserving the order of its attributes
        for cm_attr in compound_mapping.split(MAPPING_DELIMITER):
            cm_field = cm_attr.lower()
            # move the field over only if it is one of the mapped fields not yet placed
            if cm_field in remaining_fields:
                ordered_fields.append(cm_field)
                remaining_fields.remove(cm_field)

    # then, add any remaining non-compound fields
    ordered_fields.extend(remaining_fields)
    return ordered_fields


def _translate_mapped_query_attrs(plugin_access, query: Union[QueryMeasurementTimeseriesTVP, QueryMonitoringFeature]) -> QueryBase:
    """
    Translate the BASIN-3D vocabulary in the query's mapped fields to datasource vocabulary.

    The query is modified in place. Every translated field is set to a list because multiple datasource
    vocabularies can be mapped to a single BASIN-3D vocabulary.

    :param plugin_access: plugin access
    :param query: the query to translate; its mapped fields are overwritten with datasource vocabulary
    :return: the same query object
    """
    mapped_fields = query.mapped_fields.copy()

    # nothing to translate
    if not mapped_fields:
        return query

    # fields that are part of compound mappings are handled first
    for field in _order_mapped_fields(plugin_access, mapped_fields):
        # NOTE: the query is mutated as the loop proceeds, so a field that was specified originally
        #       may already have been cleared by an earlier iteration (see the compound handling below).
        b3d_vocab = getattr(query, field)
        if not b3d_vocab:
            continue

        attr_type = field.upper()
        if isinstance(b3d_vocab, str):
            ds_vocab = _translate_to_datasource_vocab(plugin_access, attr_type, b3d_vocab, query)
        else:
            # each BASIN-3D value may translate to several datasource values; flatten them into one list
            ds_vocab = [
                ds_value
                for b3d_value in b3d_vocab
                for ds_value in _translate_to_datasource_vocab(plugin_access, attr_type, b3d_value, query)
            ]
        setattr(query, field, ds_vocab)

        # If the field is part of a compound mapping, clear the other fields of that mapping:
        # the search has to be done on the coupled datasource vocab that was just set.
        for coupled_attr in _get_single_attr_types_in_compound_mappings(plugin_access, field):
            setattr(query, coupled_attr.lower(), None)

    return query


def _translate_prefixed_query_attrs(plugin_access, query: Union[QueryMeasurementTimeseriesTVP, QueryMonitoringFeature]) -> QueryBase:
    """
    Translate the query's prefixed fields by removing the datasource id prefix from each identifier.

    An identifier belongs to this datasource when it starts with ``"<id_prefix>-"``.

    * A single str value is replaced with the identifier minus the prefix, or with NO_MAPPING_TEXT
      if it does not carry this datasource's prefix.
    * For any other (assumed list) value, str elements carrying this datasource's prefix are kept without
      the prefix, other str elements are dropped, and non-str elements are passed through unchanged.

    Empty / None fields are left untouched. The query is modified in place.

    :param plugin_access: plugin access; ``plugin_access.datasource.id_prefix`` supplies the prefix
    :param query: the query to translate
    :return: the same query object
    """
    prefix = "{}-".format(plugin_access.datasource.id_prefix)

    def extract_id(identifier: str) -> Optional[str]:
        """
        Extract the datasource identifier from the broker identifier

        :param identifier: the broker identifier
        :return: the identifier without the datasource id prefix, or None if the prefix is not present
        """
        if identifier.startswith(prefix):
            return identifier[len(prefix):]
        return None

    for attr in query.prefixed_fields:
        attr_value = getattr(query, attr)
        if not attr_value:
            continue

        if isinstance(attr_value, str):
            # apply the same prefix check as for list elements, so an identifier from a different
            # datasource is reported as unsupported rather than stripped and passed along
            translated_value = extract_id(attr_value)
            if translated_value is None:
                translated_value = NO_MAPPING_TEXT

        # otherwise assume it is a list -- pass any non-string values through
        else:
            translated_value = []
            for element in attr_value:
                if not isinstance(element, str):
                    translated_value.append(element)
                    continue
                datasource_identifier = extract_id(element)
                if datasource_identifier is not None:
                    translated_value.append(datasource_identifier)

        setattr(query, attr, translated_value)

    return query


def _translate_to_datasource_vocab(plugin_access, attr_type: str, basin3d_vocab: str, b3d_query) -> list:
    """
    Look up the datasource vocabulary(ies) mapped to a BASIN-3D vocabulary for the given attribute type.

    If the attribute type is part of a compound mapping, the other attributes of the mapping are taken from
    the query (or matched with the pattern ``.*`` if not specified) and every combination is searched.
    Because multiple datasource vocabularies can be mapped to the same BASIN-3D vocabulary, the return is a list.

    :param plugin_access: plugin access
    :param attr_type: the attribute type (case-insensitive)
    :param basin3d_vocab: the BASIN-3D vocabulary
    :param b3d_query: either a QueryBase class or subclass object, or a dictionary
    :return: list of the datasource vocabularies; ``[NO_MAPPING_TEXT]`` if nothing matched
    """
    attr_type = attr_type.upper()

    # empty unless attr_type is part of a compound mapping
    compound_attrs = _get_single_attr_types_in_compound_mappings(plugin_access, attr_type, include_specified_type=True)

    # simple mapping: the vocab itself is the only search term
    search_terms = [basin3d_vocab]

    if compound_attrs:
        # one list of filter values per attribute of the compound mapping, in mapping order
        filters_per_attr = []

        for compound_attr in compound_attrs:
            field_name = compound_attr.lower()
            queried_value = None

            if compound_attr == attr_type:
                # the attribute being translated is pinned to the specified vocab
                filter_values: List[str] = [basin3d_vocab]
            else:
                # default: match any value for the attribute, unless the query specifies one
                filter_values = ['.*']
                if issubclass(b3d_query.__class__, QueryBase) and hasattr(b3d_query, field_name):
                    queried_value = getattr(b3d_query, field_name)
                elif isinstance(b3d_query, dict) and field_name in b3d_query.keys():
                    queried_value = b3d_query.get(field_name)

            if queried_value:
                # a str holds comma-separated values; anything else is used as the list of values
                if isinstance(queried_value, str):
                    filter_values = queried_value.split(',')
                else:
                    filter_values = queried_value

            filters_per_attr.append(filter_values)

        # every combination of the per-attribute filters, joined into a compound search string
        search_terms = [MAPPING_DELIMITER.join(combo) for combo in product(*filters_per_attr)]

    ds_vocab: List[str] = []
    unmatched_terms: List[str] = []

    for search_term in search_terms:
        matches = plugin_access.get_attribute_mappings(attr_type=attr_type,
                                                       attr_vocab=search_term, from_basin3d=True)
        # there may be more than one datasource vocab mapped to the search term; collect them all
        matched_vocab = [match.datasource_vocab for match in matches]

        if matched_vocab:
            ds_vocab.extend(matched_vocab)
        else:
            unmatched_terms.append(search_term)

    if not ds_vocab:
        ds_vocab = [NO_MAPPING_TEXT]

    if unmatched_terms:
        # report the full compound attribute type in the log message when there is one
        if compound_attrs:
            attr_type = ':'.join(compound_attrs)
        logger.info(f'Datasource "{plugin_access.datasource.id}" did not have matches for attr_type '
                    f'{attr_type} and BASIN-3D vocab {", ".join(unmatched_terms)}.')

    return ds_vocab


def get_datasource_mapped_attribute(plugin_access, attr_type, datasource_vocab):
    """
    Get the :class:`basin3d.core.models.MappedAttribute` object(s) for the specified attribute type and
    datasource attribute vocab(s)

    :param plugin_access: plugin_access; its ``get_datasource_attribute_mapping(attr_type, vocab)`` does the lookup
    :param attr_type: attribute type
    :param datasource_vocab: datasource attribute vocabulary, either a single str or a list
    :return: a single object (str input) or a list of objects (list input), as returned by
             ``plugin_access.get_datasource_attribute_mapping``; None if datasource_vocab is neither a str nor a list
    """
    if isinstance(datasource_vocab, str):
        return plugin_access.get_datasource_attribute_mapping(attr_type, datasource_vocab)

    if isinstance(datasource_vocab, list):
        # one lookup per vocab, all for the same attribute type
        return [plugin_access.get_datasource_attribute_mapping(attr_type, vocab) for vocab in datasource_vocab]

    # any other type (including None) has no mapping to look up
    return None
def translate_attributes(plugin_access, mapped_attrs, **kwargs):
    """
    Translate datasource vocabularies to BASIN-3D vocabularies via :class:`basin3d.core.models.AttributeMapping`
    objects specified in the datasource plugin for specified model attributes (i.e., those that have mappings).
    This function is used by data model objects during object creation. See :class:`basin3d.core.models`.

    :param plugin_access: plugin_access
    :param mapped_attrs: a model's mapped attributes
    :param kwargs: the full set of a model's attributes
    :return: kwargs: the model's attributes, including the translated attributes
    """
    # Snapshot the attribute names that were actually supplied. The loop below may add the other attributes
    # of a compound mapping to kwargs; those additions must not be translated a second time.
    supplied_attrs = kwargs.copy()

    for attr in mapped_attrs:
        if attr not in supplied_attrs:
            continue

        datasource_vocab = kwargs[attr]
        kwargs[attr] = get_datasource_mapped_attribute(plugin_access, attr_type=attr.upper(),
                                                       datasource_vocab=datasource_vocab)

        # If the attr is part of a compound mapping and the compound attr is not part of the kwargs, set it
        # by looking up the same datasource vocab under the compound attr's type.
        for cm_attr in _get_single_attr_types_in_compound_mappings(plugin_access, attr):
            cm_key = cm_attr.lower()
            if cm_key not in kwargs:
                kwargs[cm_key] = get_datasource_mapped_attribute(plugin_access, attr_type=cm_attr.upper(),
                                                                 datasource_vocab=datasource_vocab)

    return kwargs
def translate_query(plugin_access, query: Union[QueryMeasurementTimeseriesTVP, QueryMonitoringFeature]) -> QueryBase:
    """
    Translate BASIN-3D vocabulary specified in a query to the datasource vocabularies defined by
    :class:`basin3d.core.models.AttributeMapping` objects specified in the datasource plugin.

    :param plugin_access: plugin access
    :param query: query to be translated
    :return: translated query as a :class:`basin3d.core.schema.query.QueryBase` object
    """
    # translate a copy; the original query is still needed below for the validity check
    # (presumably query.copy() returns an independent object -- verify against QueryBase)
    translated_query = query.copy()

    # both helpers overwrite fields on translated_query in place
    _translate_mapped_query_attrs(plugin_access, translated_query)
    _translate_prefixed_query_attrs(plugin_access, translated_query)

    is_valid_translated_query = _is_translated_query_valid(plugin_access.datasource.id, query, translated_query)

    # NOTE(review): block structure reconstructed from a single-line rendering of this function.
    #   Both statements are placed inside the branch, so the validity flag is only recorded, and the
    #   unsupported values only removed, when the translated query is valid -- confirm against the upstream source.
    if is_valid_translated_query:
        translated_query.is_valid_translated_query = is_valid_translated_query
        _clean_query(translated_query)

    return translated_query