• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OCHA-DAP / hdx-ckan / #4574

19 Sep 2023 02:18PM UTC coverage: 69.617% (-0.006%) from 69.623%
#4574

push

coveralls-python

danmihaila
HDX-9010 - adding test for adding inactive in title

11200 of 16088 relevant lines covered (69.62%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.29
/ckanext-hdx_package/ckanext/hdx_package/helpers/custom_validator.py
1
'''
2
Created on Apr 11, 2014
3

4
@author: alexandru-m-g
5
'''
6

7
import bisect
1✔
8
import datetime
1✔
9
import logging
1✔
10
import json
1✔
11
import re
1✔
12

13
import six
1✔
14
import ckan.model as model
1✔
15
import ckan.authz as authz
1✔
16
import ckan.plugins.toolkit as tk
1✔
17
import ckan.lib.navl.dictization_functions as df
1✔
18
from ckan.common import _, c
1✔
19

20
import ckanext.hdx_package.helpers.caching as caching
1✔
21
import ckanext.hdx_package.helpers.resource_triggers.geopreview as geopreview
1✔
22

23
from ckanext.hdx_package.helpers.constants import FILE_WAS_UPLOADED, NO_DATA
1✔
24
from ckanext.hdx_package.helpers.date_helper import DaterangeParser
1✔
25
from ckanext.hdx_package.helpers.resource_triggers.fs_check import FS_CHECK_FORMATS
1✔
26

27
# Shorthand aliases for frequently used CKAN validation/toolkit objects.
missing = df.missing
StopOnError = df.StopOnError
Invalid = df.Invalid
get_action = tk.get_action
check_access = tk.check_access

NotAuthorized = tk.NotAuthorized

# Module-level logger for this validators module.
log = logging.getLogger(__name__)

# Allowed values for a dataset's 'dataset_preview' field.
_DATASET_PREVIEW_FIRST_RESOURCE = 'first_resource'
_DATASET_PREVIEW_RESOURCE_ID = 'resource_id'
_DATASET_PREVIEW_NO_PREVIEW = 'no_preview'
DATASET_PREVIEW_VALUES_LIST = [_DATASET_PREVIEW_FIRST_RESOURCE, _DATASET_PREVIEW_RESOURCE_ID,
                               _DATASET_PREVIEW_NO_PREVIEW]
42

43

44
# same as not_empty, but ignore whitespaces
45
# same as not_empty, but a whitespace-only value also counts as missing
def not_empty_ignore_ws(key, data, errors, context):
    raw_value = data.get(key)
    if not raw_value or raw_value is missing:
        errors[key].append(_('Missing value'))
        raise StopOnError
    # A value consisting only of whitespace is treated as missing too.
    trimmed = raw_value.strip()
    if not trimmed or trimmed is missing:
        errors[key].append(_('Missing value'))
        raise StopOnError
54

55

56
def groups_not_empty(key, data, errors, context):
    """
    When creating a package, groups cannot be empty
    """
    # All the extra logic here is needed to deal with the multi-step wizard used for creating a new dataset
    # We need to make sure that the validation only runs at the last step of the wizard

    # allow_partial_update = context.get('allow_partial_update', False)
    # allow_state_change = context.get('allow_state_change', False)
    first_phase = False

    # Detect whether we are on the first page of the new-dataset wizard
    # (flagged via the '_ckan_phase' entry inside '__extras').
    for data_key, data_value in data.items():
        if data_key[0] == '__extras':
            wizard_phase = data_value.get('_ckan_phase', 'Other')
            if wizard_phase == 'dataset_new_1':
                first_phase = True
                break

    # Sorted lists of known group names/ids, so membership can be checked
    # with binary search via _in_sorted_list().
    group_list = caching.cached_group_list()
    country_names = [group['name'] for group in group_list if group.get('name')]
    country_ids = [group['id'] for group in group_list]
    country_names.sort()
    country_ids.sort()

    if not first_phase:
        error_msg = _('Missing value')
        problem_appeared = False
        try:
            # Highest index used under the 'groups' key; ValueError from max()
            # means no groups at all were submitted.
            num_of_groups = max((key[1] for key in data.keys() if key[0] == 'groups')) + 1
        except ValueError as e:
            num_of_groups = 0
            problem_appeared = True

        # Every submitted group must match a known group by id or by name.
        for group_idx in range(0, num_of_groups):
            group_correct = False
            group_id = data.get(('groups', group_idx, 'id'))
            if group_id and _in_sorted_list(group_id, country_ids):
                group_correct = True
            else:
                group_name = data.get(('groups', group_idx, 'name'))
                if group_name and _in_sorted_list(group_name, country_names):
                    group_correct = True

            if not group_correct:
                error_msg = _('Wrong country code or id')
                problem_appeared = True
                break

        if problem_appeared:
            errors[key].append(error_msg)
            raise StopOnError
    return None
108

109

110
def _in_sorted_list(value, sorted_list):
    # Binary-search membership test; sorted_list must already be sorted.
    pos = bisect.bisect_left(sorted_list, value)
    return pos != len(sorted_list) and sorted_list[pos] == value
115

116

117
def detect_format(key, data, errors, context):
    '''
    When the resource format is empty or missing, try to detect it from the
    extension of the resource's url (or, failing that, its name). If nothing
    can be detected, record a validation error and stop the chain.
    '''

    current_format = data.get(key)
    if not current_format or isinstance(current_format, df.Missing):
        # key is ('resources', <idx>, 'format'); sibling fields share the prefix.
        url = data.get((key[0], key[1], 'url'))
        file_format = geopreview.detect_format_from_extension(url)
        if not file_format:
            # Fall back to the resource name when the url yields nothing.
            name = data.get((key[0], key[1], 'name'))
            file_format = geopreview.detect_format_from_extension(name)
        if file_format:
            data[key] = file_format
            return file_format
        err_message = "We couldn't determine your file type. If it is a compressed format (zip, etc), please  \
                      indicate the primary format of the data files inside compressed file."
        errors[key].append(_(err_message))
        raise df.StopOnError()

    return current_format
138

139

140
def hdx_keep_if_fs_check_format(key, data, errors, context):
    """Keep this field only for resources whose format supports fs_check;
    otherwise drop the field and stop the validation chain."""
    resource_format = data.get((key[0], key[1], 'format'))
    supported = bool(resource_format) and resource_format.lower() in FS_CHECK_FORMATS
    if not supported:
        data.pop(key, None)
        raise df.StopOnError()
    return data.get(key)
147

148

149
def to_lower(current_value):
    """Lower-case the value; falsy values are passed through unchanged."""
    return current_value.lower() if current_value else current_value
153

154

155
def hdx_show_subnational(key, data, errors, context):
    '''
    Normalize the subnational flag to the string "1" or "0".
    Truthy inputs ("true", "True", "1") become "1"; everything else,
    including missing/empty values, becomes "0".
    '''

    current_value = data.get(key)
    if not current_value or isinstance(current_value, df.Missing):
        data[key] = "0"
        return data[key]
    if current_value in ["true", "True", "1"]:
        data[key] = "1"
        return data[key]
    if current_value in ["false", "False", "0", None]:
        data[key] = "0"
        return data[key]

    # Any unrecognized value defaults to "0".
    data[key] = "0"
    return data[key]
173

174

175
def find_package_creator(key, data, errors, context):
    """Default the creator field to the currently logged-in user when unset."""
    creator = data.get(key)
    if not creator:
        # c.user / c.author come from the active request context.
        current_user = c.user or c.author
        if current_user:
            data[key] = current_user
            creator = current_user
    return creator
184

185

186
def hdx_find_package_maintainer(key, data, errors, context):
    """Resolve the maintainer field to a user id.

    The referenced user must exist and be either a member of the dataset's
    owner organization or a sysadmin; otherwise df.Invalid is raised.
    """
    try:
        user_obj = model.User.get(data.get(key))
    except Exception as ex:
        raise df.Invalid(_('Maintainer does not exist. Please add valid user ID'))

    org_id = data.get(('owner_org',))
    if not org_id:
        raise df.Invalid(_('Organizations owner does not exist. Please add an organization ID'))

    members = get_action('hdx_member_list')(context, {'org_id': org_id})

    # Sysadmins may maintain any dataset regardless of org membership.
    if user_obj and ((user_obj.id in members.get('all')) or user_obj.sysadmin):
        data[key] = user_obj.id
        return data[key]
    raise df.Invalid(_('Maintainer does not exist or is not a member of current owner organization.'
                       ' Please add valid user ID'))
203

204

205
def hdx_dataset_preview_validator(key, data, errors, context):
    """Coerce dataset_preview to one of the allowed values; anything else
    (including errors while reading it) falls back to 'first_resource'."""
    try:
        candidate = str(data.get(key))
        if candidate and candidate in DATASET_PREVIEW_VALUES_LIST:
            return data[key]
        data[key] = _DATASET_PREVIEW_FIRST_RESOURCE
    except Exception as ex:
        data[key] = _DATASET_PREVIEW_FIRST_RESOURCE
    return data[key]
214

215

216
def general_not_empty_if_other_selected(other_key, other_compare_value):
    '''
    :param other_key: the key of the field that influences this "_other" field. Ex. 'methodology', 'license_id'
    :type other_key: str
    :param other_compare_value: value of "other_key" field that makes this "_other" field mandatory. Ex. 'Other', 'hdx-other'
    :type other_compare_value: str
    :return: the validator function
    :rtype: not_empty_if_other_selected
    '''

    def not_empty_if_other_selected(key, data, errors, context):
        value = data.get(key)
        other_value = data.get((other_key,))
        # The "_other" field is mandatory only when the controlling field
        # holds the special "other" value.
        if not value and other_value == other_compare_value:
            errors[key].append(_('Missing value'))
            raise StopOnError
        elif other_value != other_compare_value:
            # NOTE(review): assumes key is always present here — del would
            # raise KeyError otherwise; confirm against the validation chain.
            del data[key]

            # Don't go further in the validation chain. Ex: convert to extras doesn't need to be called
            raise StopOnError

    return not_empty_if_other_selected
240

241

242
def hdx_convert_values_to_boolean_for_dataset_preview(key, data, errors, context):
    '''
    convert values to boolean and also sets the dataset_preview to false for the other resources
    '''

    value = data.get(key)
    if value in (True, False, 'True', 'False'):
        # Already a boolean (or boolean-like string): leave untouched.
        pass
    elif value in ('1', 1):
        # set others on False
        i = 0
        while True:
            # Walk the flattened resources until one without a name is hit,
            # disabling dataset preview on each of them.
            temp_key_name = ('resources', i, 'name')
            temp_key_preview = ('resources', i, 'dataset_preview_enabled')
            if not data.get(temp_key_name):
                break
            data[temp_key_preview] = False
            i += 1
        # This resource becomes the (only) one with preview enabled.
        data[key] = True

    elif value in ('0', 0):
        data[key] = False
    else:
        # value not in ('1',1,'0',0, True, False, 'True', 'False'):
        data[key] = None
    return data[key]
268

269

270
def hdx_convert_list_item_to_extras(key, data, errors, context):
    """Copy a flattened list item into the package 'extras',
    storing its flattened key as 'listname__idx__property'."""
    # Find the next free index in the flattened 'extras' entries.
    extras_indexes = [existing[1] for existing in data
                      if len(existing) > 1 and existing[0] == 'extras']
    next_index = max(extras_indexes) + 1 if extras_indexes else 0

    flattened_name = '__'.join(str(part) for part in key)
    data[('extras', next_index, 'key')] = flattened_name
    data[('extras', next_index, 'value')] = data[key]
279

280

281
def hdx_convert_from_extras_to_list_item(key, data, errors, context):
    """Move matching 'extras' entries back into flattened list items.

    Looks for extras whose stored key follows the 'listname__idx__property'
    pattern matching this validator's 'listname__property' key, re-creates
    the flattened (listname, idx, property) entries, and removes the
    consumed extras.
    """
    def remove_from_extras(data, key):
        # Delete every ('extras', key, *) entry for the given extras index.
        to_remove = []
        for data_key, data_value in data.items():
            if (data_key[0] == 'extras'
                and data_key[1] == key):
                to_remove.append(data_key)
        for item in to_remove:
            del data[item]

    keys_to_remove = []
    key_value_to_add = []

    for data_key, data_value in data.items():
        if isinstance(data_value, six.string_types):
            data_value_parts = data_value.split('__')  # Example: ['customviz', 0, 'url']
            key_parts = key[-1].split('__')  # Example ['customviz', 'url']
            # Match extras whose stored key has the same list name and
            # property as this validator's key.
            if data_key[0] == 'extras' and data_key[-1] == 'key' \
                and len(data_value_parts) == 3 and len(key_parts) == 2 \
                and data_value_parts[0] == key_parts[0] and data_value_parts[2] == key_parts[1]:
                list_name = key_parts[0]
                property_name = key_parts[1]

                # current_indexes = [k[1] for k in data.keys()
                #                    if len(k) == 3 and k[0] == list_name and k[2] == property_name]
                # index = max(current_indexes) + 1 if current_indexes else 0

                key_value_to_add.append({
                    'key': (list_name, data_key[1], property_name),
                    'value': data[('extras', data_key[1], 'value')]
                })
                keys_to_remove.append(data_key[1])

    # Apply additions/removals only after iteration, so the dict isn't
    # mutated while being iterated.
    for key_val in key_value_to_add:
        data[key_val['key']] = key_val['value']

    for k in keys_to_remove:
        remove_from_extras(data, k)
319

320

321
def hdx_boolean_string_converter(value, context):
    '''
    Normalize a value to the string "true" or "false".
    Python bools map to their string form; the strings 'true', 'yes',
    't', 'y' and '1' (any case) map to "true"; everything else,
    including missing/None, maps to "false".
    '''
    if value is missing or value is None:
        return "false"
    if isinstance(value, bool):
        return "true" if value else "false"
    lowered = value.lower()
    return "true" if lowered in ['true', 'yes', 't', 'y', '1'] else "false"
336

337

338
def hdx_assume_missing_is_true(value, context):
    """Treat an absent or None value as the string "true"; pass others through."""
    absent = value is missing or value is None
    return "true" if absent else value
342

343

344
def hdx_isodate_to_string_converter(value, context):
    """Serialize a datetime to its ISO-8601 string; non-datetimes become None."""
    if not isinstance(value, datetime.datetime):
        return None
    return value.isoformat()
348

349

350
def reset_on_file_upload(key, data, errors, context):
    """Drop this field when a new file was just uploaded for the resource,
    so stale values don't survive the upload."""
    rid = data.get(key[:-1] + ('id',))
    uploaded_ids = context.get(FILE_WAS_UPLOADED, set())
    if rid and rid in uploaded_ids:
        data.pop(key, None)
354

355

356
def hdx_resource_keep_prev_value_unless_sysadmin(key, data, errors, context):
    '''
    By default, this should inject the value from the previous version.
    The exception is if the user is a sysadmin, then the new value is used.
    '''

    if data[key] is missing:
        data.pop(key, None)

    user = context.get('user')
    ignore_auth = context.get('ignore_auth')
    allowed_to_change = ignore_auth or (user and authz.is_sysadmin(user))

    if not allowed_to_change:
        # Discard the submitted value and restore the previous one (if any).
        data.pop(key, None)
        resource_id = data.get(key[:-1] + ('id',))
        package_id = data.get(('id',))
        if resource_id:
            specific_key = key[2]
            # Cache the previous resource dict on the context so repeated
            # validators for the same resource don't re-fetch it.
            context_key = 'resource_' + resource_id
            resource_dict = context.get(context_key)
            if not resource_dict:
                resource_dict = __get_previous_resource_dict(context, package_id, resource_id)
                context[context_key] = resource_dict
            if resource_dict:
                old_value = resource_dict.get(specific_key)
                if old_value is not None:
                    data[key] = old_value

    # No previous value existed either — stop the validation chain.
    if key not in data:
        raise StopOnError
387

388

389
# def hdx_update_field_if_value_wrapper(context_field, value):
390
#     def hdx_update_field_if_value(key, data, errors, context):
391
#         a = 10
392
#
393
#     return hdx_update_field_if_value
394
def hdx_update_microdata(key, data, errors, context):
    """Force the resource's 'microdata' flag to False when its previous
    version was in quarantine and the current value for this key is falsy."""
    if not data.get(key):
        pkg_id = data.get(('id',))
        if pkg_id:
            # pkg_dict = __get_previous_package_dict(context, pkg_id)
            # if pkg_dict.get('resources', [])[key[1]].get('in_quarantine') and not data.get(key):
            #     data[key[:2] + ('microdata',)] = False
            res_id = data.get(key[:-1] + ('id',))
            if res_id:
                # resource exists
                res_dict = __get_previous_resource_dict(context, pkg_id, res_id)
                if res_dict and res_dict.get('in_quarantine'):
                    data[key[:2] + ('microdata',)] = False
407

408

409
def hdx_update_in_quarantine_by_microdata(key, data, errors, context):
    """Put a resource in quarantine when microdata is being enabled for the
    first time (new package, new resource, or a resource that previously
    had microdata disabled)."""
    if data.get(key):
        pkg_id = data.get(('id',))
        if pkg_id:
            res_id = data.get(key[:-1] + ('id',))
            if res_id:
                # resource exists
                res_dict = __get_previous_resource_dict(context, pkg_id, res_id)
                # check if previous value was not microdata
                if res_dict and not res_dict.get('microdata'):
                    data[key[:2] + ('in_quarantine',)] = True
            # new resource will be put in quarantine
            else:
                data[key[:2] + ('in_quarantine',)] = True
        # if new package, resource will be in quarantine
        else:
            data[key[:2] + ('in_quarantine',)] = True
426

427

428
def hdx_update_data_frequency_by_archived(key, data, errors, context):
    """When a dataset is archived ('true'), force data_update_frequency
    to '-1' (never updated)."""
    # Single lookup; the original truthiness pre-check was redundant,
    # since equality with 'true' already implies a truthy value.
    if data.get(key) == 'true':
        data[(u'data_update_frequency',)] = '-1'
431

432

433
def hdx_add_update_fs_check_info(key, data, errors, context):
    """Append a new fs_check info entry to the resource's history.

    Only acts when the incoming value is a dict and the context allows the
    fs_check field; keeps at most the last 10 entries. Any exception is
    swallowed and merely logged at info level (best-effort behavior).
    """
    try:
        fs_check_value = data.get(key)

        # if fs_check_value is dict, then is a new value coming and we need to process it.
        if fs_check_value and isinstance(fs_check_value, dict):
            if context.get('allow_fs_check_field'):
                pkg_id = data.get(('id',))
                resource_id = data.get(key[:-1] + ('id',))

                # resource update
                if resource_id:
                    resource_dict = __get_previous_resource_dict(context, pkg_id, resource_id) or {}
                    specific_key = key[2]
                    old_value = resource_dict.get(specific_key)

                    # if fs_check_info exists, then append new value
                    if old_value is not None:
                        if isinstance(old_value, str):
                            # Stored as a quasi-JSON string with single quotes;
                            # normalize before parsing.
                            old_value = json.loads(old_value.replace('\'', '"'))
                        if not isinstance(old_value, list):
                            old_value = [old_value]
                        old_value.append(fs_check_value)
                    else:
                        old_value = [fs_check_value]
                    # Keep only the 10 most recent entries.
                    data[key] = old_value[-10:]

                # resource create
                else:
                    # if not isinstance(data[key], list):
                    data[key] = [data[key]]
            # else:
            #     # loads current value as py object to allow the next validator to use the value
            #     if isinstance(data.get(key), str):
            #         try:
            #             data[key] = json.loads(data[key].replace('\'', '"'))
            #         except:
            #             log.error("fs_check_info contains a strange string: " + str(data[key]))

            log.info("done with add update fs_check_info")
    except Exception as ex:
        # NOTE(review): broad except logged at info level only — failures here
        # are silent by design; consider log.exception for visibility.
        log.info(ex)
475

476

477
def hdx_package_keep_prev_value_unless_field_in_context_wrapper(context_field, resource_level=False):
    # Factory: builds a validator that protects a field (package- or
    # resource-level) from modification unless 'context_field' is set in
    # the action context.
    def hdx_package_keep_prev_value_unless_field_in_context(key, data, errors, context):
        '''
        By default, this should inject the value from the previous version.
        The exception is if the 'context_field' key is in the context.
        NOTE, we don't check whether user is sysadmin. The api action that set the
        'context_field' should do any checks.
        '''

        if data[key] is missing:
            data.pop(key, None)

        allow_context_field = context.get(context_field)

        if not allow_context_field:
            # Discard the submitted value and restore the previous one.
            data.pop(key, None)
            pkg_id = data.get(('id',))

            if resource_level:
                resource_id = data.get(key[:-1] + ('id',))
                if resource_id:
                    resource_dict = __get_previous_resource_dict(context, pkg_id, resource_id) or {}
                    specific_key = key[2]
                    old_value = resource_dict.get(specific_key)
                    if old_value is not None:
                        data[key] = old_value

            # package level
            else:
                if pkg_id:
                    pkg_dict = __get_previous_package_dict(context, pkg_id)
                    old_value = pkg_dict.get(key[0], None)
                    if old_value is not None:
                        data[key] = old_value
        # Neither a new nor a previous value exists — stop the chain.
        if key not in data:
            raise StopOnError

    return hdx_package_keep_prev_value_unless_field_in_context
515

516

517
def hdx_keep_prev_value_if_empty(key, data, errors, context):
    """When the submitted value is empty/missing, restore the value from the
    previous package version; whitespace-only strings are dropped entirely."""
    new_value = data.get(key)
    if new_value is missing or (not new_value and new_value is not False):  # False is not an empty value
        data.pop(key, None)
        pkg_id = data.get(('id',))
        if pkg_id:
            prev_package_dict = __get_previous_package_dict(context, pkg_id)
            old_value = prev_package_dict.get(key[0], None)
            if old_value:
                data[key] = old_value

    # Whitespace-only input never survives, even if a previous value existed.
    if isinstance(new_value, six.string_types) and not new_value.strip():
        data.pop(key, None)

    if key not in data:
        raise StopOnError
533

534

535
def hdx_delete_unless_field_in_context(context_field):
    '''
    Factory for a validator that silently drops the field unless the given
    flag is present (and truthy) in the context.

    :param context_field: the field in the context which tells us if it's ok to allow the value through
    :type context_field: str
    :return:
    :rtype: function
    '''

    def hdx_delete_unless_forced(key, data, errors, context):
        permitted = context.get(context_field)
        if not permitted:
            data.pop(key, None)

    return hdx_delete_unless_forced
548

549

550
def hdx_delete_unless_authorized_wrapper(auth_function):
    '''
    Factory for a validator that silently drops the field when the current
    user fails the given authorization check.

    :param auth_function: the auth function to run through check_access()
    :type auth_function: str
    :return:
    :rtype: function
    '''

    def hdx_delete_unless_authorized(key, data, errors, context):
        try:
            check_access(auth_function, context, None)
        except NotAuthorized:
            # Not authorized: the field never reaches the stored data.
            data.pop(key, None)

    return hdx_delete_unless_authorized
565

566

567
def hdx_delete_if_marked_with_no_data(key, data, errors, context):
    """Remove the field entirely when it carries the NO_DATA sentinel value."""
    marked_no_data = data.get(key) == NO_DATA
    if marked_no_data:
        data.pop(key, None)
570

571

572
def hdx_value_in_list_wrapper(allowed_values, allow_missing):
    """Factory for a validator that requires the value to be one of
    allowed_values; empty/missing values are accepted only when
    allow_missing is True."""
    def hdx_value_in_list(key, data, errors, context):
        value = data[key]
        is_empty = not value or value is missing
        if is_empty:
            if not allow_missing:
                raise Invalid(_('Value is missing'))
        elif value not in allowed_values:
            raise Invalid(_('Value not in allowed list'))

    return hdx_value_in_list
582

583

584
def hdx_daterange_possible_infinite_end(key, data, errors, context):
    """Normalize the daterange string, allowing an open-ended (infinite) end."""
    raw_value = data.get(key)  # type: str
    data[key] = DaterangeParser(raw_value).compute_daterange_string(False)
588

589

590
def hdx_daterange_possible_infinite_end_dataset_date(key, data, errors, context):
    """Normalize the dataset-date daterange, allowing an open end and forcing
    the end-of-day variant for the end date."""
    raw_value = data.get(key)  # type: str
    parser = DaterangeParser(raw_value)
    data[key] = parser.compute_daterange_string(False, end_date_ending=True)
594

595

596
def hdx_convert_old_date_to_daterange(key, data, errors, context):
    """Convert legacy 'MM/DD/YYYY' or 'MM/DD/YYYY - MM/DD/YYYY' values into
    the '[start TO end]' daterange format; values already in that format
    pass through untouched. Raises df.Invalid on unparseable input."""
    value = data[key]
    # Already a daterange — nothing to convert.
    if value and '[' in value and ']' in value and ' TO ' in value:
        return
    try:
        if value:
            parts = value.split('-')
            if parts:
                start = datetime.datetime.strptime(parts[0].strip(), '%m/%d/%Y')
                if len(parts) == 2:
                    end = datetime.datetime.strptime(parts[1].strip(), '%m/%d/%Y')
                else:
                    # Single date: the range covers that whole day.
                    end = start
                data[key] = "[{start_date}T00:00:00 TO {end_date}T23:59:59]".format(
                    start_date=start.strftime("%Y-%m-%d"),
                    end_date=end.strftime("%Y-%m-%d"))
    except (TypeError, ValueError):
        raise df.Invalid(_('Invalid old HDX date format MM/DD/YYYY. Please use [start_datetime TO end_datetime]'))
616

617

618
def hdx_convert_to_json_string(key, data, errors, context):
    """Serialize the value to a JSON string; raises df.Invalid when the
    value is not JSON-serializable."""
    try:
        serialized = json.dumps(data[key])
    except TypeError:
        raise df.Invalid(_('Input is not valid'))
    data[key] = serialized
624

625

626
def hdx_convert_to_json_string_if_not_string(key, data, errors, context):
    """JSON-encode the value only when it is truthy and not already a string."""
    current = data[key]
    if current and not isinstance(current, str):
        return hdx_convert_to_json_string(key, data, errors, context)
630

631

632
def hdx_convert_from_json_string(key, data, errors, context):
    """Parse the value as JSON; raises df.Invalid when it cannot be parsed."""
    try:
        parsed = json.loads(data[key])
    except (ValueError, TypeError):
        raise df.Invalid(_('Could not parse JSON'))
    data[key] = parsed
638

639

640
# def hdx_autocompute_grouping(key, data, errors, context):
641
#     current_value = data.get(key)
642
#     if not current_value or current_value is missing:
643
#         daterange_value = data.get(key[:-1] + ('daterange_for_data',))
644
#         if daterange_value and daterange_value is not missing:
645
#             daterange_parser = DaterangeParser(daterange_value)
646
#             data[key] = daterange_parser.human_readable()
647

648
def hdx_float_number(key, data, errors, context):
    """Coerce the value to float; raises df.Invalid for non-numeric input."""
    try:
        converted = float(data[key])
    except (ValueError, TypeError):
        raise df.Invalid(_('Input is not a float valid number'))
    data[key] = converted
654

655

656
def __get_previous_resource_dict(context, package_id, resource_id):
    # Find the matching resource inside the previous package version;
    # None when no resource with that id exists.
    previous_pkg = __get_previous_package_dict(context, package_id)
    for resource in previous_pkg.get('resources', []):
        if resource['id'] == resource_id:
            return resource
    return None
659

660

661
def __get_previous_package_dict(context, id):
    # Cache the previous package dict on the context so repeated validators
    # for the same package don't re-run 'package_show'.
    cache_key = 'hdx_prev_package_dict_' + id
    cached = context.get(cache_key)
    if not cached:
        cached = get_action('package_show')(context, {'id': id})
        context[cache_key] = cached

    return cached or {}
669

670

671
def hdx_resources_not_allowed_if_requested_data(key, data, errors, context):
    """Reject resources on HDX Connect (by-request) datasets."""
    has_first_resource = (u'resources', 0, 'url') in data or (u'resources', 0, 'name') in data
    if data[key] and has_first_resource:
        raise df.Invalid(_('By request - HDX Connect datasets can not store resources'))
674

675

676
# Raw string: '\w' in a non-raw literal is an invalid escape sequence
# (DeprecationWarning, and a SyntaxError in future Python versions).
DATASERIES_TITLE_PATTERN = re.compile(r'^[\w ,-]+$', re.UNICODE)


def hdx_dataseries_title_validator(value, context):
    """Validate a dataseries title.

    Allowed characters: word characters (unicode), spaces, commas and
    hyphens. Falsy values pass through untouched; a non-matching title
    raises Invalid.
    """
    if value:
        if not DATASERIES_TITLE_PATTERN.match(value):
            raise Invalid(_('Dataseries title is not valid'))
    return value
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc