Skip to content

data.wrangling.misc

Data - Wrangling - misc¤

convert_str_repr_to_list(inp) ¤

convert_str_repr_to_list converts string representation of list to list

Source code in dietbox/data/wrangling/misc.py
def convert_str_repr_to_list(inp):
    """
    convert_str_repr_to_list converts string representation of list to list

    It also works for list, tuple, or set input. A string that parses to a
    tuple or set is converted to a list as well; a string that parses to any
    other literal (number, dict, ...) is returned as parsed. Input of any
    other type yields an empty list.

    :param inp: string representation of list, list, tuple, set
    :return: converted list
    :rtype: list
    :raises Exception: if a string input cannot be parsed as a Python literal
    """

    res = []
    if isinstance(inp, str):
        try:
            res = literal_eval(inp)
        except Exception as e:
            # Keep the original parsing error attached for easier debugging.
            raise Exception(f"Could not convert {inp} to list") from e
        # "(1, 2)" or "{1, 2}" parse to a tuple/set; normalise them so the
        # caller receives a list, as the function name promises.
        if isinstance(res, (tuple, set)):
            res = list(res)
    elif isinstance(inp, (list, tuple, set)):
        res = list(inp)

    return res

convert_str_repr_to_tuple(inp) ¤

convert_str_repr_to_tuple converts string representation of tuple to tuple

Source code in dietbox/data/wrangling/misc.py
def convert_str_repr_to_tuple(inp):
    """
    convert_str_repr_to_tuple converts string representation of tuple to tuple

    It also works for list, tuple, or set input. A string that parses to a
    list or set is converted to a tuple as well; a string that parses to any
    other literal (number, dict, ...) is returned as parsed. Input of any
    other type yields an empty tuple.

    :param inp: string representation of tuple, list, tuple, set
    :return: converted tuple
    :rtype: tuple
    :raises Exception: if a string input cannot be parsed as a Python literal
    """

    res = ()
    if isinstance(inp, str):
        try:
            res = literal_eval(inp)
        except Exception as e:
            # Keep the original parsing error attached for easier debugging.
            raise Exception(f"Could not convert {inp} to tuple") from e
        # The parsed value (not the input string) is what must be normalised:
        # "[1, 2]" parses to a list and has to become a tuple.
        if isinstance(res, (list, set)):
            res = tuple(res)
    elif isinstance(inp, (list, tuple, set)):
        res = tuple(inp)

    return res

convert_to_bool(data) ¤

convert_to_bool converts input to bool type in python.

The following values are converted to True:

  1. 'true'
  2. 'yes'
  3. '1'
  4. 'y'
  5. 1

The following values are converted to False:

  1. 'false'
  2. 'no'
  3. '0'
  4. 'n'
  5. 0

Parameters:

Name Type Description Default
data

input data

required

Returns:

Type Description
bool

boolean value of the input data

Source code in dietbox/data/wrangling/misc.py
def convert_to_bool(data):
    """
    convert_to_bool converts input to bool type in python.

    Strings are compared case-insensitively after stripping surrounding
    whitespace.

    Converted to True:

    1. 'true'
    2. 'yes'
    3. '1'
    4. 'y'
    5. 1 (or any other non-zero int/float)

    Converted to False:

    1. 'false'
    2. 'no'
    3. '0'
    4. 'n'
    5. 0

    Booleans are returned unchanged. ``None``, unrecognised strings and
    values of any other type give ``None``.

    :param data: input data
    :return: boolean value of the input data, or None if it is not recognised
    :rtype: bool
    """
    # bool must be tested before the numeric branch: bool is a subclass of int.
    if isinstance(data, bool):
        return data

    if isinstance(data, str):
        token = data.lower().strip()
        if token in ("true", "yes", "1", "y"):
            return True
        if token in ("false", "no", "0", "n"):
            return False
        return None

    if isinstance(data, (float, int)):
        return bool(data)

    # None and unsupported types end up here.
    return None

convert_to_list(inp) ¤

convert_to_list converts string representation of lists to list

It also works for list, tuple, or set input.

Parameters:

Name Type Description Default
inp

string representation of list, list, tuple, set

required

Returns:

Type Description
list

converted list

Source code in dietbox/data/wrangling/misc.py
def convert_to_list(inp):
    """
    convert_to_list converts string representation of lists to list

    It also works for list, tuple, or set input. A string that parses to a
    tuple or set is converted to a list as well; a string that parses to any
    other literal (number, dict, ...) is returned as parsed. Input of any
    other type yields an empty list.

    :param inp: string representation of list, list, tuple, set
    :return: converted list
    :rtype: list
    :raises Exception: if a string input cannot be parsed as a Python literal
    """

    res = []
    if isinstance(inp, str):
        try:
            res = literal_eval(inp)
        except Exception as e:
            # Keep the original parsing error attached for easier debugging.
            raise Exception(f"Could not convert {inp} to list") from e
        # "(1, 2)" or "{1, 2}" parse to a tuple/set; normalise them so the
        # documented return type (list) holds for those inputs too.
        if isinstance(res, (tuple, set)):
            res = list(res)
    elif isinstance(inp, (list, tuple, set)):
        res = list(inp)

    return res

convert_to_tuple(inp) ¤

convert_to_tuple converts string representation of tuple to tuple

It also works for list, tuple, or set input.

Parameters:

Name Type Description Default
inp

string representation of tuple, list, tuple, set

required

Returns:

Type Description
tuple

converted tuple

Source code in dietbox/data/wrangling/misc.py
def convert_to_tuple(inp):
    """
    convert_to_tuple converts string representation of tuple to tuple

    It also works for list, tuple, or set input. A string that parses to a
    list or set is converted to a tuple as well; a string that parses to any
    other literal (number, dict, ...) is returned as parsed. Input of any
    other type yields an empty tuple.

    :param inp: string representation of tuple, list, tuple, set
    :return: converted tuple
    :rtype: tuple
    :raises Exception: if a string input cannot be parsed as a Python literal
    """

    res = ()
    if isinstance(inp, str):
        try:
            res = literal_eval(inp)
        except Exception as e:
            # Keep the original parsing error attached for easier debugging.
            raise Exception(f"Could not convert {inp} to tuple") from e
        # Normalise the parsed value, not the input string: "[1, 2]" parses
        # to a list and must still come back as a tuple.
        if isinstance(res, (list, set)):
            res = tuple(res)
    elif isinstance(inp, (list, tuple, set)):
        res = tuple(inp)

    return res

eu_float_string_to_float(data) ¤

eu_float_string_to_float converts strings in EU format to floats

Parameters:

Name Type Description Default
data str

string of the float in EU conventions

required

Returns:

Type Description
float

converted float from the string

Source code in dietbox/data/wrangling/misc.py
def eu_float_string_to_float(data):
    """
    eu_float_string_to_float converts strings in EU format to floats

    In this format ``.`` separates thousands and ``,`` marks the decimals,
    e.g. ``"1.234,56"`` becomes ``1234.56``.

    :param data: string of the float in EU conventions
    :type data: str
    :return: converted float from the string
    :rtype: float
    :raises TypeError: if the input is not a string
    :raises Exception: if the normalised string cannot be parsed as a float
    """
    if not isinstance(data, str):
        raise TypeError("Input data should be string")

    # Order matters: drop the thousands dots first, then turn the decimal
    # comma into a dot.
    normalised = data.replace(".", "").replace(",", ".")
    try:
        return float(normalised)
    except Exception as e:
        raise Exception(f"Could not convert string {data} to float: {e}")

get_value_in_dict_recursively(dictionary, path, ignore_path_fail=None) ¤

Get value of a dictionary according to specified path (names)

Parameters:

Name Type Description Default
dictionary dict

input dictionary

required
path list

path to the value to be obtained.

This function always returns the value or None.

>>> get_value_in_dict_recursively({'lvl_1':{'lvl_2':{'lvl_3':'lvl_3_value'}}},['lvl_1','lvl_2'])
{'lvl_3': 'lvl_3_value'}
>>> get_value_in_dict_recursively({1:{2:{3:'hi'}}},[1,'2',3])
'hi'

required
Source code in dietbox/data/wrangling/misc.py
def get_value_in_dict_recursively(dictionary, path, ignore_path_fail=None):
    """
    Get value of a dictionary according to specified path (names)

    Every path element is converted to ``int`` when possible before it is
    used as a key, so ``'2'`` and ``2`` address the same entry. Elements that
    cannot be converted are used unchanged.

    :param dict dictionary: input dictionary
    :param list path: path to the value to be obtained; a tuple is accepted
        as well, any other value is treated as a single-element path
    :param bool ignore_path_fail: when ``False``, an exception is raised if
        the first element of a multi-element path cannot be converted to
        ``int``. Defaults to ``True`` (also when ``None`` is passed).
        NOTE(review): the flag is not forwarded to the recursive calls, so
        it only affects the first element of the path passed by the caller.
    :return: the value found at the path, or None if the path is empty or
        the lookup fails
    :raises Exception: if ``ignore_path_fail`` is ``False`` and the first
        element of a multi-element path is not convertible to ``int``

    This function always returns the value or None.

    >>> get_value_in_dict_recursively({'lvl_1':{'lvl_2':{'lvl_3':'lvl_3_value'}}},['lvl_1','lvl_2'])
    {'lvl_3': 'lvl_3_value'}
    >>> get_value_in_dict_recursively({1:{2:{3:'hi'}}},[1,'2',3])
    'hi'
    """
    if ignore_path_fail is None:
        ignore_path_fail = True

    if isinstance(path, (list, tuple)):
        # Work on a copy so the caller's path is never consumed by pop().
        path_temp = list(path)
    else:
        logger.warning(f"path is not list or tuple, converting to list: {path}")
        path_temp = [path]

    if len(path_temp) == 0:
        return None

    if len(path_temp) > 1:
        pop = path_temp.pop(0)
        try:
            pop = int(pop)
        except (ValueError, TypeError, OverflowError) as err:
            # Not int-like (e.g. a plain string key or None): keep the raw
            # key unless the caller asked for strict paths.
            if ignore_path_fail:
                logger.warning("can not get path")
            else:
                raise Exception(
                    f"specified path ({path}) is not acceptable"
                ) from err

        try:
            return get_value_in_dict_recursively(dictionary[pop], path_temp)
        except Exception:
            # Best-effort lookup: any ordinary failure below this level
            # results in None. KeyboardInterrupt/SystemExit still propagate.
            logger.debug(f"did not get values for {pop}")
            return None

    # Exactly one element left: this is the final lookup.
    key = path_temp[0]
    try:
        key = int(key)
    except (ValueError, TypeError, OverflowError):
        pass  # not int-like, use the key unchanged

    try:
        return dictionary[key]
    except (KeyError, IndexError, TypeError) as err:
        logger.error(f"{type(err).__name__}: Could not find {path_temp[0]}")
        return None

remove_outliers(dataset, criteria=None) ¤

remove_outliers will filter out the outliers of dataset

Changes will be made to original dataset.

Parameters:

Name Type Description Default
dataset

dataframe that contains the data to be filtered

required
Source code in dietbox/data/wrangling/misc.py
def remove_outliers(dataset, criteria=None):
    """
    remove_outliers will filter out the outliers of dataset

    The input dataframe and the criteria are not modified; the filtered
    dataframe is returned.

    :param dataset: dataframe that contains the data to be filtered
    :param dict criteria: maps a column name to its filter settings.
        Columns missing from the dataset are skipped with a warning.
        Supported settings per column:

        - ``"isna"``: if truthy, rows where the column is NA are removed
        - ``"range"``: ``[lower, upper]`` bounds, unbounded by default
        - ``"quantile_range"``: ``[lower_q, upper_q]`` quantiles of the
          column; a quantile value replaces the corresponding ``"range"``
          bound whenever it is the tighter of the two

        Defaults to ``{"target": {"quantile_range": [0.01, 0.99]}}``.
    :return: dataframe restricted to the rows that satisfy all criteria
    """
    logger.info("Removing outliers ... ")
    if criteria is None:
        criteria = {"target": {"quantile_range": [0.01, 0.99]}}

    for col in criteria:
        if col not in dataset.columns:
            logger.warning(f"Column {col} is not in dataset ({dataset.columns})")
            continue
        col_criteria = criteria[col]

        # Remove isna if required in criteria
        if col_criteria.get("isna", False):
            dataset = dataset.loc[~dataset[col].isna()]

        # only use between values
        # Copy into a new list: the bounds are adjusted below and the
        # caller's criteria (possibly a tuple) must stay untouched.
        col_range = list(col_criteria.get("range", [-np.inf, np.inf]))
        col_quantile_range = col_criteria.get("quantile_range", ())
        if col_quantile_range:
            quantile_lower = dataset[col].quantile(col_quantile_range[0])
            quantile_upper = dataset[col].quantile(col_quantile_range[1])
            # Keep whichever bound is tighter on each side.
            if quantile_lower >= col_range[0]:
                col_range[0] = quantile_lower
            if quantile_upper <= col_range[1]:
                col_range[1] = quantile_upper

        dataset = dataset.loc[dataset[col].between(*col_range)]

    logger.info("Removed outliers!")

    # Each .loc filter above only rebinds the local name, so the result has
    # to be handed back explicitly.
    return dataset

update_dict_recursively(dictionary, key_path, value) ¤

update or insert values to a dictionary recursively.

Parameters:

Name Type Description Default
dictionary dict

the dictionary to be inserted into

required
key_path list

the path for the insertion value

required
value

value to be inserted

required

Returns:

Type Description

a dictionary with the inserted value

>>> update_dict_recursively({}, ['a', 'b', 1, 2], 'this_value')
{'a': {'b': {1: {2: 'this_value'}}}}

Source code in dietbox/data/wrangling/misc.py
def update_dict_recursively(dictionary, key_path, value):
    """
    Set ``value`` at the nested location described by ``key_path``.

    Levels that do not exist yet are created as empty dictionaries on the
    way down. The input dictionary is modified in place and also returned.

    :param dict dictionary: the dictionary to be inserted into
    :param list key_path: the keys leading to the insertion point; the last
        key receives the value
    :param value: value to be inserted
    :returns: the same dictionary, now containing the inserted value

    >>> update_dict_recursively({}, ['a', 'b', 1, 2], 'this_value')
    {'a': {'b': {1: {2: 'this_value'}}}}
    """
    node = dictionary

    # Descend through every key except the last, creating levels as needed.
    for step in key_path[:-1]:
        if step not in node:
            node[step] = {}
        node = node[step]

    # The last key is where the value is stored.
    node[key_path[-1]] = value

    return dictionary