data.wrangling.misc
Data - Wrangling - misc¤
convert_str_repr_to_list(inp)
¤
    convert_str_repr_to_list converts string representation of list to list
Source code in dietbox/data/wrangling/misc.py
          def convert_str_repr_to_list(inp):
              """
              convert_str_repr_to_list converts string representation of list to list

              List, tuple and set inputs are copied into a new list. A string is
              parsed with ``literal_eval``; if it evaluates to a list, tuple or set
              the result is returned as a list, any other literal (e.g. a number or
              a dict) is returned unchanged. Every other input type yields ``[]``.

              :param inp: string representation of a list, or a list, tuple, or set
              :return: converted list
              :rtype: list
              :raises ValueError: if the string can not be parsed as a Python literal
              """
              if isinstance(inp, (list, tuple, set)):
                  return list(inp)
              if not isinstance(inp, str):
                  return []

              try:
                  parsed = literal_eval(inp)
              except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
                  # ValueError is still an Exception, so existing handlers keep working;
                  # chaining keeps the parser's own diagnosis in the traceback.
                  raise ValueError(f"Could not convert {inp} to list") from err

              # "(1, 2)" or "{1, 2}" parse to a tuple/set; honour the "to list" contract.
              if isinstance(parsed, (list, tuple, set)):
                  return list(parsed)
              return parsed
convert_str_repr_to_tuple(inp)
¤
    convert_str_repr_to_tuple converts string representation of tuple to tuple
Source code in dietbox/data/wrangling/misc.py
          def convert_str_repr_to_tuple(inp):
              """
              convert_str_repr_to_tuple converts string representation of tuple to tuple

              List, tuple and set inputs are copied into a new tuple. A string is
              parsed with ``literal_eval``; if it evaluates to a list, tuple or set
              the result is returned as a tuple, any other literal (e.g. a number)
              is returned unchanged. Every other input type yields ``()``.

              :param inp: string representation of a tuple, or a list, tuple, or set
              :return: converted tuple
              :rtype: tuple
              :raises ValueError: if the string can not be parsed as a Python literal
              """
              if isinstance(inp, (list, tuple, set)):
                  return tuple(inp)
              if not isinstance(inp, str):
                  return ()

              try:
                  parsed = literal_eval(inp)
              except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
                  # ValueError is still an Exception, so existing handlers keep working.
                  raise ValueError(f"Could not convert {inp} to tuple") from err

              # The parsed value (not the input string) is what has to become a tuple.
              if isinstance(parsed, (list, tuple, set)):
                  return tuple(parsed)
              return parsed
convert_to_bool(data)
¤
    convert_to_bool converts input to bool type in python.
The following values are converted to True:
- 'true'
 - 'yes'
 - '1'
 - 'y'
 - 1
 
The following values are converted to False:
- 'false'
 - 'no'
 - '0'
 - 'n'
 - 0
 
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
data | 
        input data  | 
        required | 
Returns:
| Type | Description | 
|---|---|
bool | 
      boolean value of the input data  | 
    
Source code in dietbox/data/wrangling/misc.py
          def convert_to_bool(data):
              """
              convert_to_bool converts input to bool type in python.

              The following values are converted to True:

              1. 'true'
              2. 'yes'
              3. '1'
              4. 'y'
              5. 1

              The following values are converted to False:

              1. 'false'
              2. 'no'
              3. '0'
              4. 'n'
              5. 0

              Strings are compared case-insensitively with surrounding whitespace
              removed. Other int/float values follow ``bool(data)``. ``None``, float
              NaN, unrecognised strings and unsupported types give ``None``.

              :param data: input data
              :return: boolean value of the input data, or None if it can not be decided
              :rtype: bool
              """
              import math  # function-scope import keeps this block self-contained

              if data is None:
                  return None
              if isinstance(data, bool):
                  return data
              if isinstance(data, str):
                  normalized = data.lower().strip()
                  if normalized in ("true", "yes", "1", "y"):
                      return True
                  if normalized in ("false", "no", "0", "n"):
                      return False
                  return None
              if isinstance(data, (float, int)):
                  # bool(nan) is True, but NaN carries no truth value; report it as unknown.
                  if isinstance(data, float) and math.isnan(data):
                      return None
                  return bool(data)
              return None
convert_to_list(inp)
¤
    convert_to_list converts string representation of lists to list
It also works for list, tuple, or set input.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
inp | 
        string representation of list, list, tuple, set  | 
        required | 
Returns:
| Type | Description | 
|---|---|
list | 
      converted list  | 
    
Source code in dietbox/data/wrangling/misc.py
          def convert_to_list(inp):
              """
              convert_to_list converts string representation of lists to list

              It also works for list, tuple, or set input, which is copied into a new
              list. A string is parsed with ``literal_eval``; if it evaluates to a
              list, tuple or set the result is returned as a list, any other literal
              (e.g. a number or a dict) is returned unchanged. Every other input type
              yields ``[]``.

              :param inp: string representation of list, list, tuple, set
              :return: converted list
              :rtype: list
              :raises ValueError: if the string can not be parsed as a Python literal
              """
              if isinstance(inp, (list, tuple, set)):
                  return list(inp)
              if not isinstance(inp, str):
                  return []

              try:
                  parsed = literal_eval(inp)
              except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
                  # ValueError is still an Exception, so existing handlers keep working;
                  # chaining keeps the parser's own diagnosis in the traceback.
                  raise ValueError(f"Could not convert {inp} to list") from err

              # "(1, 2)" or "{1, 2}" parse to a tuple/set; honour the "to list" contract.
              if isinstance(parsed, (list, tuple, set)):
                  return list(parsed)
              return parsed
convert_to_tuple(inp)
¤
    convert_to_tuple converts string representation of tuple to tuple
It also works for list, tuple, or set input.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
inp | 
        string representation of tuple, list, tuple, set  | 
        required | 
Returns:
| Type | Description | 
|---|---|
tuple | 
      converted tuple  | 
    
Source code in dietbox/data/wrangling/misc.py
          def convert_to_tuple(inp):
              """
              convert_to_tuple converts string representation of tuple to tuple

              It also works for list, tuple, or set input, which is copied into a new
              tuple. A string is parsed with ``literal_eval``; if it evaluates to a
              list, tuple or set the result is returned as a tuple, any other literal
              (e.g. a number) is returned unchanged. Every other input type yields
              ``()``.

              :param inp: string representation of tuple, list, tuple, set
              :return: converted tuple
              :rtype: tuple
              :raises ValueError: if the string can not be parsed as a Python literal
              """
              if isinstance(inp, (list, tuple, set)):
                  return tuple(inp)
              if not isinstance(inp, str):
                  return ()

              try:
                  parsed = literal_eval(inp)
              except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
                  # ValueError is still an Exception, so existing handlers keep working.
                  raise ValueError(f"Could not convert {inp} to tuple") from err

              # The parsed value (not the input string) is what has to become a tuple.
              if isinstance(parsed, (list, tuple, set)):
                  return tuple(parsed)
              return parsed
eu_float_string_to_float(data)
¤
    eu_float_string_to_float converts strings in EU format to floats
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
data | 
        str | 
        string of the float in EU conventions  | 
        required | 
Returns:
| Type | Description | 
|---|---|
float | 
      converted float from the string  | 
    
Source code in dietbox/data/wrangling/misc.py
          def eu_float_string_to_float(data):
              """
              eu_float_string_to_float converts strings in EU format to floats

              Every ``.`` is removed (treated as a thousands separator) and every
              ``,`` is replaced by ``.`` (treated as the decimal separator) before
              the text is handed to ``float``.

              :param data: string of the float in EU conventions
              :type data: str
              :return: converted float from the string
              :rtype: float
              :raises TypeError: if the input is not a string
              :raises ValueError: if the normalised string is not a valid float
              """
              if not isinstance(data, str):
                  raise TypeError("Input data should be string")

              normalized = data.replace(".", "").replace(",", ".")
              try:
                  return float(normalized)
              except ValueError as err:
                  # ValueError is still an Exception, so existing handlers keep working;
                  # chaining preserves the original traceback.
                  raise ValueError(f"Could not convert string {data} to float: {err}") from err
get_value_in_dict_recursively(dictionary, path, ignore_path_fail=None)
¤
    Get value of a dictionary according to specified path (names)
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
dictionary | 
        dict | 
        input dictionary  | 
        required | 
path | 
        list | 
        path to the value to be obtained. This function always returns the value or None. >>> get_value_in_dict_recursively({'lvl_1':{'lvl_2':{'lvl_3':'lvl_3_value'}}},['lvl_1','lvl_2']) {'lvl_3': 'lvl_3_value'} >>> get_value_in_dict_recursively({1:{2:{3:'hi'}}},[1,'2',3]) 'hi'  | 
        required | 
Source code in dietbox/data/wrangling/misc.py
          def get_value_in_dict_recursively(dictionary, path, ignore_path_fail=None):
              """
              Get value of a dictionary according to specified path (names)

              The path is walked one key at a time. Each key is first tried in its
              ``int()``-coerced form (so ``'2'`` finds the key ``2``) and, if that
              lookup fails, in its original form (so ``'2'`` also finds the key
              ``'2'``). A lookup that fails ends the walk and gives ``None``.

              :param dict dictionary: input dictionary
              :param list path: path to the value to be obtained; a tuple is accepted
                  too, any other value is treated as a single-element path
              :param bool ignore_path_fail: defaults to True. When False, a key before
                  the last one that can not be coerced to int raises an Exception
                  instead of only logging a warning.
              :return: the value found at the path, or None if the path is empty or
                  can not be followed
              :raises Exception: if ``ignore_path_fail`` is False and a non-final key
                  can not be coerced to int

              >>> get_value_in_dict_recursively({'lvl_1':{'lvl_2':{'lvl_3':'lvl_3_value'}}},['lvl_1','lvl_2'])
              {'lvl_3': 'lvl_3_value'}
              >>> get_value_in_dict_recursively({1:{2:{3:'hi'}}},[1,'2',3])
              'hi'
              """
              if ignore_path_fail is None:
                  ignore_path_fail = True

              if isinstance(path, (list, tuple)):
                  keys = list(path)
              else:
                  logger.warning(f"path is not list or tuple, converting to list: {path}")
                  keys = [path]

              if not keys:
                  return None

              current = dictionary
              last_index = len(keys) - 1
              for index, raw_key in enumerate(keys):
                  is_leaf = index == last_index

                  # Candidate keys in lookup order: int-coerced form first, raw key as fallback.
                  candidates = []
                  try:
                      candidates.append(int(raw_key))
                  except (ValueError, TypeError) as err:
                      # The strictness flag only ever applied to keys before the final one.
                      if not is_leaf:
                          if not ignore_path_fail:
                              raise Exception(
                                  f"specified path ({path}) is not acceptable"
                              ) from err
                          logger.warning("can not get path")
                  if not candidates or not isinstance(raw_key, int):
                      candidates.append(raw_key)

                  failure = None
                  for candidate in candidates:
                      try:
                          current = current[candidate]
                      except (KeyError, IndexError, TypeError) as err:
                          failure = err
                      else:
                          failure = None
                          break

                  if failure is not None:
                      if is_leaf:
                          logger.error(f"{type(failure).__name__}: Could not find {raw_key}")
                      else:
                          logger.debug(f"did not get values for {raw_key}")
                      return None

              return current
remove_outliers(dataset, criteria=None)
¤
    remove_outliers will filter out the outliers of dataset
Changes will be made to original dataset.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
dataset | 
        dataframe that contains the data to be filtered  | 
        required | 
Source code in dietbox/data/wrangling/misc.py
          def remove_outliers(dataset, criteria=None):
              """
              remove_outliers will filter out the outliers of dataset

              The input dataframe is not modified; the filtered dataframe is returned
              and has to be used by the caller.

              ``criteria`` maps a column name to a dict with the optional keys

              - ``"isna"``: if truthy, rows where the column is NA are dropped first
              - ``"range"``: ``[lower, upper]`` bounds, defaults to ``[-inf, inf]``
              - ``"quantile_range"``: ``[lower_q, upper_q]`` quantiles of the column;
                each bound is tightened to the quantile value when that value lies
                inside ``"range"``

              Columns missing from the dataframe are skipped with a warning. When
              ``criteria`` is None, the column ``"target"`` is clipped to its 1%-99%
              quantile range.

              :param dataset: dataframe that contains the data to be filtered
              :param dict criteria: per-column filter settings, see above
              :return: dataframe with only the rows that pass every column filter
              """
              logger.info("Removing outliers ... ")
              if criteria is None:
                  criteria = {"target": {"quantile_range": [0.01, 0.99]}}

              for col, col_criteria in criteria.items():
                  if col not in dataset.columns:
                      logger.warning(f"Column {col} is not in dataset ({dataset.columns})")
                      continue

                  # Remove isna if required in criteria
                  if col_criteria.get("isna", False):
                      dataset = dataset.loc[~dataset[col].isna()]

                  # Copy the bounds so the caller's criteria dict is never mutated.
                  col_range = list(col_criteria.get("range", [-np.inf, np.inf]))
                  col_quantile_range = col_criteria.get("quantile_range", ())
                  if col_quantile_range:
                      quantile_lower = dataset[col].quantile(col_quantile_range[0])
                      quantile_upper = dataset[col].quantile(col_quantile_range[1])
                      # Only tighten the range; a quantile outside it is ignored.
                      if quantile_lower >= col_range[0]:
                          col_range[0] = quantile_lower
                      if quantile_upper <= col_range[1]:
                          col_range[1] = quantile_upper

                  # only use between values
                  dataset = dataset.loc[dataset[col].between(*col_range)]

              logger.info("Removed outliers!")
              return dataset
update_dict_recursively(dictionary, key_path, value)
¤
    update or insert values to a dictionary recursively.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
dictionary | 
        dict | 
        the dictionary to be inserted into  | 
        required | 
key_path | 
        list | 
        the path for the insertion value  | 
        required | 
value | 
        value to be inserted  | 
        required | 
Returns:
| Type | Description | 
|---|---|
a dictionary with the inserted value >>> update_dict_recursively({}, ['a', 'b', 1, 2], 'this_value') {'a': {'b': {1: {2: 'this_value'}}}}  | 
    
Source code in dietbox/data/wrangling/misc.py
          def update_dict_recursively(dictionary, key_path, value):
              """
              update or insert values to a dictionary recursively.

              Missing intermediate levels are created as empty dicts. The input
              dictionary is modified in place and also returned. An empty
              ``key_path`` leaves the dictionary untouched.

              :param dict dictionary: the dictionary to be inserted into
              :param list key_path: the path for the insertion value
              :param value: value to be inserted
              :returns: a dictionary with the inserted value

              >>> update_dict_recursively({}, ['a', 'b', 1, 2], 'this_value')
              {'a': {'b': {1: {2: 'this_value'}}}}
              """
              # No key names a slot to write into, so there is nothing to do.
              if len(key_path) == 0:
                  return dictionary

              *parent_keys, final_key = key_path
              node = dictionary
              for key in parent_keys:
                  if key not in node:
                      node[key] = {}
                  node = node[key]
              node[final_key] = value
              return dictionary