Skip to content

lab.model.feature

Lab - Model - Feature¤

MultiColumnCategicalEncoder ¤

Labelencoder applied to multiple columns

__init__(self, encoders=None, columns=None, encode_with=None) special ¤

`__init__` initializes the MultiColumnCategicalEncoder with its pre-fitted encoders, the columns to encode, and the encoder type.

Parameters:

Name Type Description Default
encoders dict, optional

dictionary of encoders to be used on cols, defaults to {}

None
columns list, optional

list of columns to be encoded, defaults to None

None
encode_with

scikit learn categorical encoders to be applied on cols

None
Source code in dietbox/lab/model/feature.py
def __init__(self, encoders=None, columns=None, encode_with=None):
    """
    __init__ initializes the MultiColumnCategicalEncoder with encoder and scaler types.

    :param encoders: dictionary of encoders to be used on cols, keyed by
        column name; entries are used by transform() without refitting,
        defaults to {}
    :type encoders: dict, optional
    :param columns: list of columns to be encoded, defaults to None
    :type columns: list, optional
    :param encode_with: scikit learn categorical encoder class; it is called
        with no arguments to build one encoder per column, defaults to
        LabelEncoder
    """
    self.columns = columns
    self.encode_with = LabelEncoder if encode_with is None else encode_with
    # Any falsy value (None, {}) is replaced by a fresh dict.
    self.encoders = encoders if encoders else {}
    # transform() appends every newly fitted encoder to this list, so it
    # has to exist before the first call.
    self.check_encoders = []

transform(self, X) ¤

Transforms columns of X specified in self.columns using the configured encoder (LabelEncoder() by default). If no columns are specified, transforms all columns in X.

Parameters:

Name Type Description Default
X

dataset to be transformed

required

Returns:

Type Description

transformed dataset

Source code in dietbox/lab/model/feature.py
def transform(self, X):
    """
    Transforms columns of X specified in self.columns using the
    configured encoder (LabelEncoder by default). If no columns are
    specified, transforms all columns in X.

    A column that already has an encoder stored in self.encoders is
    transformed with that encoder. Otherwise a new encoder is created
    from self.encode_with, fitted on the column, stored in self.encoders
    and appended to self.check_encoders.

    :param X: dataset to be transformed
    :return: transformed dataset (a copy; X itself is left untouched)
    """
    output = X.copy()
    if self.columns is not None:
        for col in self.columns:
            fitted = self.encoders.get(col)
            if fitted is not None:
                output[col] = fitted.transform(output[col])
                continue
            # Only build a new encoder when none is stored for this column.
            _le = self.encode_with()
            output[col] = _le.fit_transform(output[col])
            logger.debug(f"1. preparing encoder for {col}")
            self.encoders[col] = _le
            self.check_encoders.append({col: _le})
    else:
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for colname, col in output.items():
            # Look the encoder up by column name; the Series itself is
            # unhashable and cannot be used as a dictionary key.
            fitted = self.encoders.get(colname)
            if fitted is not None:
                output[colname] = fitted.transform(col)
                continue
            _le = self.encode_with()
            output[colname] = _le.fit_transform(col)
            logger.debug(f"2. preparing encoder for {colname}")
            self.encoders[colname] = _le
            self.check_encoders.append({colname: _le})
    return output

MultiColumnScaler ¤

Scalers to be applied to multiple columns.

__init__(self, encoders=None, columns=None, encode_with=None) special ¤

`__init__` initializes the MultiColumnScaler with its pre-fitted scalers, the columns to scale, and the scaler type.

Parameters:

Name Type Description Default
encoders dict, optional

dictionary of encoders to be used on cols, defaults to {}

None
columns list, optional

list of columns to be encoded, defaults to None

None
encode_with

scaler to be applied, defaults to StandardScaler

None
Source code in dietbox/lab/model/feature.py
def __init__(self, encoders=None, columns=None, encode_with=None):
    """
    __init__ initializes the MultiColumnScaler with encoder and scaler types.

    :param encoders: dictionary of encoders (scalers) to be used on cols,
        keyed by column name; entries are used by transform() without
        refitting, defaults to {}
    :type encoders: dict, optional
    :param columns: list of columns to be encoded, defaults to None
    :type columns: list, optional
    :param encode_with: scaler class to be applied; it is called with no
        arguments to build one scaler per column, defaults to StandardScaler
    """
    self.encode_with = StandardScaler if encode_with is None else encode_with
    self.columns = columns  # array of column names to encode
    # Any falsy value (None, {}) is replaced by a fresh dict.
    self.encoders = encoders if encoders else {}
    # transform() appends every newly fitted scaler to this list, so it
    # has to exist before the first call.
    self.check_encoders = []

transform(self, X) ¤

Transforms columns of X specified in self.columns using the configured scaler (StandardScaler() by default). If no columns are specified, transforms all columns in X.

Parameters:

Name Type Description Default
X

dataset to be transformed

required

Returns:

Type Description

transformed dataset

Source code in dietbox/lab/model/feature.py
def transform(self, X):
    """
    Transforms columns of X specified in self.columns using the
    configured scaler (StandardScaler by default). If no columns are
    specified, transforms all columns in X.

    A column that already has a scaler stored in self.encoders is
    transformed with that scaler. Otherwise a new scaler is created from
    self.encode_with, fitted on the column, stored in self.encoders and
    appended to self.check_encoders.

    :param X: dataset to be transformed
    :return: transformed dataset (a copy; X itself is left untouched)
    """
    output = X.copy()
    if self.columns is not None:
        for col in self.columns:
            fitted = self.encoders.get(col)
            if fitted is not None:
                # Double brackets keep the column as a one-column frame.
                output[col] = fitted.transform(output[[col]])
                continue
            # Only build a new scaler when none is stored for this column.
            _scaler = self.encode_with()
            output[col] = _scaler.fit_transform(output[[col]])
            logger.debug(f"1. preparing encoder for {col}")
            self.encoders[col] = _scaler
            self.check_encoders.append({col: _scaler})
    else:
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for colname, col in output.items():
            # Look the scaler up by column name; the Series itself is
            # unhashable and cannot be used as a dictionary key.
            fitted = self.encoders.get(colname)
            if fitted is not None:
                output[colname] = fitted.transform(pd.DataFrame(col))
                continue
            _scaler = self.encode_with()
            output[colname] = _scaler.fit_transform(pd.DataFrame(col))
            logger.debug(f"2. preparing encoder for {colname}")
            self.encoders[colname] = _scaler
            self.check_encoders.append({colname: _scaler})
    return output