lab.model.feature
Lab - Model - Feature¤
        
MultiColumnCategicalEncoder        
¤
    LabelEncoder applied to multiple columns.
__init__(self, encoders=None, columns=None, encode_with=None)
  
      special
  
¤
    `__init__` initializes the MultiColumnCategicalEncoder with encoder and scaler types.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
encoders | 
        dict, optional | 
        dictionary of encoders to be used on cols, defaults to {}  | 
        None | 
      
columns | 
        list, optional | 
        list of columns to be encoded, defaults to None  | 
        None | 
      
encode_with | 
        scikit learn categorical encoders to be applied on cols  | 
        None | 
      
Source code in dietbox/lab/model/feature.py
          def __init__(self, encoders=None, columns=None, encode_with=None):
              """
              Set up the MultiColumnCategicalEncoder.

              :param encoders: mapping of column name to an already prepared
                  encoder; a falsy value (``None`` or an empty dict) is replaced
                  by a fresh empty dict
              :type encoders: dict, optional
              :param columns: list of columns to be encoded, defaults to None
              :type columns: list, optional
              :param encode_with: scikit learn categorical encoder to be applied
                  on cols, defaults to LabelEncoder
              """
              # Fall back to LabelEncoder only when the caller gave no encoder type.
              self.encode_with = LabelEncoder if encode_with is None else encode_with
              self.columns = columns
              # Keep the caller's dict when it has entries, otherwise start empty.
              self.encoders = encoders if encoders else {}
transform(self, X)
¤
    Transforms columns of X specified in self.columns using LabelEncoder(). If no columns specified, transforms all columns in X.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
X | 
        dataset to be transformed  | 
        required | 
Returns:
| Type | Description | 
|---|---|
transformed dataset  | 
    
Source code in dietbox/lab/model/feature.py
          def transform(self, X):
              """
              Encode columns of X and return the result as a copy.

              Columns listed in ``self.columns`` are encoded; if ``self.columns``
              is None, every column of X is encoded. A column that already has an
              encoder in ``self.encoders`` is transformed with it. Otherwise a new
              ``self.encode_with()`` instance is fitted on the column, stored in
              ``self.encoders`` and appended to ``self.check_encoders``.

              :param X: dataset to be transformed; it is copied with ``X.copy()``
                  and indexed by column name (presumably a pandas DataFrame)
              :return: transformed dataset (a copy; X itself is not modified)
              """
              output = X.copy()

              if self.columns is not None:
                  targets, step = self.columns, "1"
              else:
                  # Snapshot the column labels instead of iterating the frame
                  # while assigning to it; this also avoids the removed
                  # ``DataFrame.iteritems``.
                  targets, step = list(output.columns), "2"

              # ``check_encoders`` is not set by the ``__init__`` shown for this
              # class, so create it on demand rather than fail on the first fit.
              fitted_log = getattr(self, "check_encoders", None)
              if fitted_log is None:
                  fitted_log = self.check_encoders = []

              for col in targets:
                  # Look up by column *name*; a Series is unhashable and cannot
                  # be used as a dict key.
                  known = self.encoders.get(col)
                  if known is not None:
                      output[col] = known.transform(output[col])
                      continue

                  # Only build a new encoder when none is stored for this column.
                  _le = self.encode_with()
                  output[col] = _le.fit_transform(output[col])
                  logger.debug(f"{step}. preparing encoder for {col}")
                  self.encoders[col] = _le
                  fitted_log.append({col: _le})

              return output
        
MultiColumnScaler        
¤
    Scalers to be applied to multiple columns.
__init__(self, encoders=None, columns=None, encode_with=None)
  
      special
  
¤
    `__init__` initializes the MultiColumnScaler with encoder and scaler types.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
encoders | 
        dict, optional | 
        dictionary of encoders to be used on cols, defaults to {}  | 
        None | 
      
columns | 
        list, optional | 
        list of columns to be encoded, defaults to None  | 
        None | 
      
encode_with | 
        scaler to be applied, defaults to StandardScaler  | 
        None | 
      
Source code in dietbox/lab/model/feature.py
          def __init__(self, encoders=None, columns=None, encode_with=None):
              """
              Set up the MultiColumnScaler.

              :param encoders: mapping of column name to an already prepared
                  scaler; a falsy value (``None`` or an empty dict) is replaced
                  by a fresh empty dict
              :type encoders: dict, optional
              :param columns: list of columns to be scaled, defaults to None
              :type columns: list, optional
              :param encode_with: scaler to be applied, defaults to StandardScaler
              """
              # Names of the columns this instance should scale.
              self.columns = columns
              # Reuse the supplied scalers when there are any, otherwise start empty.
              self.encoders = encoders or {}
              # StandardScaler is the fallback when no scaler type is given.
              self.encode_with = StandardScaler if encode_with is None else encode_with
transform(self, X)
¤
    Transforms columns of X specified in self.columns using the configured scaler (StandardScaler by default). If no columns specified, transforms all columns in X.
Parameters:
| Name | Type | Description | Default | 
|---|---|---|---|
X | 
        dataset to be transformed  | 
        required | 
Returns:
| Type | Description | 
|---|---|
transformed dataset  | 
    
Source code in dietbox/lab/model/feature.py
          def transform(self, X):
              """
              Scale columns of X and return the result as a copy.

              Columns listed in ``self.columns`` are scaled; if ``self.columns``
              is None, every column of X is scaled. A column that already has a
              scaler in ``self.encoders`` is transformed with it. Otherwise a new
              ``self.encode_with()`` instance is fitted on the column, stored in
              ``self.encoders`` and appended to ``self.check_encoders``.

              :param X: dataset to be transformed; it is copied with ``X.copy()``
                  and indexed by column name (presumably a pandas DataFrame)
              :return: transformed dataset (a copy; X itself is not modified)
              """
              output = X.copy()

              if self.columns is not None:
                  targets, step = self.columns, "1"
              else:
                  # Snapshot the column labels instead of iterating the frame
                  # while assigning to it; this also avoids the removed
                  # ``DataFrame.iteritems``.
                  targets, step = list(output.columns), "2"

              # ``check_encoders`` is not set by the ``__init__`` shown for this
              # class, so create it on demand rather than fail on the first fit.
              fitted_log = getattr(self, "check_encoders", None)
              if fitted_log is None:
                  fitted_log = self.check_encoders = []

              for col in targets:
                  # The scaler is handed a single-column 2-D selection
                  # (``output[[col]]``), as in the original code.
                  frame = output[[col]]

                  # Look up by column *name*; a Series is unhashable and cannot
                  # be used as a dict key.
                  known = self.encoders.get(col)
                  if known is not None:
                      output[col] = known.transform(frame)
                      continue

                  # Only build a new scaler when none is stored for this column.
                  _scaler = self.encode_with()
                  output[col] = _scaler.fit_transform(frame)
                  logger.debug(f"{step}. preparing encoder for {col}")
                  self.encoders[col] = _scaler
                  fitted_log.append({col: _scaler})

              return output