promto-c · July 23, 2024 06:46
diff --git a/tmp_example.py b/tmp_example.py
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error
 import category_encoders as ce
 import time

 # Load the raw dataset; the path is resolved relative to the working directory.
 file_path = 'car1.csv'
 data = pd.read_csv(file_path)

 # Preview the first few rows of the raw dataset.
 print(data.head())

 # Columns with object dtype are the ones treated as categorical below.
 categorical_cols = data.select_dtypes(include=['object']).columns

 # Encode the categorical columns with category_encoders' TargetEncoder,
 # using 'selling_price' as the target.
 # NOTE(review): the encoder is fitted on the full dataset before the
 # train/test split further down, so target values of rows that end up in the
 # test set take part in fitting the encoding — confirm this is acceptable.
 encoder = ce.TargetEncoder(cols=categorical_cols)
 data_encoded = encoder.fit_transform(data, data['selling_price'])
 # data_encoded = data
 # Preview the first few rows of the encoded dataset.
 print(data_encoded.head())

 # Separate the target column ('selling_price') from the feature columns.
 X = data_encoded.drop(columns=['selling_price'])
 y = data_encoded['selling_price']

 # Hold out 20% of the rows for testing, with a fixed random_state.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

 # Custom Linear Regression Class
 class CustomLinearRegression:
    """Ordinary Least Squares (OLS) linear regression solved in closed form.

    An intercept is always fitted: a column of ones is prepended to the
    feature matrix and the normal equations are solved with a tiny ridge
    term (1e-8) on the diagonal so a singular ``X^T X`` stays solvable.

    Attributes:
        _coefficients (numpy.ndarray): Intercept followed by one weight per
            feature; None until ``fit`` has been called.

    Methods:
        fit(X, y):
            Fits the linear model to the data.

        predict(X):
            Predicts the target values for the given input data.

        score(X, y):
            Computes the coefficient of determination (R^2 score) for the model.

        intercept_:
            Returns the intercept (bias) of the model.

        coef_:
            Returns the coefficients of the model.
    """

    def __init__(self):
        self._coefficients = None

    @staticmethod
    def _add_bias(X):
        """Return X as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.c_[np.ones((X.shape[0], 1)), X]

    def fit(self, X, y):
        """Fit the model by solving the (slightly regularized) normal equations.

        Args:
            X: Feature matrix (array-like or DataFrame), one row per sample.
            y: Target values (array-like or Series), one entry per sample.

        Returns:
            self: The fitted model.
        """
        X_b = self._add_bias(X)
        y = np.asarray(y, dtype=float)
        # Small ridge term keeps the system solvable when X^T X is singular.
        gram = X_b.T @ X_b + 1e-8 * np.eye(X_b.shape[1])
        # Solving the linear system avoids forming an explicit inverse.
        self._coefficients = np.linalg.solve(gram, X_b.T @ y)

        return self

    def predict(self, X):
        """Predict target values for X.

        Raises:
            ValueError: If the model has not been fitted yet.
        """
        if self._coefficients is None:
            raise ValueError("The model is not fitted yet.")
        return self._add_bias(X) @ self._coefficients

    def score(self, X, y):
        """Return the R^2 score: 1 - (residual sum of squares / total sum of squares)."""
        y_pred = self.predict(X)
        total_variance = ((y - y.mean()) ** 2).sum()
        residual_variance = ((y - y_pred) ** 2).sum()
        return 1 - (residual_variance / total_variance)

    @property
    def intercept_(self):
        """Fitted intercept, or None before ``fit``."""
        if self._coefficients is None:
            return None

        return self._coefficients[0]

    @property
    def coef_(self):
        """Fitted feature weights (intercept excluded), or None before ``fit``."""
        if self._coefficients is None:
            return None

        return self._coefficients[1:]


 # Fit the custom linear regression model and time the fit.
 # perf_counter() is used for the timings because it is a monotonic,
 # high-resolution clock intended for measuring short durations.
 custom_model = CustomLinearRegression()
 start_time = time.perf_counter()
 custom_model.fit(X_train, y_train)
 custom_time = time.perf_counter() - start_time

 # Make predictions on the test set using custom model
 y_pred_custom = custom_model.predict(X_test)
 print(X_test)
 print(y_test)
 print(y_pred_custom)

 # Calculate the mean squared error for custom model
 mse_custom = mean_squared_error(y_test, y_pred_custom)
 print(f"Custom Model Mean Squared Error: {mse_custom}")

 # Print the custom model parameters
 print(f"Custom Model Intercept: {custom_model.intercept_}")
 print(f"Custom Model Coefficients: {custom_model.coef_}")

 # Fit the scikit-learn linear regression model and time it the same way
 model = LinearRegression()
 start_time = time.perf_counter()
 model.fit(X_train, y_train)
 sklearn_time = time.perf_counter() - start_time

 # Make predictions on the test set using scikit-learn model
 y_pred_sklearn = model.predict(X_test)
 print(X_test)
 print(y_test)
 print(y_pred_sklearn)

 # Calculate the mean squared error for scikit-learn model
 mse_sklearn = mean_squared_error(y_test, y_pred_sklearn)
 print(f"Scikit-learn Model Mean Squared Error: {mse_sklearn}")

 # Print the scikit-learn model parameters
 print(f"Scikit-learn Model Intercept: {model.intercept_}")
 print(f"Scikit-learn Model Coefficients: {model.coef_}")

 # Compare execution time (elapsed seconds for each fit)
 print(f"Custom Model Training Time: {custom_time}")
 print(f"Scikit-learn Model Training Time: {sklearn_time}")
diff --git a/tmp_linear_regression.py b/tmp_linear_regression.py
 from typing import Tuple, Optional

 import numpy as np
 import pandas as pd

 def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow ``exp``.

    Args:
        x: Scalar, NumPy array or pandas Series.

    Returns:
        The logistic of ``x``, with the same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))

 def train_test_split_pandas(features: pd.DataFrame,
                             target: pd.Series,
                             test_size: float = 0.2,
                             random_state: Optional[int] = None
                            ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """Shuffle features and target together and split them into train and test sets.

    A single permutation is drawn and applied to both inputs, so every
    feature row stays paired with its target value whether or not
    ``random_state`` is given.

    Args:
        features (pd.DataFrame): The features DataFrame.
        target (pd.Series): The target Series, one entry per feature row.
        test_size (float): Proportion of the dataset to include in the test split.
        random_state (int, optional): Seed for the random number generator.

    Returns:
        Training features, testing features, training target, testing target
        (in that order). The index of each is reset before splitting.

    Raises:
        ValueError: If ``features`` and ``target`` have different lengths.
    """
    if len(features) != len(target):
        raise ValueError(
            f"features and target must have the same length "
            f"({len(features)} != {len(target)})."
        )

    # Draw the row order once; sampling each input separately would give two
    # unrelated permutations when random_state is None.
    order = pd.Series(np.arange(len(features))).sample(
        frac=1, random_state=random_state
    ).to_numpy()
    features = features.iloc[order].reset_index(drop=True)
    target = target.iloc[order].reset_index(drop=True)

    # The first test_len shuffled rows form the test set, the rest the training set.
    test_len = int(len(features) * test_size)
    training_features = features.iloc[test_len:]
    testing_features = features.iloc[:test_len]
    training_target = target.iloc[test_len:]
    testing_target = target.iloc[:test_len]

    return training_features, testing_features, training_target, testing_target

 class StandardScaler:
    """Standardize features by removing the mean and scaling to unit variance.

    Args:
        copy (bool): If True (default), ``transform`` and ``inverse_transform``
            return a new object and leave their input untouched; if False the
            input is modified in place.
        with_mean (bool): If True, center the data before scaling.
        with_std (bool): If True, scale the data to unit variance.

    Attributes:
        mean_: Per-feature mean computed by ``fit`` (zeros when ``with_mean``
            is False).
        scale_: Per-feature population standard deviation (ddof=0) computed by
            ``fit``, with exact zeros replaced by 1 (ones when ``with_std`` is
            False).

    Example:
        >>> import numpy as np
        >>> X = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
        >>> scaler = StandardScaler()
        >>> scaler.fit_transform(X)
        array([[-1.22474487, -1.22474487, -1.22474487],
               [ 0.        ,  0.        ,  0.        ],
               [ 1.22474487,  1.22474487,  1.22474487]])
        >>> scaler.mean_
        array([4., 5., 6.])
        >>> scaler.scale_
        array([2.44948974, 2.44948974, 2.44948974])
    """

    def __init__(self, copy=True, with_mean=True, with_std=True):
        """Initialize the scaler with the given parameters."""
        self.copy = copy
        self.with_mean = with_mean
        self.with_std = with_std
        self.mean_ = None
        self.scale_ = None

    def _check_fitted(self):
        """Raise ValueError if a statistic needed by an enabled step is missing."""
        if (self.with_mean and self.mean_ is None) or (self.with_std and self.scale_ is None):
            raise ValueError("The scaler is not fitted yet.")

    def fit(self, X, y=None):
        """Compute the mean and std to be used for later scaling.

        Args:
            X (numpy.ndarray): The data used to compute the mean and standard deviation.
            y (numpy.ndarray, optional): Ignored.

        Returns:
            self: Returns an instance of self.
        """
        if self.with_mean:
            self.mean_ = X.mean(axis=0)
        else:
            self.mean_ = np.zeros(X.shape[1])

        if self.with_std:
            scale = X.std(axis=0, ddof=0)
            # A constant feature has zero spread and dividing by it would give
            # NaN/inf; adding the boolean mask turns exactly those zeros into 1.
            self.scale_ = scale + (scale == 0)
        else:
            self.scale_ = np.ones(X.shape[1])

        return self

    def transform(self, X, y=None):
        """Perform standardization by centering and scaling.

        Args:
            X (numpy.ndarray): The data to be transformed.

        Returns:
            numpy.ndarray: The transformed data.

        Raises:
            ValueError: If a required statistic has not been fitted yet.
        """
        self._check_fitted()

        if not self.copy:
            # Caller asked for in-place scaling: mutate X directly.
            if self.with_mean:
                X -= self.mean_
            if self.with_std:
                X /= self.scale_
            return X

        # Out-of-place arithmetic lets integer input be promoted to float
        # instead of failing on an in-place cast.
        result = X.copy()
        if self.with_mean:
            result = result - self.mean_
        if self.with_std:
            result = result / self.scale_
        return result

    def fit_transform(self, X, y=None):
        """Fit to data, then transform it.

        Args:
            X (numpy.ndarray): The data to fit and transform.
            y (numpy.ndarray, optional): Ignored.

        Returns:
            numpy.ndarray: The transformed data.
        """
        return self.fit(X, y).transform(X)

    def inverse_transform(self, X, y=None):
        """Scale back the data to the original representation.

        Args:
            X (numpy.ndarray): The data to be inverse transformed.

        Returns:
            numpy.ndarray: The inverse transformed data.

        Raises:
            ValueError: If a required statistic has not been fitted yet.
        """
        self._check_fitted()

        if not self.copy:
            if self.with_std:
                X *= self.scale_
            if self.with_mean:
                X += self.mean_
            return X

        result = X.copy()
        if self.with_std:
            result = result * self.scale_
        if self.with_mean:
            result = result + self.mean_
        return result

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Args:
            deep (bool): Accepted for API compatibility; not used here.

        Returns:
            dict: Parameter names mapped to their values.
        """
        return {"copy": self.copy, "with_mean": self.with_mean, "with_std": self.with_std}

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Args:
            params (dict): The parameters to set for this estimator.

        Returns:
            self: Returns an instance of self.
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self

 class TargetEncoder:
    """Implementation of TargetEncoder for encoding categorical variables.

    Each category is replaced by a blend of its own target mean and the
    global target mean; the blend weight is a sigmoid of the category count
    (see ``_weighting``).

    Attributes:
        cols (list): List of columns to encode.
        target_means (dict): Maps each encoded column to a Series of smoothed
            target means indexed by category.
        global_mean: Mean of the target seen by ``fit``; None before fitting.

    Methods:
        fit(X, y):
            Fits the encoder to the data.
        transform(X):
            Transforms the categorical features using the learned target means.
        fit_transform(X, y):
            Fits the encoder and transforms the data in one step.
        get_params(deep=True):
            Get parameters for this estimator.
        set_params(**params):
            Set the parameters of this estimator.
    """

    def __init__(self, cols=None, handle_missing='value', handle_unknown='value', min_samples_leaf=20, smoothing=10):
        """Initialize the TargetEncoder with the given columns.

        Args:
            cols (list, optional): List of columns to encode. If None, all object-type columns are used.
            handle_missing (str, optional): Stored and reported by ``get_params``.
                ``transform`` maps missing values to the global target mean
                whatever this is set to. Defaults to 'value'.
            handle_unknown (str, optional): Stored and reported by ``get_params``.
                ``transform`` maps unseen categories to the global target mean
                whatever this is set to. Defaults to 'value'.
            min_samples_leaf (int, optional): Category count at which the blend weight is 0.5. Defaults to 20.
            smoothing (int, optional): Divisor controlling how quickly the weight changes with the count. Defaults to 10.
        """
        self.cols = cols
        self.handle_missing = handle_missing
        self.handle_unknown = handle_unknown
        self.min_samples_leaf = min_samples_leaf
        self.smoothing = smoothing
        self.target_means = {}
        self.global_mean = None

    def fit(self, X, y):
        """Fit the encoder to the data.

        Args:
            X (pandas.DataFrame): Feature matrix.
            y (pandas.Series): Target vector.

        Returns:
            self: Returns an instance of self.
        """
        if self.cols is None:
            self.cols = X.select_dtypes(include=['object']).columns.tolist()

        self.global_mean = y.mean()
        # Start from a clean mapping so a refit never keeps stale columns.
        self.target_means = {}

        for col in self.cols:
            stats = y.groupby(X[col]).agg(['count', 'mean'])
            weight = self._weighting(stats['count'])
            # No sentinel entries are stored here: a literal 'unknown' key
            # would overwrite the encoding of a real category with that name.
            # transform() falls back to the global mean for unmapped values.
            self.target_means[col] = self.global_mean * (1 - weight) + stats['mean'] * weight

        return self

    def transform(self, X):
        """Transform the categorical features using the learned target means.

        Values with no learned encoding (missing values and categories not
        seen during ``fit``) are replaced by the global target mean.

        Args:
            X (pandas.DataFrame): Feature matrix.

        Returns:
            pandas.DataFrame: Transformed copy of the feature matrix.

        Raises:
            ValueError: If the encoder has not been fitted yet.
        """
        if self.global_mean is None:
            raise ValueError("The encoder is not fitted yet.")

        X_encoded = X.copy()
        for col in self.cols:
            X_encoded[col] = X[col].map(self.target_means[col]).fillna(self.global_mean)
        return X_encoded

    def fit_transform(self, X, y):
        """Fit the encoder and transforms the data in one step.

        Args:
            X (pandas.DataFrame): Feature matrix.
            y (pandas.Series): Target vector.

        Returns:
            pandas.DataFrame: Transformed feature matrix.
        """
        return self.fit(X, y).transform(X)

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Args:
            deep (bool): Accepted for API compatibility; not used here.

        Returns:
            dict: Parameter names mapped to their values.
        """
        return {
            "cols": self.cols,
            "handle_missing": self.handle_missing,
            "handle_unknown": self.handle_unknown,
            "min_samples_leaf": self.min_samples_leaf,
            "smoothing": self.smoothing
        }

    def set_params(self, **params):
        """Set the parameters of this estimator.

        Args:
            params (dict): The parameters to set for this estimator.

        Returns:
            self: Returns an instance of self.
        """
        for key, value in params.items():
            setattr(self, key, value)
        return self

    def _weighting(self, n):
        """Compute the weighting for smoothing.

        Args:
            n: Number of samples per category (scalar or Series of counts).

        Returns:
            ``sigmoid((n - min_samples_leaf) / smoothing)``.
        """
        return sigmoid((n - self.min_samples_leaf) / self.smoothing)

 class LinearRegression:
    """A simple implementation of Linear Regression using Ordinary Least Squares (OLS).

    The normal equations are solved with a tiny ridge term (1e-8) on the
    diagonal so a singular ``X^T X`` stays solvable.

    Attributes:
        _coefficients (numpy.ndarray): Fitted weights (intercept first when
            ``fit_intercept`` is True); None until ``fit`` is called.
        fit_intercept (bool): Whether to calculate the intercept for this model.
        copy_X (bool): Accepted for API compatibility; ``fit`` never modifies
            its inputs, so the flag has no effect.
        n_jobs: Accepted for API compatibility; not used.
        positive (bool): When True, negative feature coefficients are clipped
            to zero after the unconstrained solve and the intercept (if any)
            is recomputed for the clipped coefficients. This is a projection,
            not a true non-negative least-squares solve.

    Methods:
        fit(X, y):
            Fits the linear model to the data.

        predict(X):
            Predicts the target values for the given input data.

        score(X, y):
            Computes the coefficient of determination (R^2 score) for the model.

        intercept_:
            Returns the intercept (bias) of the model.

        coef_:
            Returns the coefficients of the model.
    """

    def __init__(self, fit_intercept=True, copy_X=True, n_jobs=None, positive=False):
        self._coefficients = None
        self.fit_intercept = fit_intercept
        self.copy_X = copy_X
        self.n_jobs = n_jobs
        self.positive = positive

    @staticmethod
    def _as_2d(X):
        """Return X as a float array, turning a 1-D input into a single column."""
        X = np.asarray(X, dtype=float)
        return X.reshape(-1, 1) if X.ndim == 1 else X

    def _design_matrix(self, X):
        """Prepend the column of ones when an intercept is fitted."""
        if self.fit_intercept:
            return np.c_[np.ones((X.shape[0], 1)), X]
        return X

    def fit(self, X, y):
        """Fits the linear model to the data using the Ordinary Least Squares (OLS) method.

        Args:
            X (numpy.ndarray): Feature matrix.
            y (numpy.ndarray): Target vector.

        Returns:
            self: Returns an instance of self.
        """
        X = self._as_2d(X)
        y = np.asarray(y, dtype=float)
        X_b = self._design_matrix(X)

        # Small ridge term keeps the system solvable when X^T X is singular.
        gram = X_b.T @ X_b + 1e-8 * np.eye(X_b.shape[1])
        # Solving the linear system avoids forming an explicit inverse.
        coefficients = np.linalg.solve(gram, X_b.T @ y)

        if self.positive:
            # Only feature weights are constrained; the intercept may be negative.
            first = 1 if self.fit_intercept else 0
            coefficients[first:] = np.maximum(coefficients[first:], 0)
            if self.fit_intercept:
                # With the slopes fixed, the best intercept is the mean residual.
                coefficients[0] = y.mean(axis=0) - X.mean(axis=0) @ coefficients[1:]

        self._coefficients = coefficients
        return self

    def predict(self, X):
        """Predicts the target values for the given input data.

        Args:
            X (numpy.ndarray): Feature matrix.

        Returns:
            numpy.ndarray: Predicted target values.

        Raises:
            ValueError: If the model has not been fitted yet.
        """
        if self._coefficients is None:
            raise ValueError("The model is not fitted yet.")
        return self._design_matrix(self._as_2d(X)) @ self._coefficients

    def score(self, X, y):
        """Computes the coefficient of determination (R^2 score) for the model.

        Args:
            X (numpy.ndarray): Feature matrix.
            y (numpy.ndarray): True target values.

        Returns:
            float: R^2 score.
        """
        y_pred = self.predict(X)
        total_variance = ((y - y.mean()) ** 2).sum()
        residual_variance = ((y - y_pred) ** 2).sum()
        return 1 - (residual_variance / total_variance)

    @property
    def intercept_(self):
        """Returns the intercept (bias) of the model.

        Returns:
            float: Intercept of the model; 0.0 when no intercept is fitted or
            the model is not fitted yet.
        """
        if self.fit_intercept and self._coefficients is not None:
            return self._coefficients[0]
        return 0.0

    @property
    def coef_(self):
        """Returns the coefficients of the model.

        Returns:
            numpy.ndarray: Feature weights (intercept excluded), or None
            before ``fit``.
        """
        if self.fit_intercept and self._coefficients is not None:
            return self._coefficients[1:]
        return self._coefficients
diff --git a/tmp_multi_label_binarizer.py b/tmp_multi_label_binarizer.py
 import numpy as np
 import pandas as pd
 from typing import List, Union

 class MultiLabelBinarizer:
    """Encode multilabel data as a binary indicator matrix.

    Attributes:
        classes_ (np.ndarray): Sorted array of every distinct label seen
            during fitting; None until ``fit`` is called.

    Methods:
        fit(X):
            Learns the set of classes from the data.

        transform(X):
            Builds the indicator matrix for the data using the learned classes.

        fit_transform(X):
            Runs ``fit`` followed by ``transform`` on the same data.
    """

    def __init__(self):
        self.classes_: np.ndarray = None

    def fit(self, X: List[List[Union[str, int]]]) -> 'MultiLabelBinarizer':
        """Learn the sorted set of labels present in X.

        Args:
            X (List[List[Union[str, int]]]): One collection of labels per sample.

        Returns:
            self: Returns an instance of self.
        """
        seen = {label for sample_labels in X for label in sample_labels}
        self.classes_ = np.array(sorted(seen))
        return self

    def transform(self, X: List[List[Union[str, int]]]) -> np.ndarray:
        """Turn each sample's labels into a row of 0/1 flags.

        Args:
            X (List[List[Union[str, int]]]): One collection of labels per sample.

        Returns:
            np.ndarray: Integer matrix of shape (len(X), len(classes_)) with a
            1 in every column whose class appears in the sample.

        Raises:
            ValueError: If the binarizer has not been fitted.
        """
        if self.classes_ is None:
            raise ValueError("The binarizer is not fitted yet.")

        n_rows, n_cols = len(X), len(self.classes_)
        indicator = np.zeros((n_rows, n_cols), dtype=int)
        # Column position of each known class, in classes_ order.
        column_of = dict(zip(self.classes_, range(n_cols)))

        for row, sample_labels in enumerate(X):
            for label in sample_labels:
                indicator[row, column_of[label]] = 1
        return indicator

    def fit_transform(self, X: List[List[Union[str, int]]]) -> np.ndarray:
        """Learn the classes from X and return its indicator matrix.

        Args:
            X (List[List[Union[str, int]]]): One collection of labels per sample.

        Returns:
            np.ndarray: Binarized data.
        """
        self.fit(X)
        return self.transform(X)

 # Example usage
 if __name__ == "__main__":
    # Toy input: one comma-separated string of tags per row.
    data = pd.DataFrame({'tags': ['a,b,c', 'b,c,d', 'a,d', 'a,b']})

    # Split every tag string into a list of individual labels.
    data['tags'] = data['tags'].apply(lambda tag_string: tag_string.split(','))

    # Binarize the label lists and name the columns after the learned classes.
    mlb = MultiLabelBinarizer()
    tags_encoded = mlb.fit_transform(data['tags'].tolist())
    tags_df = pd.DataFrame(tags_encoded, columns=mlb.classes_)

    # Swap the original 'tags' column for the indicator columns.
    remaining_columns = data.drop(columns=['tags'])
    encoded_data = pd.concat([remaining_columns, tags_df], axis=1)
    print("MultiLabelBinarizer Encoded Data:\n", encoded_data)
	import numpy as np
	import pandas as pd
	from sklearn.linear_model import LinearRegression
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import mean_squared_error
	import category_encoders as ce
	import time

	# Load the raw dataset; the path is resolved relative to the working directory.
	file_path = 'car1.csv'
	data = pd.read_csv(file_path)

	# Preview the first few rows of the raw dataset.
	print(data.head())

	# Columns with object dtype are the ones treated as categorical below.
	categorical_cols = data.select_dtypes(include=['object']).columns

	# Encode the categorical columns with category_encoders' TargetEncoder,
	# using 'selling_price' as the target.
	# NOTE(review): the encoder is fitted on the full dataset before the
	# train/test split further down, so target values of rows that end up in the
	# test set take part in fitting the encoding — confirm this is acceptable.
	encoder = ce.TargetEncoder(cols=categorical_cols)
	data_encoded = encoder.fit_transform(data, data['selling_price'])
	# data_encoded = data
	# Preview the first few rows of the encoded dataset.
	print(data_encoded.head())

	# Separate the target column ('selling_price') from the feature columns.
	X = data_encoded.drop(columns=['selling_price'])
	y = data_encoded['selling_price']

	# Hold out 20% of the rows for testing, with a fixed random_state.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

	# Custom Linear Regression Class
	class CustomLinearRegression:
	    """Ordinary Least Squares (OLS) linear regression solved in closed form.

	    An intercept is always fitted: a column of ones is prepended to the
	    feature matrix and the normal equations are solved with a tiny ridge
	    term (1e-8) on the diagonal so a singular ``X^T X`` stays solvable.

	    Attributes:
	        _coefficients (numpy.ndarray): Intercept followed by one weight per
	            feature; None until ``fit`` has been called.

	    Methods:
	        fit(X, y):
	            Fits the linear model to the data.

	        predict(X):
	            Predicts the target values for the given input data.

	        score(X, y):
	            Computes the coefficient of determination (R^2 score) for the model.

	        intercept_:
	            Returns the intercept (bias) of the model.

	        coef_:
	            Returns the coefficients of the model.
	    """

	    def __init__(self):
	        self._coefficients = None

	    @staticmethod
	    def _add_bias(X):
	        """Return X as a float array with a leading column of ones."""
	        X = np.asarray(X, dtype=float)
	        return np.c_[np.ones((X.shape[0], 1)), X]

	    def fit(self, X, y):
	        """Fit the model by solving the (slightly regularized) normal equations.

	        Args:
	            X: Feature matrix (array-like or DataFrame), one row per sample.
	            y: Target values (array-like or Series), one entry per sample.

	        Returns:
	            self: The fitted model.
	        """
	        X_b = self._add_bias(X)
	        y = np.asarray(y, dtype=float)
	        # Small ridge term keeps the system solvable when X^T X is singular.
	        gram = X_b.T @ X_b + 1e-8 * np.eye(X_b.shape[1])
	        # Solving the linear system avoids forming an explicit inverse.
	        self._coefficients = np.linalg.solve(gram, X_b.T @ y)

	        return self

	    def predict(self, X):
	        """Predict target values for X.

	        Raises:
	            ValueError: If the model has not been fitted yet.
	        """
	        if self._coefficients is None:
	            raise ValueError("The model is not fitted yet.")
	        return self._add_bias(X) @ self._coefficients

	    def score(self, X, y):
	        """Return the R^2 score: 1 - (residual sum of squares / total sum of squares)."""
	        y_pred = self.predict(X)
	        total_variance = ((y - y.mean()) ** 2).sum()
	        residual_variance = ((y - y_pred) ** 2).sum()
	        return 1 - (residual_variance / total_variance)

	    @property
	    def intercept_(self):
	        """Fitted intercept, or None before ``fit``."""
	        if self._coefficients is None:
	            return None

	        return self._coefficients[0]

	    @property
	    def coef_(self):
	        """Fitted feature weights (intercept excluded), or None before ``fit``."""
	        if self._coefficients is None:
	            return None

	        return self._coefficients[1:]


	# Fit the custom linear regression model and time the fit.
	# perf_counter() is used for the timings because it is a monotonic,
	# high-resolution clock intended for measuring short durations.
	custom_model = CustomLinearRegression()
	start_time = time.perf_counter()
	custom_model.fit(X_train, y_train)
	custom_time = time.perf_counter() - start_time

	# Make predictions on the test set using custom model
	y_pred_custom = custom_model.predict(X_test)
	print(X_test)
	print(y_test)
	print(y_pred_custom)

	# Calculate the mean squared error for custom model
	mse_custom = mean_squared_error(y_test, y_pred_custom)
	print(f"Custom Model Mean Squared Error: {mse_custom}")

	# Print the custom model parameters
	print(f"Custom Model Intercept: {custom_model.intercept_}")
	print(f"Custom Model Coefficients: {custom_model.coef_}")

	# Fit the scikit-learn linear regression model and time it the same way
	model = LinearRegression()
	start_time = time.perf_counter()
	model.fit(X_train, y_train)
	sklearn_time = time.perf_counter() - start_time

	# Make predictions on the test set using scikit-learn model
	y_pred_sklearn = model.predict(X_test)
	print(X_test)
	print(y_test)
	print(y_pred_sklearn)

	# Calculate the mean squared error for scikit-learn model
	mse_sklearn = mean_squared_error(y_test, y_pred_sklearn)
	print(f"Scikit-learn Model Mean Squared Error: {mse_sklearn}")

	# Print the scikit-learn model parameters
	print(f"Scikit-learn Model Intercept: {model.intercept_}")
	print(f"Scikit-learn Model Coefficients: {model.coef_}")

	# Compare execution time (elapsed seconds for each fit)
	print(f"Custom Model Training Time: {custom_time}")
	print(f"Scikit-learn Model Training Time: {sklearn_time}")
	from typing import Tuple, Optional

	import numpy as np
	import pandas as pd

	def sigmoid(x):
	    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

	    The value is computed as ``exp(-log(1 + exp(-x)))`` through
	    ``np.logaddexp`` so that large negative inputs do not overflow ``exp``.

	    Args:
	        x: Scalar, NumPy array or pandas Series.

	    Returns:
	        The logistic of ``x``, with the same shape as ``x``.
	    """
	    return np.exp(-np.logaddexp(0, -x))

	def train_test_split_pandas(features: pd.DataFrame,
	                            target: pd.Series,
	                            test_size: float = 0.2,
	                            random_state: Optional[int] = None
	                            ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
	    """Shuffle features and target together and split them into train and test sets.

	    A single permutation is drawn and applied to both inputs, so every
	    feature row stays paired with its target value whether or not
	    ``random_state`` is given.

	    Args:
	        features (pd.DataFrame): The features DataFrame.
	        target (pd.Series): The target Series, one entry per feature row.
	        test_size (float): Proportion of the dataset to include in the test split.
	        random_state (int, optional): Seed for the random number generator.

	    Returns:
	        Training features, testing features, training target, testing target
	        (in that order). The index of each is reset before splitting.

	    Raises:
	        ValueError: If ``features`` and ``target`` have different lengths.
	    """
	    if len(features) != len(target):
	        raise ValueError(
	            f"features and target must have the same length "
	            f"({len(features)} != {len(target)})."
	        )

	    # Draw the row order once; sampling each input separately would give two
	    # unrelated permutations when random_state is None.
	    order = pd.Series(np.arange(len(features))).sample(
	        frac=1, random_state=random_state
	    ).to_numpy()
	    features = features.iloc[order].reset_index(drop=True)
	    target = target.iloc[order].reset_index(drop=True)

	    # The first test_len shuffled rows form the test set, the rest the training set.
	    test_len = int(len(features) * test_size)
	    training_features = features.iloc[test_len:]
	    testing_features = features.iloc[:test_len]
	    training_target = target.iloc[test_len:]
	    testing_target = target.iloc[:test_len]

	    return training_features, testing_features, training_target, testing_target

	class StandardScaler:
	    """Standardize features by removing the mean and scaling to unit variance.

	    Args:
	        copy (bool): If True (default), ``transform`` and ``inverse_transform``
	            return a new object and leave their input untouched; if False the
	            input is modified in place.
	        with_mean (bool): If True, center the data before scaling.
	        with_std (bool): If True, scale the data to unit variance.

	    Attributes:
	        mean_: Per-feature mean computed by ``fit`` (zeros when ``with_mean``
	            is False).
	        scale_: Per-feature population standard deviation (ddof=0) computed by
	            ``fit``, with exact zeros replaced by 1 (ones when ``with_std`` is
	            False).

	    Example:
	        >>> import numpy as np
	        >>> X = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
	        >>> scaler = StandardScaler()
	        >>> scaler.fit_transform(X)
	        array([[-1.22474487, -1.22474487, -1.22474487],
	               [ 0.        ,  0.        ,  0.        ],
	               [ 1.22474487,  1.22474487,  1.22474487]])
	        >>> scaler.mean_
	        array([4., 5., 6.])
	        >>> scaler.scale_
	        array([2.44948974, 2.44948974, 2.44948974])
	    """

	    def __init__(self, copy=True, with_mean=True, with_std=True):
	        """Initialize the scaler with the given parameters."""
	        self.copy = copy
	        self.with_mean = with_mean
	        self.with_std = with_std
	        self.mean_ = None
	        self.scale_ = None

	    def _check_fitted(self):
	        """Raise ValueError if a statistic needed by an enabled step is missing."""
	        if (self.with_mean and self.mean_ is None) or (self.with_std and self.scale_ is None):
	            raise ValueError("The scaler is not fitted yet.")

	    def fit(self, X, y=None):
	        """Compute the mean and std to be used for later scaling.

	        Args:
	            X (numpy.ndarray): The data used to compute the mean and standard deviation.
	            y (numpy.ndarray, optional): Ignored.

	        Returns:
	            self: Returns an instance of self.
	        """
	        if self.with_mean:
	            self.mean_ = X.mean(axis=0)
	        else:
	            self.mean_ = np.zeros(X.shape[1])

	        if self.with_std:
	            scale = X.std(axis=0, ddof=0)
	            # A constant feature has zero spread and dividing by it would give
	            # NaN/inf; adding the boolean mask turns exactly those zeros into 1.
	            self.scale_ = scale + (scale == 0)
	        else:
	            self.scale_ = np.ones(X.shape[1])

	        return self

	    def transform(self, X, y=None):
	        """Perform standardization by centering and scaling.

	        Args:
	            X (numpy.ndarray): The data to be transformed.

	        Returns:
	            numpy.ndarray: The transformed data.

	        Raises:
	            ValueError: If a required statistic has not been fitted yet.
	        """
	        self._check_fitted()

	        if not self.copy:
	            # Caller asked for in-place scaling: mutate X directly.
	            if self.with_mean:
	                X -= self.mean_
	            if self.with_std:
	                X /= self.scale_
	            return X

	        # Out-of-place arithmetic lets integer input be promoted to float
	        # instead of failing on an in-place cast.
	        result = X.copy()
	        if self.with_mean:
	            result = result - self.mean_
	        if self.with_std:
	            result = result / self.scale_
	        return result

	    def fit_transform(self, X, y=None):
	        """Fit to data, then transform it.

	        Args:
	            X (numpy.ndarray): The data to fit and transform.
	            y (numpy.ndarray, optional): Ignored.

	        Returns:
	            numpy.ndarray: The transformed data.
	        """
	        return self.fit(X, y).transform(X)

	    def inverse_transform(self, X, y=None):
	        """Scale back the data to the original representation.

	        Args:
	            X (numpy.ndarray): The data to be inverse transformed.

	        Returns:
	            numpy.ndarray: The inverse transformed data.

	        Raises:
	            ValueError: If a required statistic has not been fitted yet.
	        """
	        self._check_fitted()

	        if not self.copy:
	            if self.with_std:
	                X *= self.scale_
	            if self.with_mean:
	                X += self.mean_
	            return X

	        result = X.copy()
	        if self.with_std:
	            result = result * self.scale_
	        if self.with_mean:
	            result = result + self.mean_
	        return result

	    def get_params(self, deep=True):
	        """Get parameters for this estimator.

	        Args:
	            deep (bool): Accepted for API compatibility; not used here.

	        Returns:
	            dict: Parameter names mapped to their values.
	        """
	        return {"copy": self.copy, "with_mean": self.with_mean, "with_std": self.with_std}

	    def set_params(self, **params):
	        """Set the parameters of this estimator.

	        Args:
	            params (dict): The parameters to set for this estimator.

	        Returns:
	            self: Returns an instance of self.
	        """
	        for key, value in params.items():
	            setattr(self, key, value)
	        return self

	class TargetEncoder:
	"""Implementation of TargetEncoder for encoding categorical variables.

	Attributes:
	cols (list): List of columns to encode.
	target_means (dict): Dictionary to store the mean target values for each category.

	Methods:
	fit(X, y):
	Fits the encoder to the data.
	transform(X):
	Transforms the categorical features using the learned target means.
	fit_transform(X, y):
	Fits the encoder and transforms the data in one step.
	get_params(deep=True):
	Get parameters for this estimator.
	set_params(**params):
	Set the parameters of this estimator.
	"""

	def __init__(self, cols=None, handle_missing='value', handle_unknown='value', min_samples_leaf=20, smoothing=10):
	"""Initialize the TargetEncoder with the given columns.

	Args:
	cols (list, optional): List of columns to encode. If None, all object-type columns are used.
	handle_missing (str, optional): How to handle missing values. Defaults to 'value'.
	handle_unknown (str, optional): How to handle unknown values. Defaults to 'value'.
	min_samples_leaf (int, optional): Minimum samples leaf for smoothing. Defaults to 20.
	smoothing (int, optional): Smoothing parameter. Defaults to 10.
	"""
	self.cols = cols
	self.handle_missing = handle_missing
	self.handle_unknown = handle_unknown
	self.min_samples_leaf = min_samples_leaf
	self.smoothing = smoothing
	self.target_means = {}
	self.global_mean = None

	def fit(self, X, y):
	"""Fit the encoder to the data.

	Args:
	X (pandas.DataFrame): Feature matrix.
	y (pandas.Series): Target vector.

	Returns:
	self: Returns an instance of self.
	"""
	if self.cols is None:
	self.cols = X.select_dtypes(include=['object']).columns.tolist()

	self.global_mean = y.mean()

	for col in self.cols:
	stats = y.groupby(X[col]).agg(['count', 'mean'])
	weight = self._weighting(stats['count'])
	self.target_means[col] = self.global_mean * (1 - weight) + stats['mean'] * weight

	if self.handle_missing == 'value':
	self.target_means[col].loc[np.nan] = self.global_mean
	if self.handle_unknown == 'value':
	self.target_means[col].loc['unknown'] = self.global_mean

	return self

	def transform(self, X):
	"""Transform the categorical features using the learned target means.

	Args:
	X (pandas.DataFrame): Feature matrix.

	Returns:
	pandas.DataFrame: Transformed feature matrix.
	"""
	X_encoded = X.copy()
	for col in self.cols:
	X_encoded[col] = X[col].map(self.target_means[col]).fillna(self.global_mean)
	if self.handle_unknown == 'value':
	X_encoded[col] = X_encoded[col].fillna(self.target_means[col].get('unknown', self.global_mean))
	elif self.handle_missing == 'value':
	X_encoded[col] = X_encoded[col].fillna(self.global_mean)
	return X_encoded

	def fit_transform(self, X, y):
	"""Fit the encoder and transforms the data in one step.

	Args:
	X (pandas.DataFrame): Feature matrix.
	y (pandas.Series): Target vector.

	Returns:
	pandas.DataFrame: Transformed feature matrix.
	"""
	return self.fit(X, y).transform(X)

	def get_params(self, deep=True):
	    """Return the configuration of this estimator as a dictionary.

	    Args:
	        deep (bool): Accepted for API compatibility; this method does not use it.

	    Returns:
	        dict: Parameter names mapped to their current values.
	    """
	    param_names = (
	        "cols",
	        "handle_missing",
	        "handle_unknown",
	        "min_samples_leaf",
	        "smoothing",
	    )
	    return {name: getattr(self, name) for name in param_names}

	def set_params(self, **params):
	    """Assign each given keyword argument as an attribute of this estimator.

	    Args:
	        params (dict): Attribute names mapped to the values to store.

	    Returns:
	        self: Returns an instance of self.
	    """
	    for name in params:
	        setattr(self, name, params[name])
	    return self

	def _weighting(self, n):
	    """Compute the weighting for smoothing.

	    Args:
	        n: Number of samples. ``fit`` passes the per-category counts as a
	            pandas Series.

	    Returns:
	        The result of ``sigmoid`` applied to
	        ``(n - min_samples_leaf) / smoothing``.
	    """
	    shifted_count = n - self.min_samples_leaf
	    return sigmoid(shifted_count / self.smoothing)

	class LinearRegression:
	    """A simple implementation of Linear Regression using Ordinary Least Squares (OLS).

	    The model is fitted through the normal equations with a tiny ridge term
	    (1e-8) added to the diagonal so that a singular ``X^T X`` stays solvable.

	    Attributes:
	        fit_intercept (bool): Whether to calculate the intercept for this model.
	        copy_X (bool): Stored for API compatibility. Inputs are never modified
	            in place, so the flag has no observable effect.
	        n_jobs: Stored for API compatibility; not used by this implementation.
	        positive (bool): When True, negative feature coefficients are clipped
	            to zero after the fit. The intercept is left untouched.
	        _coefficients (numpy.ndarray or None): Fitted parameters, with the
	            intercept first when ``fit_intercept`` is True. None until ``fit``
	            has been called.

	    Methods:
	        fit(X, y):
	            Fits the linear model to the data.

	        predict(X):
	            Predicts the target values for the given input data.

	        score(X, y):
	            Computes the coefficient of determination (R^2 score) for the model.

	        intercept_:
	            Returns the intercept (bias) of the model.

	        coef_:
	            Returns the coefficients of the model.
	    """

	    def __init__(self, fit_intercept=True, copy_X=True, n_jobs=None, positive=False):
	        self._coefficients = None
	        self.fit_intercept = fit_intercept
	        self.copy_X = copy_X
	        self.n_jobs = n_jobs
	        self.positive = positive

	    def _design_matrix(self, X):
	        """Return X as a 2-D float array, with a leading column of ones if an intercept is fitted."""
	        features = np.asarray(X, dtype=float)
	        if features.ndim == 1:
	            # A 1-D input is treated as a single feature column.
	            features = features.reshape(-1, 1)
	        if self.fit_intercept:
	            return np.c_[np.ones((features.shape[0], 1)), features]
	        return features

	    def fit(self, X, y):
	        """Fits the linear model to the data using the Ordinary Least Squares (OLS) method.

	        Args:
	            X (array-like): Feature matrix.
	            y (array-like): Target vector.

	        Returns:
	            self: Returns an instance of self.
	        """
	        X_b = self._design_matrix(X)
	        targets = np.asarray(y, dtype=float)

	        # Add a small value to the diagonal to handle singularity
	        gram = X_b.T @ X_b + 1e-8 * np.eye(X_b.shape[1])
	        # Solve the linear system directly instead of forming the explicit
	        # inverse: same solution, better numerical accuracy.
	        coefficients = np.linalg.solve(gram, X_b.T @ targets)

	        if self.positive:
	            # Clip only the feature weights; the intercept may legitimately
	            # be negative and must not be forced to zero.
	            first_weight = 1 if self.fit_intercept else 0
	            coefficients[first_weight:] = np.maximum(coefficients[first_weight:], 0)

	        self._coefficients = coefficients
	        return self

	    def predict(self, X):
	        """Predicts the target values for the given input data.

	        Args:
	            X (array-like): Feature matrix.

	        Returns:
	            numpy.ndarray: Predicted target values.

	        Raises:
	            ValueError: If the model has not been fitted yet.
	        """
	        if self._coefficients is None:
	            raise ValueError("The model is not fitted yet.")
	        return self._design_matrix(X) @ self._coefficients

	    def score(self, X, y):
	        """Computes the coefficient of determination (R^2 score) for the model.

	        Args:
	            X (array-like): Feature matrix.
	            y (array-like): True target values.

	        Returns:
	            float: R^2 score. For a constant target (zero total variance) the
	            score is 1.0 when the predictions are exact and 0.0 otherwise.
	        """
	        y_true = np.asarray(y, dtype=float)
	        residual_variance = ((y_true - self.predict(X)) ** 2).sum()
	        total_variance = ((y_true - y_true.mean()) ** 2).sum()
	        if total_variance == 0:
	            # Avoid 0/0 and x/0: R^2 is not defined for a constant target.
	            return 1.0 if residual_variance == 0 else 0.0
	        return float(1 - (residual_variance / total_variance))

	    @property
	    def intercept_(self):
	        """Returns the intercept (bias) of the model.

	        Returns:
	            float: Intercept of the model; 0.0 when no intercept is fitted or
	            the model has not been fitted yet.
	        """
	        if self.fit_intercept and self._coefficients is not None:
	            return self._coefficients[0]
	        return 0.0

	    @property
	    def coef_(self):
	        """Returns the coefficients of the model.

	        Returns:
	            numpy.ndarray: Feature coefficients without the intercept, or None
	            when the model has not been fitted yet.
	        """
	        if self.fit_intercept and self._coefficients is not None:
	            return self._coefficients[1:]
	        return self._coefficients
	import numpy as np
	import pandas as pd
	from typing import List, Union

	class MultiLabelBinarizer:
	    """Implementation of MultiLabelBinarizer for encoding multilabel data.

	    Each sample is a collection of labels; the encoded form is a 0/1 matrix
	    with one column per distinct label seen during fitting.

	    Attributes:
	        classes_ (np.ndarray): Sorted array of all unique classes found during
	            fitting; None until ``fit`` has been called.

	    Methods:
	        fit(X):
	            Fits the binarizer to the data.

	        transform(X):
	            Transforms the data using the learned classes.

	        fit_transform(X):
	            Fits the binarizer and transforms the data in one step.
	    """

	    def __init__(self):
	        self.classes_: np.ndarray = None  # stays None until fit() is called

	    def fit(self, X: List[List[Union[str, int]]]) -> 'MultiLabelBinarizer':
	        """Fits the binarizer to the data.

	        Args:
	            X (List[List[Union[str, int]]]): Multilabel data to fit.

	        Returns:
	            self: Returns an instance of self.
	        """
	        unique_classes = set().union(*X)
	        self.classes_ = np.array(sorted(unique_classes))
	        return self

	    def transform(self, X: List[List[Union[str, int]]]) -> np.ndarray:
	        """Transforms the data using the learned classes.

	        Labels that were not seen during ``fit`` are ignored and reported
	        once through a warning instead of aborting the whole transform.

	        Args:
	            X (List[List[Union[str, int]]]): Multilabel data to transform.

	        Returns:
	            np.ndarray: Binarized data with one row per sample and one column
	            per learned class.

	        Raises:
	            ValueError: If the binarizer has not been fitted yet.
	        """
	        import warnings  # local import: only used on the unknown-label path

	        if self.classes_ is None:
	            raise ValueError("The binarizer is not fitted yet.")

	        class_to_index = {cls: idx for idx, cls in enumerate(self.classes_)}
	        binarized = np.zeros((len(X), len(self.classes_)), dtype=int)
	        unknown = set()
	        for row, labels in enumerate(X):
	            for label in labels:
	                column = class_to_index.get(label)
	                if column is None:
	                    unknown.add(label)
	                else:
	                    binarized[row, column] = 1

	        if unknown:
	            warnings.warn(
	                f"unknown class(es) {sorted(unknown, key=str)} will be ignored"
	            )
	        return binarized

	    def fit_transform(self, X: List[List[Union[str, int]]]) -> np.ndarray:
	        """Fits the binarizer and transforms the data in one step.

	        Args:
	            X (List[List[Union[str, int]]]): Multilabel data to fit and transform.

	        Returns:
	            np.ndarray: Binarized data.
	        """
	        return self.fit(X).transform(X)

	# Example usage
	if __name__ == "__main__":
	    # Toy frame: each row stores its tags as one comma-separated string
	    data = pd.DataFrame({'tags': ['a,b,c', 'b,c,d', 'a,d', 'a,b']})

	    # Break every tag string into a list of individual labels
	    data['tags'] = data['tags'].map(lambda tag_string: tag_string.split(','))

	    # Binarize the label lists: one indicator column per distinct tag
	    mlb = MultiLabelBinarizer()
	    tags_encoded = mlb.fit_transform(list(data['tags']))
	    tags_df = pd.DataFrame(tags_encoded, columns=mlb.classes_)

	    # Swap the raw tags column for the indicator columns and show the result
	    encoded_data = pd.concat((data.drop(columns=['tags']), tags_df), axis='columns')
	    print("MultiLabelBinarizer Encoded Data:\n", encoded_data)