import scipy as sp
from enum import Enum
from sklearn.metrics import pairwise_distances
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted, check_random_state
class HiddenLayerType(Enum):
    """Enumeration of the hidden-layer projection kinds, keyed by integer code."""

    RANDOM = 1    # Gaussian random projection
    SPARSE = 2    # Sparse Random Projection
    PAIRWISE = 3  # Pairwise kernel with a number of centroids
def dummy(x):
    """Return ``x`` unchanged (identity function)."""
    return x
def flatten(items):
    """Yield items from any nested iterable.

    Walks ``items`` depth-first. Any element that exposes ``__iter__`` is
    descended into recursively, except ``str`` and ``bytes`` objects, which
    are yielded whole. Everything else is yielded as-is.

    Parameters
    ----------
    items : iterable
        Possibly nested iterable to flatten.

    Yields
    ------
    object
        The leaf elements, in iteration order.
    """
    for x in items:
        # don't break strings into characters
        if hasattr(x, '__iter__') and not isinstance(x, (str, bytes)):
            yield from flatten(x)
        else:
            yield x
def _is_list_of_strings(obj):
    """Return True if ``obj`` is not None and every element of it is a ``str``.

    Notes
    -----
    The check is purely element-wise, so an empty iterable returns True, and
    so does a plain ``str`` (each of its characters is itself a ``str``).
    A non-iterable, non-None ``obj`` raises ``TypeError`` from the iteration.
    """
    return obj is not None and all(isinstance(elem, str) for elem in obj)
def _dense(X):
    """Return a dense version of ``X``.

    Parameters
    ----------
    X : object
        A SciPy sparse matrix, or any other object.

    Returns
    -------
    numpy.ndarray or object
        A dense ``ndarray`` copy when ``X`` is a SciPy sparse matrix;
        otherwise ``X`` itself, unchanged.
    """
    if sp.sparse.issparse(X):
        # toarray() gives a plain ndarray; todense() would return the legacy
        # np.matrix type instead.
        return X.toarray()
    return X
[docs]class PairwiseRandomProjection(BaseEstimator, TransformerMixin):
def __init__(self, n_components=100, pairwise_metric='l2', n_jobs=None, random_state=None):
    """Pairwise distances projection with random centroids.

    Parameters
    ----------
    n_components : int
        Number of components (centroids) in the projection. Creates the same number of output features.

    pairwise_metric : str
        A valid pairwise distance metric, see pairwise-distances_.

        .. _pairwise-distances: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html#sklearn.metrics.pairwise_distances

    n_jobs : int or None, optional, default=None
        Number of jobs to use in distance computations, or `None` for no parallelism.
        Passed to the pairwise-distances_ function.

    random_state
        Used for random generation of centroids.
    """
    # Hyper-parameters are stored exactly as given, without validation here.
    self.n_components = n_components
    self.pairwise_metric = pairwise_metric
    self.n_jobs = n_jobs
    self.random_state = random_state
def fit(self, X, y=None):
    """Generate artificial centroids.

    Centroids are sampled from a normal distribution. They work best if the data is normalized.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape=[n_samples, n_features]
        Input data. It is validated with ``check_array``; only its number
        of features is used, to size the centroids.

    y : object, optional
        Not used by this method.

    Returns
    -------
    self
        The fitted estimator, with ``random_state_``, ``components_`` and
        ``n_jobs_`` set.

    Raises
    ------
    ValueError
        If ``n_components`` is not greater than 0.
    """
    X = check_array(X, accept_sparse=True)
    self.random_state_ = check_random_state(self.random_state)

    if self.n_components <= 0:
        raise ValueError("n_components must be greater than 0, got %s" % self.n_components)

    # One normally distributed centroid per component, in input feature space.
    self.components_ = self.random_state_.randn(self.n_components, X.shape[1])
    # None means "no parallelism", stored as a single job.
    self.n_jobs_ = 1 if self.n_jobs is None else self.n_jobs
    return self