vmirly/pyclust · _kmeans.py
python logo
def fit(self, X):
        """ Run KMeans on X and keep the outcome on this object

              X: dataset with feature vectors

            The four values returned by _kmeans are stored, in order,
            as centers_, labels_, sse_arr_ and n_iter_.
        """
        outcome = _kmeans(X, self.n_clusters, self.max_iter, self.n_trials, self.tol)
        (self.centers_,
         self.labels_,
         self.sse_arr_,
         self.n_iter_) = outcome
Similar code snippets
1.
vmirly/pyclust · _bisect_kmeans.py
Match rating: 81.15% · See similar code snippets
python logo
def fit(self, X):
        """ Run Bisecting KMeans on X and keep the outcome on this object

              X: dataset passed through to _bisect_kmeans

            The four values returned by _bisect_kmeans are stored, in order,
            as centers_, labels_, sse_arr_ and tree_.
        """
        outcome = _bisect_kmeans(X, self.n_clusters, self.n_trials, self.max_iter, self.tol)
        (self.centers_,
         self.labels_,
         self.sse_arr_,
         self.tree_) = outcome
2.
vmirly/pyclust · _kmedoids.py
Match rating: 78.53% · See similar code snippets
python logo
def fit(self, X):
        """ Run KMedoids on X and keep the outcome on this object

              X: dataset with feature vectors

            The four values returned by _kmedoids are stored, in order,
            as centers_, labels_, sse_arr_ and n_iter_.
        """
        outcome = _kmedoids(X, self.n_clusters, self.distance, self.max_iter,
                            self.n_trials, self.tol, self.rng)
        (self.centers_,
         self.labels_,
         self.sse_arr_,
         self.n_iter_) = outcome
3.
vmirly/pyclust · _bisect_kmeans.py
Match rating: 65.69% · See similar code snippets
python logo
def __init__(self, n_clusters=2, n_trials=10, max_iter=100, tol=0.0001):
        """ Store the clustering settings on the instance

              n_clusters: number of clusters, must be >= 2
              n_trials:   kept as self.n_trials
              max_iter:   kept as self.max_iter
              tol:        kept as self.tol

            Raises AssertionError when n_clusters is below 2.
        """
        ## Raised explicitly rather than through an `assert` statement, so
        ## the check is not stripped when Python runs with -O. Exception
        ## type and message are the same as the assert produced.
        if not n_clusters >= 2:
            raise AssertionError('n_clusters should be >= 2')
        self.n_clusters = n_clusters
        self.n_trials = n_trials
        self.max_iter = max_iter
        self.tol = tol
4.
Match rating: 62.9% · See similar code snippets
python logo
def __init__(self, n_clusters=2, n_trials=10, init_method='', max_iter=100, tol=0.0001):
        """ Store the clustering settings on the instance

              n_clusters:  number of clusters, must be >= 2
              n_trials:    kept as self.n_trials
              init_method: kept as self.init_method
              max_iter:    kept as self.max_iter
              tol:         kept as self.tol

            self.converged starts out as False.
            Raises AssertionError when n_clusters is below 2.
        """
        ## Raised explicitly rather than through an `assert` statement, so
        ## the check is not stripped when Python runs with -O. Exception
        ## type and message are the same as the assert produced.
        if not n_clusters >= 2:
            raise AssertionError('n_clusters should be >= 2')
        self.n_clusters = n_clusters
        self.n_trials = n_trials
        self.init_method = init_method
        self.max_iter = max_iter
        self.tol = tol

        self.converged = False
5.
vmirly/pyclust · _bisect_kmeans.py
Match rating: 60.04% · See similar code snippets
python logo
def _bisect_kmeans(X, n_clusters, n_trials, max_iter, tol):
    """ Bisecting Kmeans clustering: keep splitting one cluster
        in two until n_clusters clusters are reached

        Each of the n_clusters-1 steps picks a cluster through
        _select_cluster_2_split, fits a 2-cluster KMeans on its rows and
        adds the two halves to the tree as nodes 2*step-1 and 2*step.

        Returns (centers, membs, sse_arr, tree); centers and sse_arr are
        dicts keyed by the 'label' entry of each leaf node's data.
    """
    n_samples = X.shape[0]
    membs = np.empty(shape=n_samples, dtype=int)

    ## data structure to store cluster hierarchies; node 0 is built from all of X
    tree = _add_tree_node(treelib.Tree(), 0, ilev=0, X=X)

    km = _kmeans.KMeans(n_clusters=2, n_trials=n_trials, max_iter=max_iter, tol=tol)
    for step in range(1, n_clusters):
        parent_id, member_idx = _select_cluster_2_split(membs, tree)
        km.fit(X[member_idx, :])

        left_id = 2*step - 1
        right_id = 2*step
        child_labels = km.labels_

        ## register both halves (KMeans label 0, then label 1) under the parent
        for half, node_id in ((0, left_id), (1, right_id)):
            tree = _add_tree_node(tree, node_id, step,
                                  size=np.sum(child_labels == half),
                                  center=km.centers_[half],
                                  sse=km.sse_arr_[half],
                                  parent=parent_id)

        ## label 1 has to be rewritten first: at step 1 the new id of
        ## label 0 is 1, which would otherwise be rewritten a second time
        child_labels[np.where(child_labels == 1)[0]] = right_id
        child_labels[np.where(child_labels == 0)[0]] = left_id

        membs[member_idx] = child_labels

    ## collect the properties of the final clusters from the leaf nodes
    centers = dict()
    sse_arr = dict()
    for leaf in tree.leaves():
        leaf_label = leaf.data['label']
        centers[leaf_label] = leaf.data['center']
        sse_arr[leaf_label] = leaf.data['sse']

    return(centers, membs, sse_arr, tree)
6.
oscarbranson/latools · classifier_obj.py
Match rating: 58.93% · See similar code snippets
python logo
def fit_kmeans(self, data, n_clusters, **kwargs):
        """
        Build a KMeans estimator and fit it to data.

        Parameters
        ----------
        data : array-like
            A dataset formatted by `classifier.fitting_data`.
        n_clusters : int
            The number of clusters in the data.
        **kwargs
            Forwarded unchanged to the `KMeans` constructor.

        Returns
        -------
        The `KMeans` object, after its `fit` method has been called
        on `data`.
        """
        estimator = cl.KMeans(n_clusters=n_clusters, **kwargs)
        estimator.fit(data)
        return estimator
7.
oscarbranson/latools · clustering.py
Match rating: 58.54% · See similar code snippets
python logo
def cluster_kmeans(data, n_clusters, **kwargs):
    """
    Identify clusters using the K-Means algorithm.

    Parameters
    ----------
    data : array_like
        array of size [n_samples, n_features].
    n_clusters : int
        The number of clusters expected in the data.
    **kwargs
        Forwarded unchanged to the `KMeans` constructor.

    Returns
    -------
    tuple
        `(labels, [np.nan])`: the `labels_` attribute of the fitted
        estimator, followed by a one-element list holding `np.nan`.
    """
    fitted = cl.KMeans(n_clusters, **kwargs).fit(data)
    return fitted.labels_, [np.nan]
8.
dask/dask-ml · k_means.py
Match rating: 57.78% · See similar code snippets
python logo
def fit(self, X, y=None):
        """Cluster X with k_means and store the results on the estimator.

        X is first passed through ``self._check_array``. ``y`` is not
        used by this method. Sets ``cluster_centers_``, ``labels_``,
        ``inertia_`` and ``n_iter_``, then returns ``self``.
        """
        checked = self._check_array(X)
        outcome = k_means(
            checked,
            self.n_clusters,
            oversampling_factor=self.oversampling_factor,
            random_state=self.random_state,
            init=self.init,
            return_n_iter=True,
            max_iter=self.max_iter,
            init_max_iter=self.init_max_iter,
            tol=self.tol,
        )
        labels, centroids, inertia, n_iter = outcome

        self.cluster_centers_ = centroids
        self.labels_ = labels
        # inertia exposes .compute(); presumably a lazy value that is
        # materialised here -- confirm against k_means
        self.inertia_ = inertia.compute()
        self.n_iter_ = n_iter
        return self
9.
dragnet-org/dragnet · weninger.py
Match rating: 57.61% · See similar code snippets
python logo
def __init__(self, n_clusters=3, n_init=3, max_iter=50, tol=0.001):
        """Keep the four settings on the instance and build ``self.kmeans``.

        The same four values are passed by keyword to ``KMeans``.
        """
        settings = {
            'n_clusters': n_clusters,
            'n_init': n_init,
            'max_iter': max_iter,
            'tol': tol,
        }
        # each setting becomes an attribute of the same name
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)
        self.kmeans = KMeans(**settings)
10.
jasonlaska/spherecluster · spherical_kmeans.py
Match rating: 57.26% · See similar code snippets
python logo
def fit(self, X, y=None, sample_weight=None):
        """Compute k-means clustering.

        Parameters
        ----------

        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Passed through ``normalize`` first when ``self.normalize``
            is truthy.

        y : Ignored
            not used, present here for API consistency by convention.

        sample_weight : array-like, shape (n_samples,), optional
            The weights for each observation in X. If None, all observations
            are assigned equal weight (default: None)

        Returns
        -------
        self, with ``cluster_centers_``, ``labels_``, ``inertia_`` and
        ``n_iter_`` set from the result of ``spherical_k_means``.
        """
        data = normalize(X) if self.normalize else X
        rng = check_random_state(self.random_state)

        # TODO: add check that all data is unit-normalized

        fitted = spherical_k_means(
            data,
            n_clusters=self.n_clusters,
            sample_weight=sample_weight,
            init=self.init,
            n_init=self.n_init,
            max_iter=self.max_iter,
            verbose=self.verbose,
            tol=self.tol,
            random_state=rng,
            copy_x=self.copy_x,
            n_jobs=self.n_jobs,
            return_n_iter=True,
        )
        (self.cluster_centers_,
         self.labels_,
         self.inertia_,
         self.n_iter_) = fitted

        return self