dshean/pygeotools · malib.py
def robust_linreg(x, y, model='theilsen'):
    """Fit a line to x, y with the requested scikit-learn estimator.

    x must be a 2-D array of shape (n_samples, 1), as scikit-learn expects.
    Returns (slope, intercept).
    """
    from sklearn import linear_model
    if model == 'linear':
        # Ordinary least squares: fast, but sensitive to outliers
        m = linear_model.LinearRegression()
        m.fit(x, y)
        slope = m.coef_
        intercept = m.intercept_
    elif model == 'ransac':
        # RANSAC: fits repeatedly on random inlier subsets
        m = linear_model.RANSACRegressor()
        m.fit(x, y)
        slope = m.estimator_.coef_
        intercept = m.estimator_.intercept_
        #inlier_mask = m.inlier_mask_
        #outlier_mask = np.logical_not(inlier_mask)
    elif model == 'theilsen':
        # Theil-Sen: median-of-slopes estimator, robust to outliers
        m = linear_model.TheilSenRegressor()
        m.fit(x, y)
        slope = m.coef_
        intercept = m.intercept_
    else:
        raise ValueError("Unsupported model type: %s" % model)
    #xi = np.arange(x.min(), x.max())[:,np.newaxis]
    #yi = m.predict(xi)
    #ax.plot(xi, yi)
    return slope[0], intercept
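A quick usage sketch (illustrative, not part of pygeotools): scikit-learn estimators expect a 2-D feature array, so a 1-D predictor such as time must be reshaped to (n_samples, 1) before calling robust_linreg.

import numpy as np

# Synthetic noisy line; the robust fit should recover slope ~2.0, intercept ~1.0
x = np.linspace(0, 10, 100).reshape(-1, 1)   # shape (n_samples, 1)
y = 2.0 * x.ravel() + 1.0 + np.random.normal(0, 0.5, size=100)
slope, intercept = robust_linreg(x, y, model='theilsen')
print(slope, intercept)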
Similar code snippets
1.
cavia-io/coupling · aop.py
Match rating: 54.47%
def __init__(self, target, interceptors):
    self.target = target
    self.interceptors = interceptors
2.
SoftwareDefinedBuildings/XBOS · Main.py
Match rating: 50.82%
def func(X, y):
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import cross_val_score
    model = LinearRegression()
    model.fit(X, y)
    return model.predict(X)
3.
vanheeringen-lab/gimmemotifs · moap.py
Match rating: 50.74%
def br_fit(X, y):
    from sklearn.linear_model import BayesianRidge
    model = BayesianRidge()
    model.fit(X, y)
    return model.coef_
4.
neuropsychology/NeuroKit.py · routines.py
Match rating: 49.39%
def fit_model(self, X, y):
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(C=1)
    model = model.fit(X, y)
    return model
5.
openeemeter/eemeter · usage_per_day.py
Match rating: 49.24%
def get_intercept_only_candidate_models(data, weights_col):
    """ Return a list of a single candidate intercept-only model.

    Parameters
    ----------
    data : :any:`pandas.DataFrame`
        A DataFrame containing at least the column ``meter_value``.
        DataFrames of this form can be made using the
        :any:`eemeter.create_caltrack_daily_design_matrix` or
        :any:`eemeter.create_caltrack_billing_design_matrix` methods.
    weights_col : :any:`str` or None
        The name of the column (if any) in ``data`` to use as weights.

    Returns
    -------
    candidate_models : :any:`list` of :any:`CalTRACKUsagePerDayCandidateModel`
        List containing a single intercept-only candidate model.
    """
    model_type = "intercept_only"
    formula = "meter_value ~ 1"

    if weights_col is None:
        weights = 1
    else:
        weights = data[weights_col]

    try:
        model = smf.wls(formula=formula, data=data, weights=weights)
    except Exception as e:
        return [get_fit_failed_candidate_model(model_type, formula)]

    result = model.fit()

    # CalTrack 3.3.1.3
    model_params = {"intercept": result.params["Intercept"]}

    model_warnings = []

    # CalTrack 3.4.3.2
    for parameter in ["intercept"]:
        model_warnings.extend(
            get_parameter_negative_warning(model_type, model_params, parameter)
        )

    if len(model_warnings) > 0:
        status = "DISQUALIFIED"
    else:
        status = "QUALIFIED"

    return [
        CalTRACKUsagePerDayCandidateModel(
            model_type=model_type,
            formula=formula,
            status=status,
            warnings=model_warnings,
            model_params=model_params,
            model=model,
            result=result,
            r_squared_adj=0,
        )
    ]
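A short usage sketch based only on the docstring above; accessing status and model_params as attributes is an assumption inferred from the constructor call (requires eemeter and its statsmodels dependency):

import pandas as pd

# Minimal DataFrame with the required meter_value column; no weights
data = pd.DataFrame({"meter_value": [10.2, 11.5, 9.8, 10.9]})
candidates = get_intercept_only_candidate_models(data, weights_col=None)
candidate = candidates[0]
print(candidate.status, candidate.model_params)   # e.g. QUALIFIED {'intercept': 10.6}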
6.
DistrictDataLabs/yellowbrick · bestfit.py
Match rating: 48.45%
def fit_quadratic(X, y):
    """
    Uses OLS with Polynomial order 2.
    """
    from sklearn import linear_model
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures
    model = make_pipeline(
        PolynomialFeatures(2), linear_model.LinearRegression()
    )
    model.fit(X, y)
    return model
7.
slundberg/shap · models.py
Match rating: 47.58%
def cric__lasso():
    """ L1-regularized ("lasso") logistic regression
    """
    import sklearn.linear_model
    # the l1 penalty requires a solver that supports it, e.g. liblinear
    model = sklearn.linear_model.LogisticRegression(
        penalty="l1", C=0.002, solver="liblinear")

    # we want to explain the raw probability outputs of the model
    model.predict = lambda X: model.predict_proba(X)[:,1]

    return model
8.
DistrictDataLabs/yellowbrick · bestfit.py
Match rating: 47.42%
def fit_select_best(X, y):
    """
    Selects the best fit of the estimators already implemented by choosing the
    model with the smallest mean squared error metric for the trained values.
    """
    from operator import itemgetter
    from sklearn.metrics import mean_squared_error as mse
    models = [fit(X, y) for fit in [fit_linear, fit_quadratic]]
    errors = map(lambda model: mse(y, model.predict(X)), models)

    return min(zip(models, errors), key=itemgetter(1))[0]
9.
apache/spark · regression.py
Match rating: 47.25%
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    from pyspark.mllib.classification import LogisticRegressionModel
    from pyspark.mllib.regression import LabeledPoint
    from pyspark.mllib.linalg import _convert_to_vector
    first = data.first()
    if not isinstance(first, LabeledPoint):
        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
    if initial_weights is None:
        # reuse the LabeledPoint already fetched above rather than
        # triggering a second RDD action with data.first()
        initial_weights = [0.0] * len(first.features)
    if modelClass == LogisticRegressionModel:
        weights, intercept, numFeatures, numClasses = train_func(
            data, _convert_to_vector(initial_weights))
        return modelClass(weights, intercept, numFeatures, numClasses)
    else:
        weights, intercept = train_func(data, _convert_to_vector(initial_weights))
        return modelClass(weights, intercept)
10.
albahnsen/CostSensitiveClassification · regression.py
Match rating: 47.2%
def fit(self, X, y, cost_mat):
        """ Build a example-dependent cost-sensitive logistic regression from the training set (X, y, cost_mat)

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        y : array indicator matrix
            Ground truth (correct) labels.

        cost_mat : array-like of shape = [n_samples, 4]
            Cost matrix of the classification problem
            Where the columns represents the costs of: false positives, false negatives,
            true positives and true negatives, for each example.

        Returns
        -------
        self : object
            Returns self.
        """

        #TODO: Check input

        n_features = X.shape[1]
        if self.fit_intercept:
            w0 = np.zeros(n_features + 1)
        else:
            w0 = np.zeros(n_features)

        if self.solver == 'ga':
            #TODO: add n_jobs
            res = GeneticAlgorithmOptimizer(_logistic_cost_loss,
                                            w0.shape[0],
                                            iters=self.max_iter,
                                            type_='cont',
                                            n_chromosomes=100,
                                            per_mutations=0.25,
                                            n_elite=10,
                                            fargs=(X, y, cost_mat, 1. / self.C),
                                            range_=(-5, 5),
                                            n_jobs=1,
                                            verbose=self.verbose)
            res.fit()

        elif self.solver == 'bfgs':

            if self.verbose > 0:
                disp = True
            else:
                disp = False

            res = minimize(_logistic_cost_loss,
                           w0,
                           method='BFGS',
                           args=(X, y, cost_mat, 1. / self.C),
                           tol=self.tol,
                           options={'maxiter': self.max_iter, 'disp': disp})

        if self.fit_intercept:
            self.coef_ = res.x[:-1]
            self.intercept_ = res.x[-1]
        else:
            self.coef_ = res.x

        return self
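A hedged usage sketch, assuming this fit() belongs to costcla.models.CostSensitiveLogisticRegression (the costcla package) and that its constructor accepts the solver shown in the code above:

import numpy as np
from costcla.models import CostSensitiveLogisticRegression

# Synthetic binary problem; cost_mat columns are the per-example costs of
# false positives, false negatives, true positives, true negatives,
# as documented in the docstring above.
X = np.random.rand(200, 3)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)
cost_mat = np.zeros((200, 4))
cost_mat[:, 0] = 1.0   # false positive cost
cost_mat[:, 1] = 5.0   # false negative cost
clf = CostSensitiveLogisticRegression(solver='bfgs')
clf.fit(X, y, cost_mat)
print(clf.coef_, clf.intercept_)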