tmoerman/arboreto · core.py
python logo
def fit_model(regressor_type,
              regressor_kwargs,
              tf_matrix,
              target_gene_expression,
              early_stop_window_length=EARLY_STOP_WINDOW_LENGTH,
              seed=DEMON_SEED):
    """
    Train a regression model of the target gene's expression in function of the TF matrix.

    :param regressor_type: string. Case insensitive.
    :param regressor_kwargs: a dictionary of key-value pairs that configures the regressor.
    :param tf_matrix: the predictor matrix (transcription factor matrix) as a numpy array.
    :param target_gene_expression: the target (y) gene expression to predict in function of the tf_matrix (X).
    :param early_stop_window_length: window length of the early stopping monitor.
    :param seed: (optional) random seed for the regressors.
    :return: a trained regression model.
    :raise ValueError: if the regressor type is unsupported or input dimensions do not match.
    """
    regressor_type = regressor_type.upper()

    # Explicit check instead of `assert`: asserts are stripped when Python runs
    # with -O, and ValueError is what callers of this function catch.
    if tf_matrix.shape[0] != len(target_gene_expression):
        raise ValueError('tf_matrix has {0} rows but target gene expression has length {1}'
                         .format(tf_matrix.shape[0], len(target_gene_expression)))

    def do_sklearn_regression():
        # Look up the regressor class by (upper-cased) type name and configure it.
        regressor = SKLEARN_REGRESSOR_FACTORY[regressor_type](random_state=seed, **regressor_kwargs)

        # Early stopping only applies when the out-of-bag improvement heuristic
        # is supported for this regressor configuration.
        with_early_stopping = is_oob_heuristic_supported(regressor_type, regressor_kwargs)

        if with_early_stopping:
            regressor.fit(tf_matrix, target_gene_expression, monitor=EarlyStopMonitor(early_stop_window_length))
        else:
            regressor.fit(tf_matrix, target_gene_expression)

        return regressor

    if is_sklearn_regressor(regressor_type):
        return do_sklearn_regression()
    else:
        raise ValueError('Unsupported regressor type: {0}'.format(regressor_type))
Similar code snippets
1.
tmoerman/arboreto · core.py
Match rating: 66.51% · See similar code snippets
python logo
def infer_partial_network(regressor_type,
                          regressor_kwargs,
                          tf_matrix,
                          tf_matrix_gene_names,
                          target_gene_name,
                          target_gene_expression,
                          include_meta=False,
                          early_stop_window_length=EARLY_STOP_WINDOW_LENGTH,
                          seed=DEMON_SEED):
    """
    Train a regression model for one target gene and extract the inferred
    regulatory links (and, optionally, model meta data) from it.

    :param regressor_type: string. Case insensitive.
    :param regressor_kwargs: dict of key-value pairs that configures the regressor.
    :param tf_matrix: numpy matrix. The feature matrix X to use for the regression.
    :param tf_matrix_gene_names: list of transcription factor names corresponding to the columns of the tf_matrix used to
                                 train the regression model.
    :param target_gene_name: the name of the target gene to infer the regulatory links for.
    :param target_gene_expression: the expression profile of the target gene. Numpy array.
    :param include_meta: whether to also return the meta information DataFrame.
    :param early_stop_window_length: window length of the early stopping monitor.
    :param seed: (optional) random seed for the regressors.
    :return: links_df, a Pandas DataFrame['TF', 'target', 'importance'] of inferred
             regulatory links and their connection strength; when include_meta is
             True, the pair (links_df, meta_df) where meta_df is a Pandas
             DataFrame['target', 'meta', 'value'] describing the trained model.
    """
    def infer():
        # Remove the target gene itself from the predictor matrix before fitting.
        clean_tf_matrix, clean_tf_names = clean(tf_matrix, tf_matrix_gene_names, target_gene_name)

        try:
            model = fit_model(regressor_type, regressor_kwargs, clean_tf_matrix, target_gene_expression,
                              early_stop_window_length, seed)
        except ValueError as e:
            raise ValueError("Regression for target gene {0} failed. Cause {1}.".format(target_gene_name, repr(e)))

        links_df = to_links_df(regressor_type, regressor_kwargs, model, clean_tf_names,
                               target_gene_name)

        if not include_meta:
            return links_df

        return links_df, to_meta_df(model, target_gene_name)

    # Shape of the fallback mirrors the shape of a successful result.
    return retry(infer,
                 fallback_result=(None, None) if include_meta else None,
                 warning_msg='infer_data failed for target {0}'.format(target_gene_name))
2.
mattjj/pyslds · util.py
Match rating: 61.71% · See similar code snippets
python logo
def regression_logprior(regression):
    """
    Compute the log prior of a regression object's parameters, dispatching on
    the concrete regression type.

    :param regression: a DiagonalRegression or (dense) Regression instance.
    :return: the log prior value from the type-specific helper.
    :raise TypeError: for unrecognized regression objects.
    """
    if isinstance(regression, DiagonalRegression):
        return diag_regression_logprior(regression)
    elif isinstance(regression, Regression):
        return dense_regression_logprior(regression)
    else:
        # Previously fell through and silently returned None; fail loudly
        # instead, consistent with expected_regression_log_prob.
        raise TypeError("Unrecognized regression object! {}".format(regression))
3.
wdm0006/sklearn-extensions · elm.py
Match rating: 61.3% · See similar code snippets
python logo
def fit(self, X, y):
    """
    Fit the model using X, y as training data.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape [n_samples, n_features]
        Training vectors, where n_samples is the number of samples
        and n_features is the number of features.

    y : array-like of shape [n_samples, n_outputs]
        Target values (class labels in classification, real numbers in
        regression)

    Returns
    -------
    self : object

        Returns an instance of self.
    """
    # Build a fresh random hidden layer and wrap it, together with the
    # configured output regressor, in a GenELMRegressor that does the work.
    hidden = self._create_random_layer()
    self._genelm_regressor = GenELMRegressor(hidden_layer=hidden,
                                             regressor=self.regressor)
    # Delegate the actual fitting to the wrapped estimator.
    self._genelm_regressor.fit(X, y)
    return self
4.
mattjj/pyslds · util.py
Match rating: 58.79% · See similar code snippets
python logo
def expected_regression_log_prob(regression, stats):
    """
    Compute the expected log probability of a regression model given sufficient
    statistics, dispatching on the concrete regression type.

    :param regression: a DiagonalRegression or (dense) Regression instance.
    :param stats: sufficient statistics passed through to the type-specific helper.
    :return: the expected log probability from the type-specific helper.
    :raise TypeError: for unrecognized regression objects.
    """
    if isinstance(regression, DiagonalRegression):
        return expected_diag_regression_log_prob(
            regression.A, regression.sigmasq_flat, stats)
    elif isinstance(regression, Regression):
        return expected_dense_regression_log_prob(
            regression.A, regression.sigma, stats)
    else:
        # TypeError (not bare Exception) is the idiomatic error for a bad type;
        # any caller catching Exception still catches it.
        raise TypeError("Unrecognized regression object! {}".format(regression))
5.
tmoerman/arboreto · core.py
Match rating: 57.73% · See similar code snippets
python logo
def to_feature_importances(regressor_type,
                           regressor_kwargs,
                           trained_regressor):
    """
    Extract the feature importances from a trained regression model.

    Motivation: when the out-of-bag improvement heuristic is used, ensembles may
    end up with different numbers of trees, and feature importances are
    normalized over the ensemble; we cancel the effect of that normalization by
    multiplying the importances by the number of trees actually used.
    NOTE(review): the original docstring was garbled and truncated here
    ("...prioritizing links that were inferred in a regression where lots of");
    presumably the intent is to prioritize links inferred by larger ensembles —
    confirm against project documentation.

    :param regressor_type: string. Case insensitive.
    :param regressor_kwargs: a dictionary of key-value pairs that configures the regressor.
    :param trained_regressor: the trained model from which to extract the feature importances.
    :return: the feature importances inferred from the trained model.
    """

    if is_oob_heuristic_supported(regressor_type, regressor_kwargs):
        # Number of fitted sub-estimators in the ensemble.
        n_estimators = len(trained_regressor.estimators_)

        # Undo the per-ensemble normalization by scaling back up.
        denormalized_importances = trained_regressor.feature_importances_ * n_estimators

        return denormalized_importances
    else:
        return trained_regressor.feature_importances_
6.
google/prettytensor · pretty_tensor_loss_methods.py
Match rating: 54.88% · See similar code snippets
python logo
def apply_regression(input_,
                     regression_fn,
                     target,
                     regression_args=(),
                     regression_kwargs=None,
                     name=PROVIDED,
                     loss_weight=None,
                     per_example_weights=None):
  """Applies the given regression and adds the loss to the bookkeeper.

  This does not change tensor.
  Args:
    input_: A Tensor or a Pretty Tensor holding the input.
    regression_fn: A function that takes (in order) tensor, labels.
    target: The target of the regression.
    regression_args: Other arguments for the regression.
    regression_kwargs: Keyword args for the regression.
    name: The name, also added to regression_kwargs.
    loss_weight: A scalar multiplier for the loss.
    per_example_weights: A Tensor with a weight per example.
  Returns:
    The loss tensor's name.
  Raises:
    ValueError: If the target is not a compatible shape with input_.
  """
  if regression_kwargs is None:
    regression_kwargs = {}
  if name is not None and 'name' not in regression_kwargs:
    regression_kwargs['name'] = name
  elif name is None:
    name = input_.tensor.op.name
  # At this point `name` is always set: it was either supplied by the caller or
  # derived from the input tensor's op above. (A later `if name is None`
  # fallback to loss.op.name was unreachable and has been removed.)

  tensor = input_.tensor
  loss = regression_fn(tensor, target, *regression_args, **regression_kwargs)
  if loss_weight is not None:
    loss *= loss_weight
  if per_example_weights is not None:
    per_example_weights = _convert_and_assert_per_example_weights_compatible(
        input_,
        per_example_weights,
        dtype=loss.dtype)
    loss *= per_example_weights
  # Use mean so that the learning rate is independent of the batch size.
  if tensor.get_shape()[0].value is not None:
    # Try to use division instead of reduce_mean because reduce_mean doesn't
    # work on GPU.
    avg_loss = tf.reduce_sum(loss) / tensor.get_shape()[0].value
  else:
    avg_loss = tf.reduce_mean(loss)
  return input_.add_loss(avg_loss, name=name)
7.
bsolomon1124/pyfinance · general.py
Match rating: 54.57% · See similar code snippets
python logo
def variance_inflation_factor(regressors, hasconst=False):
    """Calculate the variance inflation factor (VIF) for each of `regressors`.

    A wrapper/modification of statsmodels:
    statsmodels.stats.outliers_influence.variance_inflation_factor

    One recommendation is that if VIF is greater than 5, then the explanatory
    variable `x` is highly collinear with the other explanatory
    variables, and the parameter estimates will have large standard errors
    because of this. [source: StatsModels]

    Parameters
    ----------
    regressors: DataFrame
        DataFrame containing the entire set of regressors
    hasconst : bool, default False
        If False, a column vector will be added to `regressors` for use in
        OLS

    Example
    -------
    # Generate some data
    from datetime import date
    from pandas_datareader.data import DataReader as dr

    syms = {'TWEXBMTH' : 'usd',
            'T10Y2YM' : 'term_spread',
            'PCOPPUSDM' : 'copper'
           }
    start = date(2000, 1, 1)
    data = (dr(syms.keys(), 'fred', start)
            .pct_change()
            .dropna())
    data = data.rename(columns = syms)

    print(variance_inflation_factor(data))
    usd            1.31609
    term_spread    1.03793
    copper         1.37055
    dtype: float64
    """

    if not hasconst:
        # OLS needs an intercept; append a constant column when not supplied.
        regressors = add_constant(regressors, prepend=False)
    k = regressors.shape[1]

    def vif_sub(x, regressors):
        # Regress column x on all other columns; VIF = 1 / (1 - adjusted R^2).
        # NOTE(review): uses rsquared_adj where statsmodels uses rsquared —
        # presumably a deliberate modification; confirm.
        x_i = regressors.iloc[:, x]
        mask = np.arange(k) != x
        x_not_i = regressors.iloc[:, mask]
        rsq = linear_model.OLS(x_i, x_not_i, missing="drop").fit().rsquared_adj
        vif = 1.0 / (1.0 - rsq)
        return vif

    vifs = pd.Series(np.arange(k), index=regressors.columns)
    vifs = vifs.apply(vif_sub, args=(regressors,))

    # Find the constant column (probably called 'const', but not necessarily)
    # and drop it. `is_nonzero_const` borrowed from statsmodels.add_constant:
    # a column is a nonzero constant when its range (ptp) is 0 and it has no zeros.
    is_nonzero_const = np.ptp(regressors.values, axis=0) == 0
    is_nonzero_const &= np.all(regressors != 0.0, axis=0)
    # Reassignment instead of inplace=True drop; same result, clearer dataflow.
    vifs = vifs.drop(vifs.index[is_nonzero_const])
    return vifs
8.
mattjj/pyslds · util.py
Match rating: 54.56% · See similar code snippets
python logo
def regression_map_estimation(stats, regression):
    """
    Update a regression object's parameters from sufficient statistics combined
    with its prior (MAP-style estimation). Mutates `regression` in place.

    :param stats: sufficient statistics (yyT, yxT, xxT, n) for the dense case,
                  or whatever the diagonal regression's max_likelihood expects.
    :param regression: a DiagonalRegression or dense Regression instance.
    """
    D_out = regression.D_out

    # Diagonal case: the object knows how to update itself from stats alone.
    if isinstance(regression, DiagonalRegression):
        regression.max_likelihood(data=None, stats=stats)
        return

    # Dense case: add prior (natural hyperparameters) and likelihood statistics
    # element-wise, then solve for the MAP parameters.
    yyT, yxT, xxT, n = [sum(pair) for pair in zip(stats, regression.natural_hypparam)]

    A = np.linalg.solve(xxT, yxT.T).T
    sigma = (yyT - A.dot(yxT.T)) / n

    # Force symmetry and add a tiny ridge so sigma stays a valid covariance.
    sigma = (sigma + sigma.T) / 2.
    sigma = 1e-10 * np.eye(D_out) + sigma

    regression.A = A
    regression.sigma = sigma
9.
nok/sklearn-porter · Porter.py
Match rating: 53.42% · See similar code snippets
python logo
def _regressors(self):
        """
        Get the supported regressors.

        Returns
        -------
        regressors : tuple
            The tuple of supported regressor classes; empty when the installed
            sklearn version predates 0.18.0.
        """

        # sklearn version < 0.18.0: no regressors are supported.
        regressors = ()

        # sklearn version >= 0.18.0: MLPRegressor became available.
        # Import is deferred so older sklearn installs never touch the module.
        if self.sklearn_ver[:2] >= (0, 18):
            from sklearn.neural_network.multilayer_perceptron \
                import MLPRegressor
            regressors += (MLPRegressor, )

        return regressors
10.
wdm0006/sklearn-extensions · elm.py
Match rating: 53.2% · See similar code snippets
python logo
def predict(self, X):
    """
    Predict values using the model.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape [n_samples, n_features]

    Returns
    -------
    C : numpy array of shape [n_samples, n_outputs]
        Predicted values.

    Raises
    ------
    ValueError
        If fit() has not been called yet.
    """
    # Guard clause: prediction is only meaningful after fitting has created
    # the underlying generalized ELM regressor.
    delegate = self._genelm_regressor
    if delegate is None:
        raise ValueError("SimpleELMRegressor not fitted")

    return delegate.predict(X)