Skip to content

Commit b512a5c

Browse files
shorten docstrings of private functions; typos in defaults; other suggestions
1 parent 326b99c commit b512a5c

File tree

2 files changed

+15
-181
lines changed

2 files changed

+15
-181
lines changed

src/glum/_glm.py

+13-179
Original file line numberDiff line numberDiff line change
@@ -246,14 +246,14 @@ def _parse_formula(
246246
247247
Parameters
248248
----------
249-
formula : FormulaSpec
249+
formula : formulaic.FormulaSpec
250250
The formula to parse.
251251
include_intercept : bool, default=True
252252
Whether to include an intercept column.
253253
254254
Returns
255255
-------
256-
tuple[Formula, Formula]
256+
tuple[formulaic.Formula, formulaic.Formula]
257257
The left-hand and right-hand sides of the formula.
258258
"""
259259
if isinstance(formula, str):
@@ -1735,52 +1735,10 @@ def _wald_test_matrix(
17351735
expected_information=None,
17361736
context: Optional[Mapping[str, Any]] = None,
17371737
) -> WaldTestResult:
1738-
"""Compute the Wald test statistic and p-value for a linear hypothesis.
1739-
1740-
The hypothesis tested is ``R @ coef_ = r``. Under the null hypothesis,
1741-
the test statistic follows a chi-squared distribution with ``R.shape[0]``
1742-
degrees of freedom.
1743-
1744-
Parameters
1745-
----------
1746-
R : np.ndarray
1747-
The restriction matrix representing the linear combination of coefficients
1748-
to test.
1749-
r : np.ndarray, optional, default=None
1750-
The vector representing the values of the linear combination.
1751-
If None, the test is for whether the linear combinations of the coefficients
1752-
are zero.
1753-
X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
1754-
Training data. Can be omitted if a covariance matrix has already
1755-
been computed.
1756-
y : array-like, shape (n_samples,), optional
1757-
Target values. Can be omitted if a covariance matrix has already
1758-
been computed.
1759-
sample_weight : array-like, shape (n_samples,), optional, default=None
1760-
Individual weights for each sample.
1761-
offset : array-like, optional, default=None
1762-
Array with additive offsets.
1763-
mu : array-like, optional, default=None
1764-
Array with predictions. Estimated if absent.
1765-
dispersion : float, optional, default=None
1766-
The dispersion parameter. Estimated if absent.
1767-
robust : boolean, optional, default=None
1768-
Whether to compute robust standard errors instead of normal ones.
1769-
If not specified, the model's ``robust`` attribute is used.
1770-
clusters : array-like, optional, default=None
1771-
Array with cluster membership. Clustered standard errors are
1772-
computed if clusters is not None.
1773-
expected_information : boolean, optional, default=None
1774-
Whether to use the expected or observed information matrix.
1775-
Only relevant when computing robust standard errors.
1776-
If not specified, the model's ``expected_information`` attribute is used.
1777-
context : Optional[Mapping[str, Any]], default=None
1778-
The context to use for evaluating the formula.
1779-
1780-
Returns
1781-
-------
1782-
WaldTestResult
1783-
NamedTuple with test statistic, p-value, and degrees of freedom.
1738+
"""
1739+
Perform a Wald test for a hypothesis specified by constraints
1740+
given as ``R @ coef_ = r``. Under the null hypothesis, the test statistic
1741+
follows a chi-squared distribution with ``R.shape[0]`` degrees of freedom.
17841742
"""
17851743

17861744
covariance_matrix = self.covariance_matrix(
@@ -1843,49 +1801,9 @@ def _wald_test_feature_names(
18431801
expected_information=None,
18441802
context: Optional[Mapping[str, Any]] = None,
18451803
) -> WaldTestResult:
1846-
"""Compute the Wald test statistic and p-value for a linear hypothesis.
1847-
1804+
"""
18481805
Perform a Wald test for the hypothesis that the coefficients of the
18491806
features in ``features`` are equal to the values in ``values``.
1850-
1851-
Parameters
1852-
----------
1853-
features: Union[str, list[str]]
1854-
The name of a feature or a list of features to test.
1855-
values: Sequence, optional, default=None
1856-
The values to which coefficients are compared. If None, the test is
1857-
for whether the coefficients are zero.
1858-
X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
1859-
Training data. Can be omitted if a covariance matrix has already
1860-
been computed.
1861-
y : array-like, shape (n_samples,), optional
1862-
Target values. Can be omitted if a covariance matrix has already
1863-
been computed.
1864-
sample_weight : array-like, shape (n_samples,), optional, default=None
1865-
Individual weights for each sample.
1866-
offset : array-like, optional, default=None
1867-
Array with additive offsets.
1868-
mu : array-like, optional, default=None
1869-
Array with predictions. Estimated if absent.
1870-
dispersion : float, optional, default=None
1871-
The dispersion parameter. Estimated if absent.
1872-
robust : boolean, optional, default=None
1873-
Whether to compute robust standard errors instead of normal ones.
1874-
If not specified, the model's ``robust`` attribute is used.
1875-
clusters : array-like, optional, default=None
1876-
Array with cluster membership. Clustered standard errors are
1877-
computed if clusters is not None.
1878-
expected_information : boolean, optional, default=None
1879-
Whether to use the expected or observed information matrix.
1880-
Only relevant when computing robust standard errors.
1881-
If not specified, the model's ``expected_information`` attribute is used.
1882-
context : Optional[Mapping[str, Any]], default=None
1883-
The context to use for evaluating the formula.
1884-
1885-
Returns
1886-
-------
1887-
WaldTestResult
1888-
NamedTuple with test statistic, p-value, and degrees of freedom.
18891807
"""
18901808

18911809
if isinstance(features, str):
@@ -1942,46 +1860,8 @@ def _wald_test_formula(
19421860
expected_information=None,
19431861
context: Optional[Mapping[str, Any]] = None,
19441862
) -> WaldTestResult:
1945-
"""Compute the Wald test statistic and p-value for a linear hypothesis.
1946-
1863+
"""
19471864
Perform a Wald test for the hypothesis described in ``formula``.
1948-
1949-
Parameters
1950-
----------
1951-
formula: str
1952-
A formula string describing the linear restrictions. For more information,
1953-
see `meth:ModelSpec.get_linear_constraints` in ``formulaic``.
1954-
X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
1955-
Training data. Can be omitted if a covariance matrix has already
1956-
been computed.
1957-
y : array-like, shape (n_samples,), optional
1958-
Target values. Can be omitted if a covariance matrix has already
1959-
been computed.
1960-
sample_weight : array-like, shape (n_samples,), optional, default=None
1961-
Individual weights for each sample.
1962-
offset : array-like, optional, default=None
1963-
Array with additive offsets.
1964-
mu : array-like, optional, default=None
1965-
Array with predictions. Estimated if absent.
1966-
dispersion : float, optional, default=None
1967-
The dispersion parameter. Estimated if absent.
1968-
robust : boolean, optional, default=None
1969-
Whether to compute robust standard errors instead of normal ones.
1970-
If not specified, the model's ``robust`` attribute is used.
1971-
clusters : array-like, optional, default=None
1972-
Array with cluster membership. Clustered standard errors are
1973-
computed if clusters is not None.
1974-
expected_information : boolean, optional, default=None
1975-
Whether to use the expected or observed information matrix.
1976-
Only relevant when computing robust standard errors.
1977-
If not specified, the model's ``expected_information`` attribute is used.
1978-
context : Optional[Mapping[str, Any]], default=None
1979-
The context to use for evaluating the formula.
1980-
1981-
Returns
1982-
-------
1983-
WaldTestResult
1984-
NamedTuple with test statistic, p-value, and degrees of freedom.
19851865
"""
19861866

19871867
if self.fit_intercept:
@@ -2023,54 +1903,9 @@ def _wald_test_term_names(
20231903
expected_information=None,
20241904
context: Optional[Mapping[str, Any]] = None,
20251905
) -> WaldTestResult:
2026-
"""Compute the Wald test statistic and p-value for a linear hypotheses.
2027-
1906+
"""
20281907
Perform a Wald test for the hypothesis that the coefficients of the
20291908
terms in ``terms`` are equal to the values in ``values``.
2030-
2031-
Parameters
2032-
----------
2033-
terms : Union[str, list[str]]
2034-
The name of a term or a list of terms to test. It can cover one or more
2035-
coefficients. In the case of a model based on a formula, a term is one
2036-
of the expressions separated by ``+`` signs. Otherwise, a term is one column
2037-
in the input data. As categorical variables need not be one-hot encoded in
2038-
glum, in their case, the hypothesis to be tested is that the coefficients
2039-
of all categories are equal to ``r``.
2040-
values: Sequence, optional, default=None
2041-
The values to which coefficients are compared. If None, the test is
2042-
for whether the coefficients are zero.
2043-
X : {array-like, sparse matrix}, shape (n_samples, n_features), optional
2044-
Training data. Can be omitted if a covariance matrix has already
2045-
been computed.
2046-
y : array-like, shape (n_samples,), optional
2047-
Target values. Can be omitted if a covariance matrix has already
2048-
been computed.
2049-
sample_weight : array-like, shape (n_samples,), optional (default=None)
2050-
Individual weights for each sample.
2051-
offset : array-like, optional, default=None
2052-
Array with additive offsets.
2053-
mu : array-like, optional, default=None
2054-
Array with predictions. Estimated if absent.
2055-
dispersion : float, optional, default=None
2056-
The dispersion parameter. Estimated if absent.
2057-
robust : boolean, optional, default=None
2058-
Whether to compute robust standard errors instead of normal ones.
2059-
If not specified, the model's ``robust`` attribute is used.
2060-
clusters : array-like, optional, default=None
2061-
Array with clusters membership. Clustered standard errors are
2062-
computed if clusters is not None.
2063-
expected_information : boolean, optional, default=None
2064-
Whether to use the expected or observed information matrix.
2065-
Only relevant when computing robust std-errors.
2066-
If not specified, the model's ``expected_information`` attribute is used.
2067-
context : Optional[Mapping[str, Any]], default=None
2068-
The context to use for evaluating the formula.
2069-
2070-
Returns
2071-
-------
2072-
WaldTestResult
2073-
NamedTuple with test statistic, p-value and degrees of freedom.
20741909
"""
20751910

20761911
if isinstance(terms, str):
@@ -2334,9 +2169,8 @@ def covariance_matrix(
23342169
"matrix will be incorrect."
23352170
)
23362171

2337-
cannot_estimate_cov = X is None or (
2338-
y is None and not hasattr(self, "y_model_spec_")
2339-
)
2172+
cannot_estimate_cov = (y is None) and not hasattr(self, "y_model_spec_")
2173+
cannot_estimate_cov |= X is None
23402174

23412175
if not skip_checks:
23422176
if cannot_estimate_cov and self.covariance_matrix_ is None:
@@ -3125,12 +2959,12 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
31252959
If true, then the expected information matrix is computed by default.
31262960
Only relevant when computing robust standard errors.
31272961
3128-
formula : FormulaSpec
2962+
formula : formulaic.FormulaSpec
31292963
A formula accepted by formulaic. It can either be a one-sided formula, in
31302964
which case ``y`` must be specified in ``fit``, or a two-sided formula, in
31312965
which case ``y`` must be ``None``.
31322966
3133-
interaction_separator: str, default ":"
2967+
interaction_separator: str, default=":"
31342968
The separator between the names of interacted variables.
31352969
31362970
categorical_format : str, optional, default='{name}[{category}]'

src/glum/_glm_cv.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,10 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
256256
which case ``y`` must be specified in ``fit``, or a two-sided formula, in
257257
which case ``y`` must be ``None``.
258258
259-
interaction_separator: str, default ":"
259+
interaction_separator: str, default=":"
260260
The separator between the names of interacted variables.
261261
262-
categorical_format: str, default "{name}[T.{category}]"
262+
categorical_format: str, default="{name}[T.{category}]"
263263
The format string used to generate the names of categorical variables.
264264
Has to include the placeholders ``{name}`` and ``{category}``.
265265
Only used if ``formula`` is not ``None``.

0 commit comments

Comments
 (0)