py-why · adam2392 · Jan 20, 2023 · Jan 5, 2023 · Jan 5, 2023 · Jan 7, 2023
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -116,7 +116,7 @@ jobs:
         python-version: [3.8, "3.10"]  # oldest and newest supported versions
         poetry-version: [1.3.0]
         networkx: [stable, main]
-    name: pytest ${{ matrix.os }} - py${{ matrix.python-version }} - Networkx ${{ matrix.networkx }}
+    name: Unit-test ${{ matrix.os }} - py${{ matrix.python-version }} - Networkx ${{ matrix.networkx }}
     runs-on: ${{ matrix.os }}-latest
     defaults:
       run:
@@ -138,12 +138,11 @@ jobs:
       - name: Install packages via poetry
         run: |
           poetry install --with test,ts
-      # TODO: uncomment, when MixedEdgeGraph PRed into networkx
-      # - name: Install Networkx (main)
-      #   if: "matrix.networkx == 'main'"
-      #   run: |
-      #     pip uninstall -yq networkx
-      #     pip install --progress-bar off git+https://github.com/networkx/networkx
+      - name: Install Networkx (main)
+        if: "matrix.networkx == 'main'"
+        run: |
+          pip uninstall -yq networkx
+          pip install --progress-bar off git+https://github.com/networkx/networkx
       - name: Run pytest  # headless via Xvfb on linux
         run: poetry run poe unit_test
       - name: Upload coverage stats to codecov
@@ -153,6 +152,46 @@ jobs:
           files: ./coverage.xml
           fail_ci_if_error: true
           verbose: true
+
+  integration_test:
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu]
+        python-version: ["3.10"]  # newest supported version only
+        poetry-version: [1.3.0]
+        networkx: [stable, main]
+    name: Integration-test ${{ matrix.os }} - py${{ matrix.python-version }} - Networkx ${{ matrix.networkx }}
+    runs-on: ${{ matrix.os }}-latest
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: 'x64'
+      - name: Install Poetry ${{ matrix.poetry-version }}
+        uses: abatilo/actions-poetry@v2.0.0  # NOTE(review): ref was garbled in extraction; confirm the pinned version
+        with:
+          poetry-version: ${{ matrix.poetry-version }}
+      - name: Install Poetry Dynamic Versioning Plugin
+        run: pip install poetry-dynamic-versioning
+      - name: Install packages via poetry
+        run: |
+          poetry install --with test,ts
+      # Without this step the `networkx: main` matrix entry would still run against the stable release
+      - name: Install Networkx (main)
+        if: "matrix.networkx == 'main'"
+        run: |
+          pip uninstall -yq networkx
+          pip install --progress-bar off git+https://github.com/networkx/networkx
+      - name: Run pytest  # headless via Xvfb on linux
+        run: poetry run poe integration_test
 
   # release is ran when a release is made on Github
   release:

diff --git a/docs/Makefile b/docs/Makefile
@@ -51,6 +51,7 @@ clean:
 	-rm -rf auto_examples/
 	-rm -rf generated/*
 	-rm -rf modules/generated/*
+	-rm -rf reference/simulation/generated/*
 
 html:
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html

diff --git a/docs/api.rst b/docs/api.rst
@@ -93,6 +93,7 @@ The following are useful functions that operate specifically on time-series grap
    has_homologous_edges
    nodes_in_time_order
 
+
 Visualization of causal graphs
 ==============================
 Visualization of causal graphs is different compared to networkx because causal graphs
@@ -116,3 +117,9 @@ Utilities for debugging
 
    sys_info
 
+Simulation
+==========
+.. toctree::
+   :maxdepth: 1
+
+   reference/simulation/index
diff --git a/docs/conf.py b/docs/conf.py
@@ -66,6 +66,15 @@
 copybutton_prompt_text = r">>> |\.\.\. |\$ "
 copybutton_prompt_is_regexp = True
 
+# -- Warnings management -----------------------------------------------------
+def setup(app):
+    # Drop the ".ipynb" source suffix (if registered) so notebooks are ignored
+    app.registry.source_suffix.pop(".ipynb", None)
+
+warnings.filterwarnings(
+    "ignore", category=UserWarning
+)
+
 # generate autosummary even if no references
 # -- sphinx.ext.autosummary
 autosummary_generate = True
@@ -77,13 +86,13 @@
 
 # -- numpydoc
 # Below is needed to prevent errors
-# numpydoc_xref_param_type = True
+numpydoc_xref_param_type = False
 numpydoc_show_inherited_class_members = False
 numpydoc_show_class_members = False
 numpydoc_class_members_toctree = False
 numpydoc_attributes_as_param_list = True
 numpydoc_use_blockquotes = True
-# numpydoc_validate = True
+numpydoc_validate = False
 
 numpydoc_xref_ignore = {
     # words
@@ -186,6 +195,7 @@
     "column": "pandas.DataFrame.columns",
 }
 
+
 default_role = "obj"
 
 # Tell myst-parser to assign header anchors for h1-h3.
@@ -280,7 +290,7 @@
     "reference_url": {
         "pywhy_graphs": None,
     },
-    "backreferences_dir": "generated",
+    "backreferences_dir": "./generated",
     "plot_gallery": "True",  # Avoid annoying Unicode/bool default warning
     "examples_dirs": ["../examples"],
     "gallery_dirs": ["auto_examples"],
@@ -322,17 +332,15 @@
     },
 }
 
-# Enable nitpicky mode - which ensures that all references in the docs
+# Enable/Disable nitpicky mode - which ensures that all references in the docs
 # resolve.
 
-nitpicky = False
+nitpicky = True
 nitpick_ignore = [
     ("py:obj", "nx.MixedEdgeGraph"),
     ("py:obj", "networkx.MixedEdgeGraph"),
     ("py:obj", "pywhy_graphs.networkx.MixedEdgeGraph"),
     ("py:obj", "pywhy_nx.MixedEdgeGraph"),
-    ("py:class", "optional"),
-    ("py:class", "array"),
     ("py:class", "pywhy_nx.classes.timeseries.TimeSeriesGraph"),
     ("py:class", "pywhy_nx.classes.timeseries.TimeSeriesDiGraph"),
     ("py:class", "pywhy_nx.classes.timeseries.TimeSeriesMixedEdgeGraph"),
@@ -341,18 +349,8 @@
     ("py:class", "pywhy_nx.classes.timeseries.StationaryTimeSeriesMixedEdgeGraph"),
     ("py:class", "pywhy_graphs.classes.timeseries.base.tsdict"),
     ("py:class", "networkx.classes.mixededge.MixedEdgeGraph"),
-    ("py:class", "numpy._typing._array_like._SupportsArray"),
-    ("py:class", "numpy._typing._nested_sequence._NestedSequence"),
 ]
 nitpick_ignore_regex = [
-    ('py:obj', r"pywhy_graphs\.classes\.timeseries*"),
+    ('py:obj', r'pywhy_graphs.*timeseries.*'),
     ('py:obj', r"networkx*"),
 ]
-
-
-# -- Warnings management -----------------------------------------------------
-def setup(app):
-    # Ignore .ipynb files
-    app.registry.source_suffix.pop(".ipynb", None)
-
-warnings.filterwarnings("ignore", category=UserWarning)
diff --git a/docs/reference/simulation/index.rst b/docs/reference/simulation/index.rst
@@ -0,0 +1,20 @@
+.. _simulation:
+
+*****************
+Simulation module
+*****************
+
+We provide functions for simulating structural causal models starting from a
+causal graph. This is useful for testing causal discovery algorithms, which assume
+that an underlying graph exists and that the data are generated faithfully to it.
+
+
+Time-series simulations
+=======================
+
+.. automodule:: pywhy_graphs.simulate
+.. autosummary::
+
+   simulate_linear_var_process
+   simulate_data_from_var
+   simulate_var_process_from_summary_graph
diff --git a/pyproject.toml b/pyproject.toml
@@ -112,7 +112,7 @@ _apply_version = 'semversioner release'
 type_check = 'mypy -p pywhy_graphs --config-file pyproject.toml'
 unit_test = 'pytest ./pywhy_graphs ./integration_tests --cov=pywhy_graphs --cov-report=xml --cov-config=pyproject.toml'
 integration_test = 'pytest ./integration_tests'
-build_docs = 'make -C docs clean html'
+build_docs = 'make -C docs clean html-noplot'
 
 [[tool.poe.tasks.lint]]
 sequence = ['_flake8', '_bandit', '_codespell', '_pydocstyle']
@@ -180,7 +180,12 @@ filterwarnings = []
 branch = true
 cover_pylib = false
 source = ['pywhy_graphs']
-omit = ['**/__init__.py', '**/tests/**']
+omit = [
+    '**/__init__.py',
+    'pywhy_graphs/config.py',
+    '**/networkxprotocol.py',
+    '**/tests/**',
+]
 
 [tool.coverage.report]
 exclude_lines = ['pragma: no cover', 'if __name__ == .__main__.:']

diff --git a/pywhy_graphs/algorithms/tests/test_cyclic.py b/pywhy_graphs/algorithms/tests/test_cyclic.py
@@ -1,9 +1,17 @@
+from itertools import chain, combinations
+
 import networkx as nx
 
 import pywhy_graphs
 import pywhy_graphs.networkx as pywhy_nx
 
 
+def powerset(iterable):
+    "Iterate over all subsets as tuples: powerset([1,2]) --> () (1,) (2,) (1,2)"
+    s = list(iterable)
+    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
+
+
 def test_acyclification():
     """Test acyclification procedure as specified in :footcite:`Mooij2020cyclic`.
 
@@ -62,3 +70,31 @@ def test_acyclification():
     for edge_type, graph in acyclic_G.get_graphs().items():
         expected_graph = expected_G.get_graphs(edge_type)
         assert nx.is_isomorphic(graph, expected_graph)
+
+
+def test_sigma_separated():
+    """Test the ``sigma_separated`` check on a directed cycle.
+
+    Note: sigma-separation is impossible within a cycle (i.e. same
+    strongly connected component).
+    """
+    # create a circular graph from 0 -> ... -> 4 -> 0
+    cyclic_G = nx.circulant_graph(5, offsets=[1], create_using=nx.DiGraph)
+    cyclic_G = pywhy_nx.MixedEdgeGraph(graphs=[cyclic_G], edge_types=["directed"])
+    cyclic_G.add_edge_type(nx.Graph(), edge_type="bidirected")
+
+    for (u, v) in combinations(cyclic_G.nodes, 2):
+        other_nodes = set(cyclic_G.nodes)
+        other_nodes.remove(u)
+        other_nodes.remove(v)
+        for z in powerset(other_nodes):
+            assert not pywhy_graphs.sigma_separated(cyclic_G, {u}, {v}, set(z))
+
+    # on the other hand, a child "x" hanging off the cycle at node 3 is
+    # sigma-separated from every other cycle node when conditioning on {3}
+    cyclic_G.add_edge(3, "x", edge_type="directed")
+    other_nodes = set(cyclic_G.nodes)
+    other_nodes.remove(3)
+    other_nodes.remove("x")
+    for u in other_nodes:
+        assert pywhy_graphs.sigma_separated(cyclic_G, {u}, {"x"}, {3})
diff --git a/pywhy_graphs/classes/timeseries/__init__.py b/pywhy_graphs/classes/timeseries/__init__.py
@@ -1,4 +1,5 @@
 from .base import BaseTimeSeriesGraph
+from .conversion import numpy_to_tsgraph, tsgraph_to_numpy
 from .cpdag import StationaryTimeSeriesCPDAG
 from .mixededge import StationaryTimeSeriesMixedEdgeGraph, TimeSeriesMixedEdgeGraph
 from .pag import StationaryTimeSeriesPAG

diff --git a/pywhy_graphs/classes/timeseries/conversion.py b/pywhy_graphs/classes/timeseries/conversion.py
@@ -0,0 +1,98 @@
+from typing import List
+
+import numpy as np
+
+from pywhy_graphs.typing import Node
+
+from .timeseries import StationaryTimeSeriesGraph
+
+
+def tsgraph_to_numpy(G, var_order: List[Node] = None):
+    """Convert stationary timeseries graph to numpy array.
+
+    Parameters
+    ----------
+    G : StationaryTimeSeriesGraph
+        A stationary timeseries graph. Can be undirected, or directed.
+    var_order : list of Node, optional
+        The order of the variables along the first two axes of
+        ``ts_graph_arr``. By default None, which uses ``list(G.variables)``.
+
+    Returns
+    -------
+    ts_graph_arr : ArrayLike of shape (n_variables, n_variables, max_lag + 1)
+        The resulting 3D numpy array representing the stationary time-series
+        graph. Every edge is encoded as 1 (edge types are not distinguished).
+        Entry ``[i, j, lag]`` marks the edge ``(i, -lag) -> (j, 0)``, i.e. rows
+        are the "from nodes" and columns are the "to nodes".
+    """
+    # build a 0/1 array indexed as [from variable, to variable, lag]:
+    # entry [i, j, lag] is 1 when G has the edge
+    # (var_order[i], -lag) -> (var_order[j], 0), and 0 otherwise
+    if var_order is None:
+        var_order = list(G.variables)
+    n_variables = len(var_order)
+    max_lag = G.max_lag
+
+    ts_graph_arr = np.zeros((n_variables, n_variables, max_lag + 1))
+
+    for node_idx, node_x in enumerate(var_order):
+        for node_jdx, node_y in enumerate(var_order):
+            for lag in range(max_lag + 1):
+                if G.has_edge((node_x, -lag), (node_y, 0)):
+                    ts_graph_arr[node_idx, node_jdx, lag] = 1
+    return ts_graph_arr
+
+
+def numpy_to_tsgraph(arr, var_order: List[Node] = None, create_using=StationaryTimeSeriesGraph):
+    """Convert 3D numpy array into a stationary time-series graph.
+
+    Parameters
+    ----------
+    arr : ArrayLike of shape (n_variables, n_variables, max_lag + 1)
+        The input 3D numpy array representing the stationary time-series
+        graph. The rows are considered the "from nodes" and the columns are considered
+        the "to nodes".
+    var_order : List[Node], optional
+        The variables in order of the rows/columns of the first two axes of ``arr``.
+        By default None, which names the variables ``0, ..., n_variables - 1``.
+    create_using : PyWhy_Graph graph constructor, optional (default=StationaryTimeSeriesGraph)
+        Graph type to create. It is called as ``create_using(max_lag=max_lag)``.
+
+    Returns
+    -------
+    G : StationaryTimeSeriesGraph
+        The resulting stationary timeseries graph.
+    """
+    n_variables, _, max_lag = arr.shape
+    if n_variables != arr.shape[1]:
+        raise RuntimeError(
+            f"The first two axes of ``arr`` should be the number of variables. "
+            f"It is {arr.shape} right now."
+        )
+    max_lag -= 1
+
+    if var_order is None:
+        var_order = list(range(n_variables))
+
+    # XXX: do some error checking on the values within arr
+    # if not all(val in VALUE_TO_EDGE_MAPPING for val in arr.flatten()):
+    #     raise ValueError(f'The 3D stationary timeseries array must only contain values '
+    #         f'within our value -> edge mapping: {VALUE_TO_EDGE_MAPPING}.')
+
+    # first we instantiate the time-series graph with the max lag implied by ``arr``
+    G = create_using(max_lag=max_lag)
+    G.add_variables_from(var_order)
+
+    # now we add edges according to the array
+    for non_lag_node in G.nodes_at(t=0):
+        to_idx = var_order.index(non_lag_node[0])
+        for lag in range(0, max_lag + 1):
+            for lag_node in G.nodes_at(t=lag):
+                from_idx = var_order.index(lag_node[0])
+
+                # XXX: improve to allow different edge types
+                if arr[from_idx, to_idx, lag] > 0:
+                    G.add_edge(lag_node, non_lag_node)
+
+    return G