Skip to content

[ENH] update ci and fix lint warnings #321

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 2 additions & 88 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,91 +9,5 @@ on:
- master

jobs:
build:
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
timeout-minutes: 15
name: ${{ matrix.name }} (${{ matrix.os }}, ${{ matrix.python-version }})
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04, macos-10.15, windows-2019]
python-version: [3.7, 3.8]
tox_env: [py-orange-released]
experimental: [false]
name: [Released]
include:
- os: windows-latest
python-version: 3.8
tox_env: py-orange-released
experimental: true
name: Windows10
- os: macos-11.0
python-version: 3.8
tox_env: py-orange-released
experimental: true
name: Big Sur

- os: windows-2019
python-version: 3.7
tox_env: py-orange-oldest
experimental: false
name: Oldest
- os: macos-10.15
python-version: 3.7
tox_env: py-orange-oldest
name: Oldest
experimental: false
- os: ubuntu-18.04
python-version: 3.7
tox_env: py-orange-oldest
name: Oldest
experimental: false

- os: windows-2019
python-version: 3.8
tox_env: py-orange-latest
experimental: false
name: Latest
- os: macos-10.15
python-version: 3.8
tox_env: py-orange-latest
experimental: false
name: Latest
- os: ubuntu-18.04
python-version: 3.8
tox_env: py-orange-latest
experimental: false
name: Latest

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade tox tox-pip-version

- name: Set environment variable
# this step sets QT_QPA_PLATFORM env which is mandatory on Linux and
# it is skipped on Windows since it produces SEGFAULT when WebView used
if: runner.os != 'Windows'
run: |
echo "QT_QPA_PLATFORM=offscreen" >> $GITHUB_ENV

- name: Test with Tox
run: |
tox -e ${{ matrix.tox_env }}

- name: Upload code coverage
if: |
matrix.python-version == '3.8' &&
matrix.os == 'ubuntu-18.04' &&
matrix.tox_env == 'py-orange-released'
run: |
pip install codecov
codecov
test:
uses: biolab/orange-ci-cd/.github/workflows/test-addons.yml@master
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ repos:
hooks:
- id: flake8
language_version: python3
additional_dependencies: ['flake8-comprehensions', 'pep8-naming', 'flake8-black']
additional_dependencies: ['flake8-comprehensions', 'flake8-bugbear', 'pep8-naming', 'flake8-black']
107 changes: 85 additions & 22 deletions orangecontrib/bioinformatics/annotation/annotate_projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def cluster_data(coordinates, clustering_algorithm=DBSCAN, **kwargs):
clusters = learner(coordinates)
if not isinstance(clusters, np.ndarray): # old clustering method
clusters = clusters(coordinates)
clusters = np.array(list(map(int, map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0])))).flatten()
clusters = np.array(
list(
map(int, map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0]))
)
).flatten()

    # sort classes in descending order based on the number of cases in the cluster
sorted_clust_idx = [v for v, _ in Counter(clusters).most_common() if v != -1]
Expand All @@ -85,7 +89,12 @@ def cluster_data(coordinates, clustering_algorithm=DBSCAN, **kwargs):

# create the table
new_domain = Domain(
[DiscreteVariable("Clusters", values=["C{}".format(i) for i in range(1, len(sorted_clust_idx) + 1)])]
[
DiscreteVariable(
"Clusters",
values=["C{}".format(i) for i in range(1, len(sorted_clust_idx) + 1)],
)
]
)
return Table(new_domain, new_clustering.reshape((-1, 1)))

Expand Down Expand Up @@ -116,7 +125,10 @@ def assign_labels(clusters, annotations, labels_per_cluster):
clusters_unique = set(clusters.domain[0].values)

if len(annotations.domain) == 0:
return {}, Table(Domain([DiscreteVariable("Annotation", values=[])]), np.ones((len(clusters), 1)) * np.nan)
return {}, Table(
Domain([DiscreteVariable("Annotation", values=[])]),
np.ones((len(clusters), 1)) * np.nan,
)

labels = np.array(list(map(str, annotations.domain.attributes)))

Expand All @@ -134,7 +146,12 @@ def assign_labels(clusters, annotations, labels_per_cluster):

annotations_clusters = {}
for cl in clusters_unique:
mask = np.array(list(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0]))).flatten() == cl
mask = (
np.array(
list(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0]))
).flatten()
== cl
)
labels_cl = items_annotations[mask]
# remove nans from labels
labels_cl_filtered = labels_cl[~(labels_cl == "")]
Expand All @@ -143,15 +160,17 @@ def assign_labels(clusters, annotations, labels_per_cluster):
common_labels = counts.most_common(labels_per_cluster)

if len(common_labels) > 0:
annotations_clusters[cl] = [(l, c / len(labels_cl)) for l, c in common_labels]
annotations_clusters[cl] = [
(label, c / len(labels_cl)) for label, c in common_labels
]

# pack item annotations to Table
nan_mask = items_annotations == ""
values, indices = np.unique(items_annotations[~nan_mask], return_inverse=True)
corrected_idx = np.ones(items_annotations.shape) * np.nan
corrected_idx[~nan_mask] = indices
domain = Domain([DiscreteVariable("Annotation", values=values)])
item_annotations = Table(domain, corrected_idx.reshape((-1, 1)))
item_annotations = Table.from_list(domain, corrected_idx.reshape((-1, 1)))

return annotations_clusters, item_annotations

Expand All @@ -177,7 +196,12 @@ def labels_locations(coordinates, clusters):
clusters_unique = set(clusters.domain[0].values) - {"-1"} # -1 is not clustered
locations = {}
for cl in clusters_unique:
mask = np.array(list(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0]))).flatten() == cl
mask = (
np.array(
list(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0]))
).flatten()
== cl
)
cl_coordinates = coordinates.X[mask, :]
x, y = 1 / 2 * (np.min(cl_coordinates, axis=0) + np.max(cl_coordinates, axis=0))
locations[cl] = (x, y)
Expand Down Expand Up @@ -287,7 +311,9 @@ def _find_hull(edges_list, points_list, starting_edge):
# a polygon
ang = -1
for i in range(ind_left, ind_right):
cur_ang = _angle((poly[-2], poly[-1]), (poly[-1], points_list[edges_list[i][1]]))
cur_ang = _angle(
(poly[-2], poly[-1]), (poly[-1], points_list[edges_list[i][1]])
)
if cur_ang > ang:
ang = cur_ang
ind_left = i
Expand Down Expand Up @@ -410,7 +436,9 @@ def add_edge(edges_list, i, j):
return polygon

hulls = {}
clusters_array = np.array(list(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0])))
clusters_array = np.array(
list(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0]))
)
for cl in set(clusters_array) - {"None", "?"}:
points = coordinates.X[clusters_array == cl]

Expand Down Expand Up @@ -440,7 +468,9 @@ def add_edge(edges_list, i, j):
        # buffer the hull for epsilon * -2
pco2 = pyclipper.PyclipperOffset()
pco2.AddPath(im_solution[0], pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
solution = pyclipper.scale_from_clipper(pco2.Execute(epsilon * scaling_factor * (-2)), scaling_factor)
solution = pyclipper.scale_from_clipper(
pco2.Execute(epsilon * scaling_factor * (-2)), scaling_factor
)

hulls[cl] = np.array(solution).reshape(-1, 2)
return hulls
Expand All @@ -450,21 +480,44 @@ def _filter_clusters(clusters, clusters_meta):
"""
    Function removes clusters that do not have any labels
"""
clust_map = {c: i for i, c in enumerate(c for c in clusters.domain["Clusters"].values if c in clusters_meta)}
clust_map = {
c: i
for i, c in enumerate(
c for c in clusters.domain["Clusters"].values if c in clusters_meta
)
}

# change cluster indices
clust_idx = np.zeros(clusters.X.shape) * np.nan
for i, cl in enumerate(map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0])):
for i, cl in enumerate(
map(clusters.domain.attributes[0].repr_val, clusters.X[:, 0])
):
clust_idx[i, 0] = clust_map.get(cl, np.nan)
new_clusters = Table(
Domain([DiscreteVariable("Clusters", values=clusters.domain["Clusters"].values[: len(clust_map)])]), clust_idx
Domain(
[
DiscreteVariable(
"Clusters",
values=clusters.domain["Clusters"].values[: len(clust_map)],
)
]
),
clust_idx,
)
# change cluster names in metas
new_clusters_meta = {"C{}".format(clust_map[cl] + 1): v for cl, v in clusters_meta.items()}
new_clusters_meta = {
"C{}".format(clust_map[cl] + 1): v for cl, v in clusters_meta.items()
}
return new_clusters, new_clusters_meta


def annotate_projection(annotations, coordinates, clustering_algorithm=DBSCAN, labels_per_cluster=3, **kwargs):
def annotate_projection(
annotations,
coordinates,
clustering_algorithm=DBSCAN,
labels_per_cluster=3,
**kwargs
):
"""
    Function clusters the data based on coordinates, and assigns a certain number
of labels per cluster. Each cluster gets `labels_per_cluster` number of most
Expand Down Expand Up @@ -493,10 +546,14 @@ def annotate_projection(annotations, coordinates, clustering_algorithm=DBSCAN, l
The coordinates for locating the label. Dictionary with cluster index
as a key and tuple (x, y) as a value.
"""
assert len(annotations) == len(coordinates), "Number of coordinates does not match to number of annotations"
assert len(annotations) == len(
coordinates
), "Number of coordinates does not match to number of annotations"
    # sklearn clustering wants to have at least one example
assert len(coordinates) > 0, "At least one data point need to be provided"
assert len(coordinates.domain) > 0, "Coordinates need to have at least one attribute"
assert (
len(coordinates.domain) > 0
), "Coordinates need to have at least one attribute"

eps = kwargs.get("eps", get_epsilon(coordinates))
if clustering_algorithm == DBSCAN:
Expand All @@ -506,7 +563,9 @@ def annotate_projection(annotations, coordinates, clustering_algorithm=DBSCAN, l
clusters = cluster_data(coordinates, clustering_algorithm, **kwargs)

# assign top n labels to group
annotations_cl, item_annotations = assign_labels(clusters, annotations, labels_per_cluster)
annotations_cl, item_annotations = assign_labels(
clusters, annotations, labels_per_cluster
)

labels_loc = labels_locations(coordinates, clusters)

Expand Down Expand Up @@ -569,7 +628,8 @@ def point_in_polygon_test(test_point, polygon_points):
is_inside = False

for (x1, y1), (x2, y2) in zip(
polygon_points, np.concatenate((polygon_points[1:], polygon_points[:1]), axis=0)
polygon_points,
np.concatenate((polygon_points[1:], polygon_points[:1]), axis=0),
):
# ray crosses the edge if test_y between both y from an edge
# and if intersection on the right of the test_x
Expand All @@ -594,18 +654,21 @@ def point_in_polygon_test(test_point, polygon_points):
# create the table
new_domain = Domain(
[
DiscreteVariable("Clusters", values=sorted(list(hulls.keys())))
DiscreteVariable("Clusters", values=sorted(hulls.keys()))
if cluster_attribute is None
else cluster_attribute
]
)
return Table(new_domain, np.array(list(map(new_domain[0].to_val, clusters))).reshape(-1, 1))
return Table(
new_domain, np.array(list(map(new_domain[0].to_val, clusters))).reshape(-1, 1)
)


if __name__ == "__main__":
# run hull creation at Iris data
data = Table("iris")[:, 2:4]
clustered_data = Table(
Domain([DiscreteVariable("cl", values=["1", "2", "3"])]), [[0]] * 50 + [[1]] * 50 + [[2]] * 50
Domain([DiscreteVariable("cl", values=["1", "2", "3"])]),
[[0]] * 50 + [[1]] * 50 + [[2]] * 50,
)
compute_concave_hulls(data, clustered_data, epsilon=0.5)
Loading