Skip to content

Commit e2f4bb7

Browse files
Merge pull request #1 from jarrodmillman/basic-package
Add basic package for query.py script from devstats-data
2 parents 1a28150 + 8a8d2b8 commit e2f4bb7

File tree

5 files changed

+377
-0
lines changed

5 files changed

+377
-0
lines changed

.github/workflows/lint.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: lint
2+
3+
on: [push, pull_request]
4+
5+
jobs:
6+
format:
7+
runs-on: ${{ matrix.os }}-latest
8+
strategy:
9+
matrix:
10+
os: [ubuntu]
11+
python-version: ["3.10"]
12+
13+
steps:
14+
- uses: actions/checkout@v3
15+
- name: Set up Python ${{ matrix.python-version }}
16+
uses: actions/setup-python@v4
17+
with:
18+
python-version: ${{ matrix.python-version }}
19+
20+
- uses: actions/cache@v3
21+
with:
22+
path: ~/.cache/pip
23+
key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
24+
restore-keys: |
25+
${{ runner.os }}-pip-
26+
27+
- name: Install dependencies
28+
run: |
29+
python -m pip install --upgrade pip
30+
pip install .[lint]
31+
32+
- name: Lint
33+
run: pre-commit run --all-files --show-diff-on-failure --color always

.pre-commit-config.yaml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Install pre-commit hooks via
2+
# pre-commit install
3+
4+
repos:
5+
- repo: https://github.com/pre-commit/pre-commit-hooks
6+
rev: v4.4.0
7+
hooks:
8+
- id: check-added-large-files
9+
- id: check-ast
10+
- id: check-builtin-literals
11+
- id: check-case-conflict
12+
- id: check-json
13+
- id: check-toml
14+
- id: check-yaml
15+
args: [--allow-multiple-documents]
16+
- id: debug-statements
17+
- id: end-of-file-fixer
18+
- id: mixed-line-ending
19+
- id: trailing-whitespace
20+
21+
- repo: https://github.com/psf/black
22+
rev: 23.3.0
23+
hooks:
24+
- id: black
25+
26+
- repo: https://github.com/adamchainz/blacken-docs
27+
rev: 1.13.0
28+
hooks:
29+
- id: blacken-docs
30+
31+
- repo: https://github.com/pre-commit/mirrors-prettier
32+
rev: v2.7.1
33+
hooks:
34+
- id: prettier
35+
files: \.(css|html|md|yml|yaml)
36+
args: [--prose-wrap=preserve]
37+
38+
- repo: https://github.com/asottile/pyupgrade
39+
rev: v3.4.0
40+
hooks:
41+
- id: pyupgrade
42+
args: [--py310-plus]

RELEASE.md

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Release process for `devstats`
2+
3+
## Introduction
4+
5+
Example `version`
6+
7+
- 1.8.dev0 # development version of 1.8 (release candidate 1)
8+
- 1.8rc1 # 1.8 release candidate 1
9+
- 1.8rc2.dev0 # development version of 1.8 release candidate 2
10+
- 1.8 # 1.8 release
11+
- 1.9.dev0 # development version of 1.9 (release candidate 1)
12+
13+
## Process
14+
15+
- Update and review `CHANGELOG.md`:
16+
17+
gem install github_changelog_generator
18+
github_changelog_generator -u scientific-python -p devstats --since-tag=<last tag>
19+
20+
- Update `version` in `pyproject.toml`.
21+
22+
- Commit changes:
23+
24+
git add pyproject.toml CHANGELOG.md
25+
git commit -m 'Designate <version> release'
26+
27+
- Add the version number (e.g., `1.2.0`) as a tag in git:
28+
29+
git tag -s [-u <key-id>] v<version> -m 'signed <version> tag'
30+
31+
If you do not have a gpg key, use -a instead of -s; it is important for
32+
Debian packaging that the tags are annotated
33+
34+
- Push the new meta-data to github:
35+
36+
git push --tags origin main
37+
38+
where `origin` is the name of the `github.com:scientific-python/devstats`
39+
repository
40+
41+
- Review the github release page:
42+
43+
https://github.com/scientific-python/devstats/releases
44+
45+
- Publish on PyPI:
46+
47+
git clean -fxd
48+
pip install -U build twine wheel
49+
python -m build --sdist --wheel
50+
twine upload -s dist/*
51+
52+
- Update `version` in `pyproject.toml`.
53+
54+
- Commit changes:
55+
56+
git add pyproject.toml
57+
git commit -m 'Bump version'
58+
git push origin main

devstats/__init__.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
import os
2+
import requests
3+
import json
4+
import click
5+
6+
# GitHub GraphQL endpoint that every query in this module is POSTed to.
endpoint = r"https://api.github.com/graphql"
# API token, looked up when the module is imported: a missing GRAPH_API_KEY
# environment variable raises KeyError at import time.
token = os.environ["GRAPH_API_KEY"]
# Authorization header sent along with each request.
headers = {"Authorization": f"bearer {token}"}
9+
10+
11+
def load_query_from_file(fname, repo_owner="numpy", repo_name="numpy"):
    """
    Load an 'issue' query from file and set the target repository, where
    the target repository has the format:

    https://github.com/<repo_owner>/<repo_name>

    Parameters
    ----------
    fname : str
        Path to a text file containing a valid issue query according to the
        GitHub GraphQL schema. The file is decoded as UTF-8.
    repo_owner : str
        Owner of target repository on GitHub. Default is 'numpy'.
    repo_name : str
        Name of target repository on GitHub. Default is 'numpy'.

    Returns
    -------
    query : str
        Query loaded from file in text form suitable for ``send_query``.

    Notes
    -----
    This function expects the query to have a specific form and will not work
    for general GitHub GraphQL queries: the file must contain the literal
    placeholders ``_REPO_OWNER_`` and ``_REPO_NAME_``, which are substituted
    (in that order) with `repo_owner` and `repo_name`.
    """
    # Decode explicitly as UTF-8 so the loaded query does not depend on the
    # platform's locale encoding.
    with open(fname, encoding="utf-8") as fh:
        template = fh.read()
    # Set target repo from template
    return template.replace("_REPO_OWNER_", repo_owner).replace(
        "_REPO_NAME_", repo_name
    )
45+
46+
47+
def send_query(query, query_type, cursor=None):
    """
    Send a GraphQL query via requests.post

    No validation is done on the query before sending. GitHub GraphQL
    pagination is supported with the `cursor` argument.

    Parameters
    ----------
    query : str
        The GraphQL query to be sent
    query_type : {"issues", "pullRequests"}
        The object being queried according to the GitHub GraphQL schema.
        Currently only issues and pullRequests are supported
    cursor : str, optional
        If given, then the cursor is injected into the query to support
        GitHub's GraphQL pagination.

    Returns
    -------
    dict
        The result of the query (json) parsed by `json.loads`

    Raises
    ------
    ValueError
        If `query_type` is not supported, or if `cursor` is given but the
        query contains no ``<query_type>(`` argument list to inject it into.

    Notes
    -----
    This is intended mostly for internal use within `get_all_responses`.
    """
    # TODO: Expand this, either by parsing the query type from the query
    # directly or manually adding more query_types to the set
    if query_type not in {"issues", "pullRequests"}:
        raise ValueError(
            "Only 'issues' and 'pullRequests' queries are currently supported"
        )
    # TODO: Generalize this
    # WARNING: The cursor injection depends on the specific structure of the
    # query, this is the main reason why query types are limited to issues/PRs
    if cursor is not None:
        cursor_insertion_key = query_type + "("
        key_start = query.find(cursor_insertion_key)
        if key_start == -1:
            # str.find signals "not found" with -1; using it as an offset
            # would splice the cursor into an arbitrary spot of the query.
            raise ValueError(
                f"Cannot inject cursor: query has no '{cursor_insertion_key}' "
                "argument list"
            )
        cursor_ind = key_start + len(cursor_insertion_key)
        query = query[:cursor_ind] + f'after:"{cursor}", ' + query[cursor_ind:]
    # Build request payload. Newlines are replaced by a space (not removed) so
    # tokens on adjacent lines can never be fused together.
    payload = {"query": query.replace("\n", " ")}
    response = requests.post(endpoint, json=payload, headers=headers)
    return json.loads(response.content)
91+
92+
def get_all_responses(query, query_type):
    """
    Helper function to bypass GitHub GraphQL API node limit.

    Sends `query` once, then keeps re-sending it with the cursor of the last
    edge received until as many edges as the reported total count have been
    collected. Progress is printed after every response.

    Parameters
    ----------
    query : str
        The GraphQL query, as accepted by `send_query`.
    query_type : {"issues", "pullRequests"}
        The object being queried, as accepted by `send_query`.

    Returns
    -------
    list
        The edges of all responses, concatenated in the order received.
    """
    # Get data from a single response
    initial_data = send_query(query, query_type)
    data, last_cursor, total_count = parse_single_query(initial_data, query_type)
    print(f"Retrieving {len(data)} out of {total_count} values...")
    # Continue requesting data (with pagination) until all are acquired
    while len(data) < total_count:
        rdata = send_query(query, query_type, cursor=last_cursor)
        pdata, last_cursor, _ = parse_single_query(rdata, query_type)
        if not pdata:
            # An empty page carries no cursor to continue from; asking again
            # would restart from the first page, so stop here instead.
            print(
                f"Received an empty page after {len(data)} out of "
                f"{total_count} values; stopping."
            )
            break
        data.extend(pdata)
        print(f"Retrieving {len(data)} out of {total_count} values...")
    print("Done.")
    return data


def parse_single_query(data, query_type):
    """
    Parse the data returned by `send_query`

    Parameters
    ----------
    data : dict
        A parsed response; the values are read from
        ``data["data"]["repository"][query_type]``.
    query_type : str
        Key of the queried object inside the ``repository`` entry.

    Returns
    -------
    edges : list
        The ``edges`` list of the response.
    last_cursor : str or None
        The ``cursor`` of the last edge, or None when ``edges`` is empty.
    total_count : int
        The ``totalCount`` value of the response.

    Raises
    ------
    KeyError, TypeError
        If the response does not have the expected structure (for example an
        error response). The response is printed before re-raising.

    .. warning::

       Like `send_query`, the logic here depends on the specific structure
       of the query (e.g. it must be an issue or PR query, and must have a
       total count).
    """
    try:
        connection = data["data"]["repository"][query_type]
        total_count = connection["totalCount"]
        edges = connection["edges"]
        # No edges (e.g. nothing matches the query) means no cursor either.
        last_cursor = edges[-1]["cursor"] if edges else None
    except (KeyError, TypeError):
        # TypeError covers responses where an intermediate value is null.
        # Show the offending response to make the failure diagnosable.
        print(data)
        raise
    return edges, last_cursor, total_count
127+
128+
129+
class GithubGrabber:
    """
    Fetch data through the GitHub API v.4 from a GraphQL query template.
    """

    def __init__(self, query_fname, query_type, repo_owner="numpy", repo_name="numpy"):
        """
        Set up a grabber that sends/receives issue-tracker queries for one
        repository via the GitHub API v.4.

        The repository queried is:
            https://github.com/<repo_owner>/<repo_name>

        The query is loaded from `query_fname` immediately.

        Parameters
        ----------
        query_fname : str
            Path to a valid GraphQL query conforming to the GitHub GraphQL
            schema
        query_type : {"issues", "pullRequests"}
            Type of object that is being queried according to the GitHub
            GraphQL schema. Currently only "issues" and "pullRequests" are
            supported.
        repo_owner : str
            Repository owner. Default is "numpy"
        repo_name : str
            Repository name. Default is "numpy"
        """
        # Nothing has been downloaded yet; `get` fills this in.
        self.raw_data = None
        self.repo_owner, self.repo_name = repo_owner, repo_name
        self.query_type = query_type  # TODO: Parse this directly from query
        self.query_fname = query_fname
        self.load_query()

    def load_query(self):
        """
        Load the query from `query_fname` for the configured repository and
        store it in `query`.
        """
        self.query = load_query_from_file(
            self.query_fname,
            repo_owner=self.repo_owner,
            repo_name=self.repo_name,
        )

    def get(self):
        """
        Run the query and keep the collected responses in `raw_data`.
        """
        responses = get_all_responses(self.query, self.query_type)
        self.raw_data = responses

    def dump(self, outfile):
        """
        Write `raw_data` as JSON to the file `outfile`.

        Raises ValueError when `raw_data` is falsy (None or empty).
        """
        nothing_to_write = not self.raw_data
        if nothing_to_write:
            raise ValueError("raw_data is currently empty, nothing to dump")

        with open(outfile, "w") as fh:
            json.dump(self.raw_data, fh)
182+
183+
184+
@click.command()
@click.argument("repo_owner")
@click.argument("repo_name")
def main(repo_owner, repo_name):
    """Download and save issue and pr data for `repo_owner`/`repo_name`."""
    # One entry per download, processed in order:
    # (query template path, GraphQL object type, output file suffix)
    downloads = (
        ("query_examples/issue_activity_since_date.gql", "issues", "issues"),
        ("query_examples/pr_data_query.gql", "pullRequests", "prs"),
    )
    for query_fname, query_type, suffix in downloads:
        grabber = GithubGrabber(
            query_fname,
            query_type,
            repo_owner=repo_owner,
            repo_name=repo_name,
        )
        grabber.get()
        # Results land in the current directory as <repo_name>_<suffix>.json
        grabber.dump(f"{repo_name}_{suffix}.json")


# Run the click command when the module is executed as a script.
if __name__ == "__main__":
    main()

pyproject.toml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
[project]
2+
name = "devstats"
3+
version = "0.1rc0.dev0"
4+
requires-python = ">=3.10"
5+
description = "Developer tool for scientific Python libraries"
6+
license = {file = "LICENSE"}
7+
maintainers = [
8+
{name = "Scientific Python", email = "[email protected]"}
9+
]
10+
classifiers = [
11+
"Development Status :: 3 - Alpha",
12+
"Programming Language :: Python"
13+
]
14+
dependencies = [
15+
"click",
16+
"requests",
17+
"jupyter",
18+
"notebook",
19+
"numpy",
20+
"networkx",
21+
]
22+
23+
[project.scripts]
# Console entry point: the click command `main` is defined in devstats/__init__.py.
query = "devstats:main"
25+
26+
[project.optional-dependencies]
# Tooling installed by the lint workflow via `pip install .[lint]`.
# NOTE(review): the previous specifier "3.r32" was not a valid version;
# 3.3.2 is assumed to be the intended minimum — confirm.
lint = ["pre-commit >= 3.3.2"]
28+
29+
[project.urls]
30+
homepage = "https://github.com/scientific-python/devstats"
31+
32+
[tool.setuptools.packages.find]
33+
include = ["devstats*"]

0 commit comments

Comments
 (0)