
Commit fc0c82e

Merge pull request #1 from Minitour/feature/code-enhancements
Code refactoring
2 parents 0639a8b + f66d11d commit fc0c82e

14 files changed: +264 -118 lines changed

.github/workflows/release.yml (+34 -5)

@@ -1,12 +1,16 @@
-name: Publish Python Package
+name: CI Pipeline

 on:
   push:
+    branches: # Run on pushes to any branch
+      - '*'
+  pull_request: # Run on pull requests to any branch
     branches:
-      - master
+      - '*'

 jobs:
-  build:
+  test:
+    name: Run Unit Tests
     runs-on: ubuntu-latest

     steps:
@@ -15,13 +19,38 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
-          python-version: '3.9'
+          python-version: '3.12'

       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install poetry
-          poetry install
+          poetry install --with test
+
+      - name: Run Tests
+        run: |
+          poetry run pytest
+
+
+  release:
+    name: Publish Python Package
+    needs: test # Ensure tests pass before publishing
+    runs-on: ubuntu-latest
+    if: github.ref == 'refs/heads/master'
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install poetry
+          poetry install

       - name: Publish package
         env:

README.md (+34 -5)

@@ -42,13 +42,25 @@ model = ChatGptModelParaphrase(api_key='sk-xyz', model='gpt-4o', temperature=0.7

 ```python
 from verbalizer.process import Processor
+from verbalizer.vocabulary import Vocabulary
+from verbalizer import Verbalizer

-ontology = 'pizza.ttl'
-name = 'pizza'
-processor = Processor(llm=model, vocab_ignore=ignore, vocab_rephrased=rephrased, min_statements=1)
-processor.process(name, ontology, output_dir='/path/to/my/output')
+ontology = Processor.from_file('pizza.ttl')
+
+# create vocabulary and verbalizer
+vocab = Vocabulary(ontology, ignore=ignore, rephrased=rephrased)
+verbalizer = Verbalizer(vocab)
+
+# start verbalization process
+results = Processor.verbalize_with(verbalizer, namespace="pizza", output_dir="./output")
 ```

+## Examples
+
+<details>
+
+<summary>Expand to see examples</summary>
+
 <table border="1">
 <tr>
 <th>OWL Fragment</th>
@@ -175,4 +187,21 @@ processor.process(name, ontology, output_dir='/path/to/my/output')
 Chicken topping is a type of meat topping that has at least some mild spiciness. It is different from pepperoni sausage topping, hot spiced beef topping, and ham topping.
 </td>
 </tr>
-</table>
+</table>
+
+</details>
+
+
+## Citation
+
+```
+@inproceedings{zaitoun2024generating,
+  title={Generating Ontology-Learning Training-Data through Verbalization},
+  author={Zaitoun, Antonio and Sagi, Tomer and Peleg, Mor},
+  booktitle={Proceedings of the AAAI Symposium Series},
+  volume={4},
+  number={1},
+  pages={233--241},
+  year={2024}
+}
+```
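Note on the updated README snippet: the new unit tests added in this PR indicate that `Processor.verbalize_with` returns a list of per-concept records, each carrying at least a `fragment` key with the OWL fragment that was verbalized. A minimal sketch of consuming those results, assuming the README snippet above has already run (any record field other than `fragment` would be an assumption):

```python
# Minimal sketch: iterate the records returned by Processor.verbalize_with.
# Only the 'fragment' key is confirmed by the new tests in this PR; any other
# field name would be an assumption about the record layout.
for entry in results:
    print(entry['fragment'])  # the Turtle fragment that was verbalized

print(f"{len(results)} concepts verbalized")
```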

evaluations/processed.py (+1 -1)

@@ -113,4 +113,4 @@ def test_evaluation(self):
         for ontology_name, contents in ontologies.items():
             file = contents['file']
             sampler = CustomSampler(samples=contents['samples'])
-            processor.process(ontology_name, file, data_sampler=sampler)
+            processor.verbalize_with(ontology_name, file, sampler=sampler)

playground.py (+17 -5)

@@ -4,6 +4,8 @@
 from verbalizer.nlp import ChatGptModelParaphrase, LlamaModelParaphrase
 from verbalizer.process import Processor
 from verbalizer.sampler import Sampler
+from verbalizer.verbalizer import Verbalizer
+from verbalizer.vocabulary import Vocabulary

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -87,9 +89,19 @@

 sampler = Sampler(sample_n=100, seed=42)

+ontologies = [
+    ('people', Processor.from_file('./data/people.ttl')),
+    ('pizza', Processor.from_file('./data/pizza.ttl')),
+    ('mondo', Processor.from_file('./data/mondo.owl')),
+    ('fma', Processor.from_file('./data/fma.owl')),
+]
+
+vocabularies = [
+    (namespace, Vocabulary(ontology, ignore=ignore, rephrased=rephrased))
+    for namespace, ontology in ontologies
+]
+
 for model in models:
-    processor = Processor(llm=model, vocab_ignore=ignore, vocab_rephrased=rephrased, min_statements=1)
-    processor.process('people', './data/people.ttl')
-    processor.process('pizza', './data/pizza.ttl')
-    processor.process('mondo', './data/mondo.owl', data_sampler=sampler)
-    processor.process('fma', './data/fma.owl', data_sampler=sampler)
+    for namespace, vocabulary in vocabularies:
+        verbalizer = Verbalizer(vocabulary, language_model=model)
+        results = Processor.verbalize_with(verbalizer, namespace=namespace, output_dir="./output", sampler=sampler)

pyproject.toml (+5 -2)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ontology-verbalizer"
-version = "1.0.1"
+version = "1.1.0"
 description = "A Python package for ontology verbalization"
 authors = ["Antonio Zaitoun <[email protected]>"]
 license = "MIT"
@@ -10,12 +10,15 @@ packages = [
 ]
 repository = "https://github.com/Minitour/ontology-verbalizer"
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.12"
 rdflib = "~7.0.0"
 openai = "~1.12.0"
 pandas = "~2.2.0"
 tqdm = "~4.66.2"

+[tool.poetry.group.test.dependencies]
+pytest = "~8.3.4"
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"

tests/__init__.py

Whitespace-only changes.

tests/test_verbalization.py (+66)

@@ -0,0 +1,66 @@
+import unittest
+
+from rdflib import Graph
+
+from verbalizer.process import Processor
+from verbalizer.sampler import Sampler
+from verbalizer.vocabulary import Vocabulary
+from verbalizer import Verbalizer
+
+rename_iri = {
+    'http://www.w3.org/2002/07/owl#equivalentClass': 'is same as',
+    'http://www.w3.org/2000/01/rdf-schema#subClassOf': 'is a type of',
+    'http://www.w3.org/2002/07/owl#intersectionOf': 'all of',
+    'http://www.w3.org/2002/07/owl#unionOf': 'any of',
+    'http://www.w3.org/2002/07/owl#disjointWith': 'is different from',
+    'http://www.w3.org/2002/07/owl#withRestrictions': 'must be'
+}
+ignore_iri = {
+    'http://www.w3.org/2002/07/owl#onDatatype',
+    'http://www.w3.org/2000/01/rdf-schema#seeAlso',
+    'http://www.w3.org/2000/01/rdf-schema#label',
+    'http://www.w3.org/2000/01/rdf-schema#comment',
+    'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
+    'http://www.w3.org/2000/01/rdf-schema#isDefinedBy',
+    'http://www.w3.org/2003/06/sw-vocab-status/ns#term_status',
+    'http://www.w3.org/2000/01/rdf-schema#Class'
+}
+
+
+class TestVerbalization(unittest.TestCase):
+
+    def test_verbalization(self):
+        # graph
+        ontology = Processor.from_file('./data/foaf.owl')
+
+        # create vocabulary
+        vocab = Vocabulary(ontology, ignore=ignore_iri, rephrased=rename_iri)
+
+        # create verbalizer
+        verbalizer = Verbalizer(vocab)
+
+        results = Processor.verbalize_with(verbalizer, namespace='foaf')
+        self.assertEqual(12, len(results))
+
+        # Add default prefix (won't work without this)
+        fragment_sample = '@prefix : <https://zaitoun.dev#> .\n' + results[0]['fragment']
+        g = Graph()
+        g.parse(data=fragment_sample, format="turtle")
+
+        self.assertEqual(7, len(list(g.triples((None, None, None)))))
+
+    def test_verbalization_with_sampler(self):
+        # graph
+        ontology = Processor.from_file('./data/foaf.owl')
+
+        # create vocabulary
+        vocab = Vocabulary(ontology, ignore=ignore_iri, rephrased=rename_iri)
+
+        # create verbalizer
+        verbalizer = Verbalizer(vocab)
+
+        sampler = Sampler(sample_n=10, seed=42)
+        results = Processor.verbalize_with(verbalizer, namespace='foaf', sampler=sampler)
+
+        # although we sampled 10, only 7 were applicable.
+        self.assertEqual(7, len(results))

verbalizer/__init__.py (+1)

@@ -0,0 +1 @@
+from .verbalizer import Verbalizer

verbalizer/patterns/owl_disjoint.py (+5 -2)

@@ -1,7 +1,7 @@
 from rdflib import URIRef

 from verbalizer.patterns import Pattern
-from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge
+from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge, default_patterns
 from verbalizer.vocabulary import Vocabulary


@@ -46,7 +46,8 @@ def normalize(self, node: VerbalizationNode, triple_collector):
             relation_display = self.vocab.get_relationship_label(relation)

             if relation_display == Vocabulary.IGNORE_VALUE:
-                triple_collector.append((node.concept, relation, obj))
+                if self.vocab.should_keep(relation):
+                    triple_collector.append((node.concept, relation, obj))
                 continue

             next_node = VerbalizationNode(obj, parent_path=node.get_parent_path() + [(node.concept, relation)])
@@ -56,3 +57,5 @@ def normalize(self, node: VerbalizationNode, triple_collector):
             triple_collector.append((node.concept, relation, obj))

         return [(reference.relationship, reference.node.concept) for reference in node.references]
+
+default_patterns.append(OwlDisjointWith)

verbalizer/patterns/owl_first_rest.py (+3 -1)

@@ -1,7 +1,7 @@
 from verbalizer.patterns import Pattern
 from rdflib import URIRef

-from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge
+from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge, default_patterns


 class OwlFirstRestPattern(Pattern):
@@ -37,3 +37,5 @@ def normalize(self, node: VerbalizationNode, triple_collector):
             current = rest_node

         return [(reference.relationship, reference.node.concept) for reference in node.references]
+
+default_patterns.append(OwlFirstRestPattern)

verbalizer/patterns/owl_restriction.py (+3 -1)

@@ -1,7 +1,7 @@
 from rdflib import URIRef

 from verbalizer.patterns import Pattern
-from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge
+from verbalizer.verbalizer import VerbalizationNode, VerbalizationEdge, default_patterns


 class OwlRestrictionPattern(Pattern):
@@ -127,3 +127,5 @@ def _handle_cardinality(self, quantifier_relation, property_relation, obj_litera
             return f'has at least {literal_value}{on_class_label}{property_relation_label}{relation_plural_s}'
         elif quantifier_relation.endswith('maxCardinality') or quantifier_relation.endswith('maxQualifiedCardinality'):
             return f'has at most {literal_value}{on_class_label}{property_relation_label}{relation_plural_s}'
+
+default_patterns.append(OwlRestrictionPattern)
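Note on the three pattern files above: each module now registers its pattern class in `verbalizer.verbalizer.default_patterns` at import time instead of being wired up by the caller. A minimal sketch of how that registration becomes visible, based only on the names shown in these diffs (whether `verbalizer.verbalizer` already imports the pattern modules itself is not visible in this PR, so the explicit imports here are a conservative assumption):

```python
# Minimal sketch: importing the pattern modules is enough to register them.
# Only Pattern subclasses, the pattern module paths, and default_patterns are
# confirmed by this PR; everything else here is illustrative.
from verbalizer.verbalizer import default_patterns
from verbalizer.patterns import owl_disjoint, owl_first_rest, owl_restriction  # noqa: F401

# After the imports above, the three patterns from this PR are present in the
# default list used by newly constructed Verbalizer instances.
print([cls.__name__ for cls in default_patterns])
```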
