Skip to content

Commit 168e1f1

Browse files
authored
Merge pull request #18 from karel-brinda/dev
v0.1.3
2 parents 8f9b1bd + d48ada6 commit 168e1f1

File tree

11 files changed

+196
-45
lines changed

11 files changed

+196
-45
lines changed

.github/workflows/python-package-conda.yml renamed to .github/workflows/ci.yml

+2 -2
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,6 @@ jobs:
2121
- name: Install dependencies
2222
run: |
2323
conda env update --file environment.yml --name base
24-
- name: Test with pytest
24+
- name: Run all tests
2525
run: |
26-
make tests
26+
make test

README.rst

+6 -5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
Attotree
2-
==================================================================================
2+
========
33

44

55
Introduction
@@ -54,12 +54,12 @@ Command-line parameters
5454
.. code-block::
5555
5656
$ attotree -h
57-
57+
5858
Program: attotree (rapid estimation of phylogenetic trees using sketching)
59-
Version: 0.1.1
59+
Version: 0.1.3
6060
Author: Karel Brinda <[email protected]>
6161
62-
usage: attotree [-k INT] [-s INT] [-t INT] [-o FILE] [-f STR] [-L] genomes [genomes ...]
62+
usage: attotree [-k INT] [-s INT] [-t INT] [-o FILE] [-f STR] [-L] [-D] [-V] genomes [genomes ...]
6363
6464
positional arguments:
6565
genomes input genome file (fasta / gzipped fasta / list of files when "-L")
@@ -73,6 +73,8 @@ Command-line parameters
7373
-o FILE newick output [stdout]
7474
-f STR tree inference algorithm (nj/upgma) [nj]
7575
-L input files are list of files
76+
-D debugging (don't remove tmp dir)
77+
-V verbose output
7678
7779
7880
@@ -98,4 +100,3 @@ Authors
98100
-------
99101

100102
`Karel Brinda <http://brinda.eu>`_ <[email protected]>
101-

attotree/attotree.py

+100 -26
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,21 @@
2828
DEFAULT_F = "nj"
2929

3030

31+
def shorten_output(s):
32+
"""
33+
Shortens the output string if it exceeds 40 characters.
34+
35+
Args:
36+
s (str): The input string.
37+
38+
Returns:
39+
str: The shortened string.
40+
"""
41+
if len(s) > 40:
42+
s = s[:40] + "..."
43+
return s
44+
45+
3146
def error(*msg, error_code=1):
3247
"""
3348
Prints an error message to stderr and exits the program with the specified error code.
@@ -59,7 +74,7 @@ def message(*msg):
5974
print(log_line, file=sys.stderr)
6075

6176

62-
def run_safe(command, output_fn=None, output_fo=None, err_msg=None, thr_exc=True, silent=False):
77+
def run_safe(command, output_fn=None, output_fo=None, err_msg=None, thr_exc=True, silent=False, verbose=True):
6378
"""
6479
Executes a shell command safely.
6580
@@ -91,9 +106,13 @@ def run_safe(command, output_fn=None, output_fo=None, err_msg=None, thr_exc=True
91106
command_safe.append(part)
92107

93108
command_str = " ".join(command_safe)
109+
if verbose:
110+
command_str_nice = command_str
111+
else:
112+
command_str_nice = shorten_output(command_str)
94113

95114
if not silent:
96-
message("Shell command:", command_str)
115+
message(f"Shell command: '{command_str_nice}'")
97116

98117
if output_fn is None:
99118
if output_fo is None:
@@ -120,9 +139,9 @@ def run_safe(command, output_fn=None, output_fo=None, err_msg=None, thr_exc=True
120139

121140
if error_code == 0 or error_code == 141:
122141
if not silent:
123-
message("Finished: {}".format(command_str))
142+
message(f"Finished: '{command_str_nice}'")
124143
else:
125-
message("Unfinished, an error occurred (error code {}): {}".format(error_code, command_str))
144+
message(f"Unfinished, an error occurred (error code {error_code}): '{command_str}'")
126145

127146
if err_msg is not None:
128147
print('Error: {}'.format(err_msg), file=sys.stderr)
@@ -133,7 +152,7 @@ def run_safe(command, output_fn=None, output_fo=None, err_msg=None, thr_exc=True
133152
sys.exit(1)
134153

135154

136-
def mash_triangle(inp_fns, phylip_fn, k, s, t, fof):
155+
def mash_triangle(inp_fns, phylip_fn, k, s, t, fof, verbose):
137156
"""
138157
Runs the 'mash triangle' command with the given parameters.
139158
@@ -151,12 +170,12 @@ def mash_triangle(inp_fns, phylip_fn, k, s, t, fof):
151170
Raises:
152171
None
153172
"""
154-
message("Running mash")
173+
message("Running Mash")
155174
cmd = f"mash triangle -s {s} -k {k} -p {t}".split()
156175
if fof:
157176
cmd += ["-l"]
158177
cmd += inp_fns
159-
run_safe(cmd, output_fn=phylip_fn)
178+
run_safe(cmd, output_fn=phylip_fn, verbose=verbose)
160179

161180

162181
def fn_to_node_name(fn):
@@ -177,7 +196,7 @@ def fn_to_node_name(fn):
177196
return nname
178197

179198

180-
def postprocess_mash_phylip(phylip_in_fn, phylip_out_fn):
199+
def postprocess_mash_phylip(phylip_in_fn, phylip_out_fn, verbose):
181200
"""
182201
Postprocesses a PHYLIP file by copying its contents from the input file to the output file.
183202
@@ -193,20 +212,13 @@ def postprocess_mash_phylip(phylip_in_fn, phylip_out_fn):
193212
for i, x in enumerate(f):
194213
x = x.strip()
195214
if i != 0:
196-
print(x, file=sys.stderr)
197215
l, sep, r = x.partition("\t")
198216
l = fn_to_node_name(l)
199217
x = l + sep + r
200-
message(x)
201218
print(x, file=g)
202-
#basename_components = os.path.basename(p[0]).split(".")
203-
#if len(basename_components) == 1:
204-
# basename_components.append("")
205-
## remove suffix
206-
#p[0] = ".".join(basename_components[:-1])
207219

208220

209-
def quicktree(phylip_fn, newick_fn, algorithm):
221+
def quicktree(phylip_fn, newick_fn, algorithm, verbose):
210222
"""
211223
Runs the quicktree algorithm to generate a phylogenetic tree.
212224
@@ -224,20 +236,24 @@ def quicktree(phylip_fn, newick_fn, algorithm):
224236
if algorithm == "upgma":
225237
cmd += ["-upgma"]
226238
cmd += [phylip_fn]
227-
run_safe(cmd, output_fn=newick_fn)
239+
run_safe(cmd, output_fn=newick_fn, verbose=verbose)
228240

229241

230-
def postprocess_quicktree_nw(nw_in_fn, nw_out_fo):
242+
def postprocess_quicktree_nw(nw_in_fn, nw_out_fo, verbose):
231243
"""
232244
Reformat newick.
233245
246+
This function reads an input newick file, removes any leading or trailing whitespace from each line,
247+
and writes the postprocessed newick file to the specified file object.
248+
234249
Notes:
235250
- assumption: node names already don't contain paths and prefixes
236251
- expects fo to allow both a filename or stdout
237252
238253
Args:
239254
nw_in_fn (str): Path to the input newick file.
240255
nw_out_fo (file object): File object to write the postprocessed newick file.
256+
verbose (bool): If True, print additional information during the postprocessing.
241257
242258
Returns:
243259
None
@@ -251,7 +267,7 @@ def postprocess_quicktree_nw(nw_in_fn, nw_out_fo):
251267
print("".join(buffer), file=nw_out_fo)
252268

253269

254-
def attotree(fns, newick_fo, k, s, t, phylogeny_algorithm, fof):
270+
def attotree(fns, newick_fo, k, s, t, phylogeny_algorithm, fof, verbose, debug):
255271
"""
256272
Generate a phylogenetic tree using the given parameters.
257273
@@ -263,27 +279,68 @@ def attotree(fns, newick_fo, k, s, t, phylogeny_algorithm, fof):
263279
t (int): Value for parameter t.
264280
phylogeny_algorithm (str): Name of the phylogeny algorithm to use.
265281
fof (bool): Flag indicating whether to use the fof parameter.
282+
verbose (bool): Flag indicating whether to enable verbose output.
283+
debug (bool): Flag indicating whether to retain auxiliary files.
266284
267285
Returns:
268286
None
269287
"""
270-
with tempfile.TemporaryDirectory() as d:
271-
message('created a temporary directory', d)
288+
features = []
289+
if verbose:
290+
features.append("verbose")
291+
if debug:
292+
features.append("debuging")
293+
if len(features) > 0:
294+
fmsg = f" ({', '.join(features)})"
295+
else:
296+
fmsg = ""
297+
message(f"Attotree starting{fmsg}")
298+
with tempfile.TemporaryDirectory(delete=not debug) as d:
299+
message('Created a temporary directory', d)
272300
phylip1_fn = os.path.join(d, "distances.phylip0")
273301
phylip2_fn = os.path.join(d, "distances.phylip")
274302
newick1_fn = os.path.join(d, "tree.nw")
275303
newick2_fo = newick_fo
276-
mash_triangle(fns, phylip1_fn, k=k, s=s, t=t, fof=fof)
277-
postprocess_mash_phylip(phylip1_fn, phylip2_fn)
278-
quicktree(phylip2_fn, newick1_fn, algorithm=phylogeny_algorithm)
279-
postprocess_quicktree_nw(newick1_fn, newick2_fo)
304+
if fof:
305+
#This is to make the list of file pass to Mash even with
306+
#process substitutions
307+
old_fof_fn = fns[0]
308+
new_fof_fn = os.path.join(d, "fof.txt")
309+
with open(old_fof_fn) as f, open(new_fof_fn, 'w') as g:
310+
g.write(f.read())
311+
fns = [new_fof_fn]
312+
mash_triangle(fns, phylip1_fn, k=k, s=s, t=t, fof=fof, verbose=verbose)
313+
postprocess_mash_phylip(phylip1_fn, phylip2_fn, verbose=verbose)
314+
quicktree(phylip2_fn, newick1_fn, algorithm=phylogeny_algorithm, verbose=verbose)
315+
postprocess_quicktree_nw(newick1_fn, newick2_fo, verbose=verbose)
316+
317+
if debug:
318+
emsg = f" (auxiliary files retained in '{d}')"
319+
else:
320+
emsg = ""
321+
message(f"Attotree finished{emsg}")
280322

281323

282324
def main():
325+
"""
326+
The main function that is executed when the script is run.
327+
328+
Returns:
329+
None
330+
"""
283331

284332
class CustomArgumentParser(argparse.ArgumentParser):
285333

334+
def __init__(self, prog=None, **kwargs):
335+
super().__init__(prog="attotree", **kwargs)
336+
286337
def print_help(self):
338+
"""
339+
Prints the help message.
340+
341+
Returns:
342+
None
343+
"""
287344
msg = self.format_help()
288345
repl = re.compile(r'\]\s+\[')
289346
msg = repl.sub("] [", msg)
@@ -369,6 +426,20 @@ def format_help(self):
369426
help=f'input files are list of files',
370427
)
371428

429+
parser.add_argument(
430+
'-D',
431+
action='store_true',
432+
dest='D',
433+
help=f'debugging (don\'t remove tmp dir)',
434+
)
435+
436+
parser.add_argument(
437+
'-V',
438+
action='store_true',
439+
dest='V',
440+
help=f'verbose output',
441+
)
442+
372443
parser.add_argument(
373444
'genomes',
374445
nargs="+",
@@ -378,7 +449,10 @@ def format_help(self):
378449
args = parser.parse_args()
379450

380451
#print(args)
381-
attotree(fns=args.genomes, k=args.k, s=args.s, t=args.t, newick_fo=args.o, phylogeny_algorithm=args.f, fof=args.L)
452+
attotree(
453+
fns=args.genomes, k=args.k, s=args.s, t=args.t, newick_fo=args.o, phylogeny_algorithm=args.f, fof=args.L,
454+
verbose=args.V, debug=args.D
455+
)
382456

383457
args = parser.parse_args()
384458

attotree/version.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,4 @@
22
from __commit import *
33
except ImportError:
44
pass
5-
VERSION = "0.1.2"
5+
VERSION = "0.1.3"

tests/00_data/.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
*.fa

tests/00_data/Makefile

+46
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
.PHONY: all help clean cleanall view
2+
3+
SHELL=/usr/bin/env bash -eo pipefail
4+
5+
.SECONDARY:
6+
7+
.SUFFIXES:
8+
9+
FASTAS=001334.fa 101058.fa 203692.fa 302649.fa 403790.fa
10+
11+
##############
12+
## Commands ##
13+
##############
14+
15+
all: test_spneumo.tar.xz
16+
17+
test_spneumo.tar.xz: $(FASTAS)
18+
tar -cvf - $? \
19+
| xz -T1 -9 \
20+
> $@.tmp
21+
mv $@.tmp $@
22+
23+
%.fa:
24+
curl -L "https://github.com/c2-d2/rase-db-spneumoniae-sparc/raw/master/isolates/$*.fa" \
25+
| seqtk seq \
26+
> $@.tmp
27+
mv $@.tmp $@
28+
29+
30+
help: ## Print help messages
31+
@echo -e "$$(grep -hE '^\S*(:.*)?##' $(MAKEFILE_LIST) \
32+
| sed \
33+
-e 's/:.*##\s*/:/' \
34+
-e 's/^\(.*\):\(.*\)/ \\x1b[36m\1\\x1b[m:\2/' \
35+
-e 's/^\([^#]\)/\1/g' \
36+
-e 's/: /:/g' \
37+
-e 's/^#\(.*\)#/\\x1b[90m\1\\x1b[m/' \
38+
| column -c2 -t -s : )"
39+
40+
clean: ## Clean
41+
rm -fv *.fa
42+
43+
cleanall: clean ## Clean all
44+
rm -fv *.tar.xz
45+
46+
view: ## View output

tests/00_data/test_spneumo.tar.xz

1000 KB
Binary file not shown.

tests/02_simple_tree/.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,3 @@
1+
file_list.txt
12
*.fa
23
*.nw

0 commit comments

Comments
 (0)