Skip to content

Commit ae16be3

Browse files
committed
Added the CLI run_AREXEE and set up its preliminary structure
1 parent b6d5fe1 commit ae16be3

File tree

2 files changed

+131
-0
lines changed

2 files changed

+131
-0
lines changed

ensemble_md/cli/run_AREXEE.py

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
####################################################################
2+
# #
3+
# ensemble_md, #
4+
# a python package for running GROMACS simulation ensembles #
5+
# #
6+
# Written by Wei-Tse Hsu <[email protected]> #
7+
# Copyright (c) 2022 University of Colorado Boulder #
8+
# #
9+
####################################################################
10+
import sys
11+
import time
12+
import argparse
13+
import warnings
14+
from mpi4py import MPI
15+
16+
from ensemble_md.utils import utils
17+
from ensemble_md.cli.run_REXEE import initialize
18+
19+
# Module-level statement: runs when this module is imported (i.e. whenever the CLI is loaded),
# flagging the experimental status of the CLI before main() is ever called.
warnings.warn('This module is only for experimental purposes and still in progress. Please do not use it for any production research.', UserWarning) # noqa: E501
20+
21+
"""
22+
Currently, this CLI still uses MPI to run REXEE simulations, but it tries to mock some behaviors of asynchronous REXEE in the following way:
23+
1. Finish an iteration of the REXEE simulation.
24+
2. Based on the time it took for each simulation to finish, figure out the order in which the replicas should be added to the queue.
25+
3. Apply a queueing algorithm to figure out what replicas to swap first.
26+
27+
Eventually, we would like to get rid of the use of MPI and really rely on asynchronous parallelization schemes. The most likely
28+
direction is to use functionalities in airflowHPC to manage the queueing and launching of replicas. If possible, this CLI should be
29+
integrated into the CLI run_REXEE.
30+
"""
31+
32+
33+
def main():
    """Entry point of the experimental ``run_AREXEE`` CLI.

    The function parses the command line with ``initialize``, redirects
    ``sys.stdout``/``sys.stderr`` to ``utils.Logger`` objects writing to
    ``args.output``, builds a ``ReplicaExchangeEE`` object from ``args.yaml``
    and then:

    1. on rank 0, prints the parameters and warnings, aborting all ranks with
       code 101 when the number of warnings exceeds ``args.maxwarn``;
    2. on rank 0, creates ``sim_{i}/iteration_0`` directories with an
       ``expanded.mdp`` file for each simulation, then runs iteration 0;
    3. for every following iteration, lets rank 0 work out the swapping
       pattern, broadcasts it to all ranks and runs the next iteration;
    4. on rank 0, prints a short summary and the elapsed wall time.

    Checkpoint handling, restraints, weight combination/correction, coordinate
    modification and data saving are intentionally left out (see the inline
    comments).

    Returns
    -------
    None
    """
    # Function-scope imports for the names this function uses that are not imported at the top of the module.
    # Without them the function fails with NameError on rank 0 before any simulation starts.
    import copy
    import os
    import traceback
    from datetime import datetime

    # NOTE(review): module path assumed from the package layout (the class is not defined in this file) - confirm.
    from ensemble_md.replica_exchange_EE import ReplicaExchangeEE

    t1 = time.time()
    args = initialize(sys.argv[1:])
    sys.stdout = utils.Logger(logfile=args.output)
    sys.stderr = utils.Logger(logfile=args.output)

    # Step 1: Set up MPI rank and instantiate ReplicaExchangeEE to set up REXEE parameters
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()  # Rank of this process within COMM_WORLD (local to main)

    if rank == 0:
        print(f'Current time: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}')
        print(f'Command line: {" ".join(sys.argv)}\n')

    REXEE = ReplicaExchangeEE(args.yaml)

    if rank == 0:
        # Print out simulation parameters
        REXEE.print_params()

        # Print out warnings and fail if needed
        for i in REXEE.warnings:
            print(f'\n{i}\n')

        if len(REXEE.warnings) > args.maxwarn:
            print(f"The execution failed due to warning(s) about parameter spcificaiton. Check the warnings, or consider setting maxwarn in the input YAML file if you find them harmless.")  # noqa: E501, F541
            comm.Abort(101)

    # Step 2: If there is no checkpoint file found/provided, perform the 1st iteration (index 0)

    # Note that here we assume no checkpoint files just to minimize this CLI.
    # We also leave out Step 2-3 since we won't be using this CLI to test calculations with any restraints.
    start_idx = 1

    # 2-1. Set up input files for all simulations
    if rank == 0:
        for i in range(REXEE.n_sim):
            os.mkdir(f'{REXEE.working_dir}/sim_{i}')
            os.mkdir(f'{REXEE.working_dir}/sim_{i}/iteration_0')
            MDP = REXEE.initialize_MDP(i)
            MDP.write(f"{REXEE.working_dir}/sim_{i}/iteration_0/expanded.mdp", skipempty=True)
        # NOTE(review): nesting reconstructed from logic (once per run, rank 0 only) - confirm against the repository.
        if REXEE.modify_coords == 'default' and (not os.path.exists('residue_connect.csv') or not os.path.exists('residue_swap_map.csv')):  # noqa: E501
            REXEE.process_top()

    # 2-2. Run the first set of simulations
    REXEE.run_REXEE(0)

    for i in range(start_idx, REXEE.n_iter):
        try:
            if rank == 0:
                # Step 3: Swap the coordinates
                # Note that here we leave out Steps 3-3 and 3-4, which are for weight combination/correction and coordinate modification, respectively.  # noqa: E501

                # 3-1. Extract the final dhdl and log files from the previous iteration
                dhdl_files = [f'{REXEE.working_dir}/sim_{j}/iteration_{i - 1}/dhdl.xvg' for j in range(REXEE.n_sim)]
                log_files = [f'{REXEE.working_dir}/sim_{j}/iteration_{i - 1}/md.log' for j in range(REXEE.n_sim)]
                states_ = REXEE.extract_final_dhdl_info(dhdl_files)
                wl_delta, weights_, counts_ = REXEE.extract_final_log_info(log_files)
                print()

                # 3-2. Identify swappable pairs, propose swap(s), calculate P_acc, and accept/reject swap(s)
                # The copies below are not consumed yet; presumably they are kept for the steps
                # (3-3/3-4) that this preliminary CLI leaves out.
                states = copy.deepcopy(states_)  # noqa: F841
                weights = copy.deepcopy(weights_)  # noqa: F841
                counts = copy.deepcopy(counts_)  # noqa: F841
                swap_pattern, swap_list = REXEE.get_swapping_pattern(dhdl_files, states_)  # swap_list will only be used for modify_coords # noqa: E501
            else:
                swap_pattern, swap_list = None, None

        except Exception:
            # Top-level boundary: report the traceback and bring down every rank so that
            # no process is left waiting on the broadcast below.
            print('\n--------------------------------------------------------------------------\n')
            print(f'An error occurred on rank 0:\n{traceback.format_exc()}')
            MPI.COMM_WORLD.Abort(1)

        # Note that we leave out the block for exiting the for loop when the weights got equilibrated, as this CLI
        # won't be tested for weight-updating simulations for now.

        # Step 4: Perform another iteration
        # Here we leave out the block that uses swap_list, which is only for coordinate modifications.
        swap_pattern = comm.bcast(swap_pattern, root=0)

        # Here we run another set of simulations (i.e. Step 4-2 in CLI run_REXEE)
        REXEE.run_REXEE(i, swap_pattern)

        # Here we leave out the block for saving data (i.e. Step 4-3 in CLI run_REXEE) since we won't run for too many iterations when testing this CLI.  # noqa: E501

    # Step 5: Write a summary for the simulation ensemble
    if rank == 0:
        print('\nSummary of the simulation ensemble')
        print('==================================')

        # We leave out the section showing the simulation status.
        print(f'\n{REXEE.n_empty_swappable} out of {REXEE.n_iter}, or {REXEE.n_empty_swappable / REXEE.n_iter * 100:.1f}% iterations had an empty list of swappable pairs.')  # noqa: E501
        if REXEE.n_swap_attempts != 0:
            print(f'{REXEE.n_rejected} out of {REXEE.n_swap_attempts}, or {REXEE.n_rejected / REXEE.n_swap_attempts * 100:.1f}% of attempted exchanges were rejected.')  # noqa: E501

        # NOTE(review): placement inside the rank-0 branch is reconstructed, not visible in the source - confirm.
        print(f'\nTime elapsed: {utils.format_time(time.time() - t1)}')

    MPI.Finalize()

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
'run_REXEE = ensemble_md.cli.run_REXEE:main',
7777
'analyze_REXEE = ensemble_md.cli.analyze_REXEE:main',
7878
'explore_REXEE = ensemble_md.cli.explore_REXEE:main',
79+
'run_AREXEE = ensemble_md.cli.run_AREXEE:main',
7980
],
8081
},
8182

0 commit comments

Comments
 (0)