Skip to content

Commit c76bfef

Browse files
end-to-end: split cli into multiple commands (#1042)
This allows a more step-wise approach, which is useful for, e.g., generating builds in one session and generating harnesses in other sessions. Signed-off-by: David Korczynski <[email protected]>
1 parent 175b426 commit c76bfef

File tree

1 file changed

+257
-77
lines changed

1 file changed

+257
-77
lines changed

experimental/end_to_end/cli.py

+257-77
Original file line number | Diff line number | Diff line change
@@ -102,13 +102,14 @@ def _run_introspector_collection(runner_script, project, wd, semaphore):
102102
semaphore.release()
103103

104104

105-
def extract_introspector_reports_for_benchmarks(projects_to_run, workdir, args):
105+
def extract_introspector_reports_for_benchmarks(projects_to_run, workdir,
106+
parallel_build_jobs):
106107
"""Runs introspector through each report to collect program analysis data."""
107108
oss_fuzz_dir = os.path.join(workdir, 'oss-fuzz')
108109
runner_script = os.path.join(workdir, 'fuzz-introspector',
109110
'oss_fuzz_integration', 'runner.py')
110111

111-
semaphore = threading.Semaphore(args.build_jobs)
112+
semaphore = threading.Semaphore(parallel_build_jobs)
112113
jobs = []
113114

114115
for project in projects_to_run:
@@ -392,25 +393,43 @@ def _create_data_dir(workdir):
392393
return dst_dir
393394

394395

395-
def run_harness_generation(out_gen, workdir, args):
396-
"""Runs harness generation based on the projects in `out_gen`"""
397-
396+
def prepare_fuzz_introspector_db(out_gen, workdir, parallel_introspector_jobs):
397+
# Run introspector collection on the generated projects
398398
projects_to_run = copy_generated_projects_to_harness_gen(out_gen, workdir)
399-
extract_introspector_reports_for_benchmarks(projects_to_run, workdir, args)
399+
extract_introspector_reports_for_benchmarks(projects_to_run, workdir,
400+
parallel_introspector_jobs)
401+
402+
# Create a fuzz introspector database based on the projects in
403+
# the working directory's OSS-Fuzz.
400404
shutdown_fi_webapp()
401405
create_fi_db(workdir)
402-
if args.until_fi_db:
403-
logger.info('Fuzz Introspector webapp created. Exiting.')
404-
sys.exit(0)
406+
407+
408+
def run_harness_generation(out_gen, workdir, args):
409+
"""Runs harness generation based on the projects in `out_gen`"""
410+
411+
# Read the json file from FI to get all current projects.
412+
fi_project_json = os.path.join(workdir, 'fuzz-introspector', 'tools',
413+
'web-fuzzing-introspection', 'app', 'static',
414+
'assets', 'db', 'all-project-current.json')
415+
if not os.path.isfile(fi_project_json):
416+
logger.info('Did not find FI DB file.')
417+
set()
418+
419+
projects_to_run = []
420+
with open(fi_project_json, 'r') as f:
421+
json_content = json.load(f)
422+
for elem in json_content:
423+
projects_to_run.append(elem['project_name'])
424+
425+
# Launch the fuzz introspector webapp so it's ready for OFG core
405426
shutdown_fi_webapp()
406427
launch_fi_webapp(workdir)
407428
wait_until_fi_webapp_is_launched()
408429
dst_data_dir = _create_data_dir(workdir)
409430
logger.info('Wrote data directory for OFG experiments in %s', dst_data_dir)
410-
if args.all_but_ofg_core:
411-
# do a prompt exist
412-
sys.exit(0)
413431

432+
# Run OFG core using local OSS-Fuzz and local Fuzz Introspector.
414433
run_ofg_generation(projects_to_run, workdir, args)
415434

416435
create_merged_oss_fuzz_projects(out_gen)
@@ -442,20 +461,9 @@ def _get_next_data_dst_dir():
442461
return _get_next_folder_in_idx('data-dir')
443462

444463

445-
def run_analysis(args):
446-
"""Generates builds and harnesses for repositories in input."""
447-
workdir = setup_workdirs(args.workdir)
448-
449-
abs_workdir = os.path.abspath(workdir)
450-
if not args.out:
451-
out_folder = get_next_out_folder()
452-
else:
453-
out_folder = args.out
454-
455-
#if os.path.isdir('results'):
456-
# shutil.rmtree('results')
457-
458-
oss_fuzz_dir = os.path.join(abs_workdir, 'oss-fuzz-1')
464+
def _run_build_generation(workdir, out_folder, args):
465+
""" Build script generation. """
466+
oss_fuzz_dir = os.path.join(workdir, 'oss-fuzz-1')
459467
target_repositories = runner.extract_target_repositories(args.input)
460468
if args.agent:
461469
# Prepare arguments used deeper in OFG core.
@@ -472,14 +480,72 @@ def run_analysis(args):
472480
parallel_jobs=args.build_jobs,
473481
max_timeout=args.build_timeout)
474482

475-
# Exit if only builds are required.
476-
if args.build_only:
477-
logger.info('Finished analysis')
478-
logger.info('Results in %s', out_folder)
479-
return
480483

484+
def run_fuzz_introspector_db_creation(args):
485+
"""Entrypoint for fuzz introspector database creation."""
486+
args.workdir = os.path.abspath(args.workdir)
487+
prepare_fuzz_introspector_db(args.generated_builds, args.workdir,
488+
args.parallel_build_jobs)
489+
490+
491+
def run_build_generation(args):
492+
"""Generates builds and harnesses for repositories in input."""
493+
494+
# Prepare working directory.
495+
workdir = setup_workdirs(args.workdir)
496+
497+
abs_workdir = os.path.abspath(workdir)
498+
if not args.out:
499+
out_folder = get_next_out_folder()
500+
else:
501+
out_folder = args.out
502+
503+
_run_build_generation(abs_workdir, out_folder, args)
504+
505+
506+
def run_cmd_harness_generation(args):
507+
"""Entrypoint for command for harness generation."""
508+
509+
# Prepare working directory.
510+
abs_workdir = os.path.abspath(args.workdir)
511+
512+
out_folder = args.out
513+
514+
# Run harness generation.
481515
projects_run = run_harness_generation(out_folder, abs_workdir, args)
482516

517+
# Log results.
518+
logger.info('Finished analysis')
519+
logger.info('Results in %s', out_folder)
520+
logger.info('Projects generated (%d): ', len(projects_run))
521+
for project in projects_run:
522+
logger.info('- %s', project)
523+
524+
if os.path.isdir('results'):
525+
shutil.copytree('results', os.path.join(out_folder, 'harness-results'))
526+
527+
528+
def run_full(args):
529+
"""Generates builds and harnesses for repositories in input."""
530+
531+
# Prepare working directory.
532+
workdir = setup_workdirs(args.workdir)
533+
534+
abs_workdir = os.path.abspath(workdir)
535+
if not args.out:
536+
out_folder = get_next_out_folder()
537+
else:
538+
out_folder = args.out
539+
540+
_run_build_generation(abs_workdir, out_folder, args)
541+
542+
# Prepare fuzz introspector database.
543+
prepare_fuzz_introspector_db(out_folder, abs_workdir, args.build_jobs)
544+
545+
# Run harness generation.
546+
projects_run = run_harness_generation(out_folder, abs_workdir, args)
547+
548+
# Log results.
483549
logger.info('Finished analysis')
484550
logger.info('Results in %s', out_folder)
485551
logger.info('Projects generated (%d): ', len(projects_run))
@@ -493,56 +559,162 @@ def run_analysis(args):
493559
def parse_commandline():
494560
"""Parse the commandline."""
495561
parser = argparse.ArgumentParser()
496-
parser.add_argument('--input', '-i', help='Input to analyze')
497-
parser.add_argument('--out',
498-
'-o',
499-
help='Directory to store output.',
500-
default='oss-fuzz-generated')
501-
parser.add_argument('--silent',
502-
'-s',
503-
help='Disable logging in subprocess.',
504-
action='store_true')
505-
parser.add_argument('--model',
506-
'-m',
507-
help=('Models available: '
508-
f'{", ".join(models.LLM.all_llm_names())}.'),
509-
type=str)
510-
parser.add_argument('--agent',
511-
'-a',
512-
help='Enable agent workflow',
513-
action='store_true')
514-
parser.add_argument('--hg-agent',
515-
'-ha',
516-
help='Enable agent harness generation',
517-
action='store_true')
518-
parser.add_argument('-gm',
519-
'--generate-benchmarks-max',
520-
help='Max targets to generate per benchmark heuristic.',
521-
type=int,
522-
default=5)
523-
parser.add_argument('-mr',
524-
'--max-round',
525-
type=int,
526-
default=5,
527-
help='Max trial round for agents.')
528-
parser.add_argument('--build-only',
529-
help='Only generated builds',
530-
action='store_true')
531-
parser.add_argument('--build-jobs',
532-
help='Parallel build-generator jobs to run.',
533-
default=2,
534-
type=int)
535-
parser.add_argument('--all-but-ofg-core', action='store_true')
536-
parser.add_argument(
562+
subparsers = parser.add_subparsers(dest='command')
563+
564+
# Run build generation.
565+
run_build_gen = subparsers.add_parser(
566+
'generate-builds',
567+
help='Generate OSS-Fuzz projects with build scripts but empty fuzzers.')
568+
run_build_gen.add_argument('--input', '-i', help='Input to analyze')
569+
run_build_gen.add_argument('--out',
570+
'-o',
571+
help='Directory to store output.',
572+
default='oss-fuzz-generated')
573+
run_build_gen.add_argument('--silent',
574+
'-s',
575+
help='Disable logging in subprocess.',
576+
action='store_true')
577+
run_build_gen.add_argument('--model',
578+
'-m',
579+
help=('Models available: '
580+
f'{", ".join(models.LLM.all_llm_names())}.'),
581+
type=str)
582+
run_build_gen.add_argument('--agent',
583+
'-a',
584+
help='Enable agent workflow',
585+
action='store_true')
586+
run_build_gen.add_argument(
587+
'-gm',
588+
'--generate-benchmarks-max',
589+
help='Max targets to generate per benchmark heuristic.',
590+
type=int,
591+
default=5)
592+
run_build_gen.add_argument('-mr',
593+
'--max-round',
594+
type=int,
595+
default=5,
596+
help='Max trial round for agents.')
597+
run_build_gen.add_argument('--build-jobs',
598+
help='Parallel build-generator jobs to run.',
599+
default=2,
600+
type=int)
601+
run_build_gen.add_argument(
537602
'--build-timeout',
538603
help='Timeout for build generation per project, in seconds.',
539604
default=0,
540605
type=int)
541-
parser.add_argument(
542-
'--until-fi-db',
543-
help='Run until Fuzz Introspector DB creation and then exit.',
606+
run_build_gen.add_argument('-w', '--workdir', help='Work directory to use')
607+
608+
# Generate fuzz introspector database.
609+
run_generate_fi_db_parser = subparsers.add_parser(
610+
'generate-fuzz-introspector-database',
611+
help='Generates a fuzz introspector database from auto build projects.')
612+
613+
run_generate_fi_db_parser.add_argument('--generated-builds', required=True)
614+
run_generate_fi_db_parser.add_argument('--workdir', required=True)
615+
run_generate_fi_db_parser.add_argument('--parallel-build-jobs',
616+
type=int,
617+
default=5)
618+
run_generate_fi_db_parser.add_argument('--silent',
619+
'-s',
620+
help='Disable logging in subprocess.',
621+
action='store_true')
622+
623+
# Run harness generation
624+
run_harness_generation_parser = subparsers.add_parser(
625+
'generate-harnesses',
626+
help="Harness generation of OSS-Fuzz projects.",
627+
)
628+
629+
run_harness_generation_parser.add_argument('--out',
630+
'-o',
631+
help='Directory to store output.',
632+
default='oss-fuzz-generated')
633+
run_harness_generation_parser.add_argument(
634+
'--silent',
635+
'-s',
636+
help='Disable logging in subprocess.',
637+
action='store_true')
638+
run_harness_generation_parser.add_argument(
639+
'--model',
640+
'-m',
641+
help=('Models available: '
642+
f'{", ".join(models.LLM.all_llm_names())}.'),
643+
type=str)
644+
run_harness_generation_parser.add_argument('--agent',
645+
'-a',
646+
help='Enable agent workflow',
647+
action='store_true')
648+
run_harness_generation_parser.add_argument(
649+
'--hg-agent',
650+
'-ha',
651+
help='Enable agent harness generation',
544652
action='store_true')
545-
parser.add_argument('-w', '--workdir', help='Work directory to use')
653+
run_harness_generation_parser.add_argument(
654+
'-gm',
655+
'--generate-benchmarks-max',
656+
help='Max targets to generate per benchmark heuristic.',
657+
type=int,
658+
default=5)
659+
run_harness_generation_parser.add_argument('-mr',
660+
'--max-round',
661+
type=int,
662+
default=5,
663+
help='Max trial round for agents.')
664+
run_harness_generation_parser.add_argument('-w',
665+
'--workdir',
666+
help='Work directory to use')
667+
668+
# Run a full end to end generation.
669+
run_full_parser = subparsers.add_parser(
670+
'generate-full',
671+
help="End to end generation of OSS-Fuzz projects.",
672+
)
673+
run_full_parser.add_argument('--input', '-i', help='Input to analyze')
674+
run_full_parser.add_argument('--out',
675+
'-o',
676+
help='Directory to store output.',
677+
default='oss-fuzz-generated')
678+
run_full_parser.add_argument('--silent',
679+
'-s',
680+
help='Disable logging in subprocess.',
681+
action='store_true')
682+
run_full_parser.add_argument(
683+
'--model',
684+
'-m',
685+
help=('Models available: '
686+
f'{", ".join(models.LLM.all_llm_names())}.'),
687+
type=str)
688+
run_full_parser.add_argument('--agent',
689+
'-a',
690+
help='Enable agent workflow',
691+
action='store_true')
692+
run_full_parser.add_argument('--hg-agent',
693+
'-ha',
694+
help='Enable agent harness generation',
695+
action='store_true')
696+
run_full_parser.add_argument(
697+
'-gm',
698+
'--generate-benchmarks-max',
699+
help='Max targets to generate per benchmark heuristic.',
700+
type=int,
701+
default=5)
702+
run_full_parser.add_argument('-mr',
703+
'--max-round',
704+
type=int,
705+
default=5,
706+
help='Max trial round for agents.')
707+
run_full_parser.add_argument('--build-jobs',
708+
help='Parallel build-generator jobs to run.',
709+
default=2,
710+
type=int)
711+
run_full_parser.add_argument(
712+
'--build-timeout',
713+
help='Timeout for build generation per project, in seconds.',
714+
default=0,
715+
type=int)
716+
run_full_parser.add_argument('-w', '--workdir', help='Work directory to use')
717+
546718
return parser.parse_args()
547719

548720

@@ -551,7 +723,15 @@ def main():
551723
args = parse_commandline()
552724
setup_logging()
553725
silent_global = args.silent
554-
run_analysis(args)
726+
727+
if args.command == 'generate-full':
728+
run_full(args)
729+
if args.command == 'generate-fuzz-introspector-database':
730+
run_fuzz_introspector_db_creation(args)
731+
if args.command == 'generate-builds':
732+
run_build_generation(args)
733+
if args.command == 'generate-harnesses':
734+
run_cmd_harness_generation(args)
555735

556736

557737
if __name__ == '__main__':

0 commit comments

Comments
 (0)