google
diff --git a/‎experimental/build_generator/build_script_generator.py
+3-3 b/‎experimental/build_generator/build_script_generator.py
+3-3
diff --git a/‎experimental/build_generator/file_utils.py
+72 b/‎experimental/build_generator/file_utils.py
+72
diff --git a/‎experimental/build_generator/llm_agent.py
+285 b/‎experimental/build_generator/llm_agent.py
+285
@@ -23,7 +23,7 @@
 from typing import Dict, Iterator, List, Optional, Tuple
 
 import constants
-import manager
+import file_utils as utils
 
 logger = logging.getLogger(name=__name__)
 
@@ -843,7 +843,7 @@ def match_build_heuristics_on_folder(abspath_of_target: str):
   Traverses the files in the target folder. Uses the file list as input to
   auto build heuristics, and for each heuristic will yield any of the
   build steps that are deemed matching."""
-  all_files = manager.get_all_files_in_path(abspath_of_target)
+  all_files = utils.get_all_files_in_path(abspath_of_target)
   all_checks = [
       AutogenConfScanner(),
       PureCFileCompiler(),
@@ -887,7 +887,7 @@ def match_build_heuristics_on_folder(abspath_of_target: str):
 
 def get_all_binary_files_from_folder(path: str) -> Dict[str, List[str]]:
   """Extracts binary artifacts from a list of files, based on file suffix."""
-  all_files = manager.get_all_files_in_path(path, path)
+  all_files = utils.get_all_files_in_path(path, path)
 
   executable_files = {'static-libs': [], 'dynamic-libs': [], 'object-files': []}
   for fil in all_files:
 
@@ -0,0 +1,72 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""File utils for target repository"""
+
+import os
+from typing import List, Optional
+
+try:
+  # For execution outside of a docker container
+  from experimental.build_generator import templates
+except (ImportError, SystemError):
+  # For execution inside of a docker container
+  import templates
+
+
+def determine_project_language(path: str) -> str:
+  """Guesses whether a project is mostly C or C++ from its file suffixes.
+
+  Files ending in '.c' count towards C; files ending in '.cpp' or '.cc' count
+  towards C++. 'c++' is returned when no such file is found, and 'c' is
+  returned when both languages have the same non-zero count."""
+  c_count = 0
+  cpp_count = 0
+  for file_name in get_all_files_in_path(path, path):
+    if file_name.endswith('.c'):
+      c_count += 1
+    elif file_name.endswith(('.cpp', '.cc')):
+      cpp_count += 1
+
+  # C only wins with at least one C file and no fewer C files than C++ files.
+  if c_count > 0 and c_count >= cpp_count:
+    return 'c'
+  return 'c++'
+
+
+def get_language_defaults(language: str):
+  """Returns the default build settings for |language|.
+
+  The result is a 4-tuple holding the compiler variable, the compiler flags
+  variable, the path of the empty fuzzing harness and the base harness
+  template. Only 'c' and 'c++' are known; other values raise KeyError."""
+  c_defaults = ('$CC', '$CFLAGS', '/src/empty-fuzzer.c',
+                templates.C_BASE_TEMPLATE)
+  cpp_defaults = ('$CXX', '$CXXFLAGS', '/src/empty-fuzzer.cpp',
+                  templates.CPP_BASE_TEMPLATE)
+  return {'c': c_defaults, 'c++': cpp_defaults}[language]
+
+
+def get_all_files_in_path(base_path: str,
+                          path_to_subtract: Optional[str] = None) -> List[str]:
+  """Walks |base_path| and returns the path of every file found below it.
+
+  When a file path starts with |path_to_subtract| (the current working
+  directory if not given) that prefix is removed. A single leading '/' is
+  then stripped from every path, whether or not the prefix matched."""
+  prefix = os.getcwd() if path_to_subtract is None else path_to_subtract
+
+  collected = []
+  for dir_path, _, file_names in os.walk(base_path):
+    for file_name in file_names:
+      full_path = os.path.join(dir_path, file_name)
+      if full_path.startswith(prefix):
+        relative = full_path[len(prefix):]
+      else:
+        relative = full_path
+      # Drop one leading separator so the result does not look absolute.
+      if relative.startswith('/'):
+        relative = relative[1:]
+      collected.append(relative)
+  return collected
@@ -0,0 +1,285 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""LLM Build Script Agent"""
+
+import argparse
+import os
+import re
+import subprocess
+from typing import Optional
+
+import logger
+from agent.base_agent import BaseAgent
+from experimental.build_generator import file_utils, templates
+from llm_toolkit.models import LLM
+from llm_toolkit.prompts import Prompt
+from results import BuildResult, Result
+from tool.base_tool import BaseTool
+from tool.container_tool import ProjectContainerTool
+
+MAX_PROMPT_LENGTH = 20000
+
+
+class BuildScriptAgent(BaseAgent):
+  """Base class for build script agent."""
+
+  def __init__(self,
+               trial: int,
+               llm: LLM,
+               args: argparse.Namespace,
+               github_url: str,
+               language: str,
+               tools: Optional[list[BaseTool]] = None,
+               name: str = ''):
+    """Sets up the agent state for one target repository.
+
+    Stores the repository URL and language, initialises the bookkeeping used
+    across rounds and loads the sample harness path and code for |language|."""
+    super().__init__(trial, llm, args, tools, name)
+
+    # Target description.
+    self.github_url = github_url
+    self.language = language
+
+    # Build configuration bookkeeping; both start out empty.
+    self.target_files = {}
+    self.build_files = {}
+
+    # Outcome of the most recent round of bash command execution.
+    self.last_status = False
+    self.last_result = ''
+
+    # Get sample fuzzing harness; the first two defaults are not needed here.
+    defaults = file_utils.get_language_defaults(self.language)
+    _, _, self.harness_path, self.harness_code = defaults
+
+  def _parse_tag(self, response: str, tag: str) -> str:
+    """Returns the stripped content of the first |tag| block in |response|.
+
+    Both the xml style (an opening and a closing tag) and the code style (a
+    triple-backtick fence labelled with the tag) are recognised; the xml
+    style is tried first. Returns an empty string when neither is present."""
+    # Escape the tag so names holding regex metacharacters (e.g. 'c++') are
+    # matched literally instead of being read as part of the pattern.
+    escaped = re.escape(tag)
+    patterns = (rf'<{escaped}>(.*?)</{escaped}>', rf'```{escaped}(.*?)```')
+
+    # Matches both xml and code style tags
+    for pattern in patterns:
+      match = re.search(pattern, response, re.DOTALL)
+      if match:
+        return match.group(1).strip()
+
+    return ''
+
+  def _parse_tags(self, response: str, tag: str) -> list[str]:
+    """Returns the stripped content of every |tag| block in |response|.
+
+    Both the xml style (an opening and a closing tag) and the code style (a
+    triple-backtick fence labelled with the tag) are recognised. All xml
+    style matches are listed first, followed by all code style matches.
+    Returns an empty list when nothing matches."""
+    # Escape the tag so names holding regex metacharacters (e.g. 'c++') are
+    # matched literally instead of being read as part of the pattern.
+    escaped = re.escape(tag)
+    patterns = (rf'<{escaped}>(.*?)</{escaped}>', rf'```{escaped}(.*?)```')
+    found_matches = []
+
+    # Matches both xml and code style tags
+    for pattern in patterns:
+      found_matches.extend(
+          content.strip()
+          for content in re.findall(pattern, response, re.DOTALL))
+
+    return found_matches
+
+  def _container_handle_bash_commands(self, response: str, tool: BaseTool,
+                                      prompt: Prompt) -> Prompt:
+    """Handles the command from LLM with container |tool|.
+
+    A 'fuzzer' block in |response|, if any, replaces the stored harness code;
+    the harness is written into the container when |tool| is a
+    ProjectContainerTool. Every 'bash' block is then executed with |tool|.
+    The outcome is kept in |self.last_status| (True only when at least one
+    command ran and all of them returned 0) and |self.last_result| (the
+    'stderr' section of each formatted execution result, one per line).
+    |prompt| is returned unchanged."""
+    # Update fuzzing harness
+    harness = self._parse_tag(response, 'fuzzer')
+    if harness:
+      self.harness_code = harness
+    if isinstance(tool, ProjectContainerTool):
+      tool.write_to_file(self.harness_code, self.harness_path)
+
+    commands = self._parse_tags(response, 'bash')
+    if not commands:
+      # Nothing was executed, so the build must not be reported as a success.
+      self.last_status = False
+      self.last_result = 'No bash command was found in the response.\n'
+      return prompt
+
+    # Try execute the generated build script. Every command is run, even
+    # after a failure, so the stderr of each one is collected.
+    success = True
+    stderr_outputs = []
+    for command in commands:
+      result = tool.execute(command)
+      success = success and (result.returncode == 0)
+      format_result = self._format_bash_execution_result(result,
+                                                         previous_prompt=prompt)
+      stderr_outputs.append(self._parse_tag(format_result, 'stderr') + '\n')
+
+    self.last_status = success
+    self.last_result = ''.join(stderr_outputs)
+
+    return prompt
+
+  def _container_handle_conclusion(self, cur_round: int, response: str,
+                                   build_result: BuildResult,
+                                   prompt: Prompt) -> Optional[Prompt]:
+    """Records the outcome of the last bash execution in |build_result|.
+
+    When the last execution failed, a retry message holding the collected
+    output is added to |prompt| (cut to the remaining prompt budget), the
+    failure is stored in |build_result| and |prompt| is returned. When it
+    succeeded, the harness code and the build script assembled from the
+    'bash' blocks of |response| are stored and None is returned."""
+    logger.info('----- ROUND %02d Received conclusion -----',
+                cur_round,
+                trial=build_result.trial)
+
+    # Execution fail
+    if not self.last_status:
+      retry = templates.LLM_RETRY.replace('{BASH_RESULT}', self.last_result)
+
+      # Keep only the tail of the retry text that still fits in the prompt
+      # budget. The budget is clamped at zero and the slice start is computed
+      # explicitly: retry[-0:] would return the whole text and a negative
+      # budget would slice from the wrong end.
+      budget = max(0, MAX_PROMPT_LENGTH - len(prompt.gettext()))
+      prompt.add_problem(retry[max(0, len(retry) - budget):])
+
+      # Store build result
+      build_result.compiles = False
+      build_result.compile_error = self.last_result
+
+      return prompt
+
+    # Execution success
+    build_result.compiles = True
+    build_result.fuzz_target_source = self.harness_code
+    build_script_source = '\n'.join(self._parse_tags(response, 'bash'))
+    # Prepend the empty build script template unless a shebang is present.
+    if not build_script_source.startswith('#!'):
+      build_script_source = templates.EMPTY_OSS_FUZZ_BUILD + build_script_source
+    build_result.build_script_source = build_script_source
+
+    return None
+
+  def _container_tool_reaction(self, cur_round: int, response: str,
+                               build_result: BuildResult) -> Optional[Prompt]:
+    """Reacts to one LLM |response| and produces the prompt for the next round.
+
+    An empty response goes straight to the invalid tool usage handler.
+    Otherwise the bash commands are executed and the conclusion is evaluated;
+    None is returned when the conclusion step yields no follow-up prompt. A
+    follow-up prompt that is empty is also passed to the invalid tool usage
+    handler."""
+    next_prompt = self.llm.prompt_type()(None)
+
+    if not response:
+      return self._container_handle_invalid_tool_usage(self.inspect_tool,
+                                                       cur_round, response,
+                                                       next_prompt)
+
+    next_prompt = self._container_handle_bash_commands(response,
+                                                       self.inspect_tool,
+                                                       next_prompt)
+
+    # Check result and try building with the new builds script
+    next_prompt = self._container_handle_conclusion(cur_round, response,
+                                                    build_result, next_prompt)
+    if next_prompt is None:
+      return None
+
+    if not next_prompt or not next_prompt.get():
+      next_prompt = self._container_handle_invalid_tool_usage(
+          self.inspect_tool, cur_round, response, next_prompt)
+
+    return next_prompt
+
+  def execute(self, result_history: list[Result]) -> BuildResult:
+    """Executes the agent based on previous result.
+
+    Creates an inspect container for the benchmark of the last result,
+    compiles it, then chats with the LLM round by round until no follow-up
+    prompt is produced or the round limit is reached. The inspect container
+    is always terminated before returning or propagating an error.
+
+    Returns:
+      The BuildResult filled in during the rounds.
+    """
+    last_result = result_history[-1]
+    logger.info('Executing %s', self.name, trial=last_result.trial)
+    benchmark = last_result.benchmark
+    self.inspect_tool = ProjectContainerTool(benchmark, name='inspect')
+    build_result = BuildResult(benchmark=benchmark,
+                               trial=last_result.trial,
+                               work_dirs=last_result.work_dirs,
+                               author=self,
+                               chat_history={self.name: ''})
+
+    # Everything after the tool is created runs inside the try block so the
+    # container is terminated even if compilation or prompt building raises.
+    try:
+      self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
+      cur_round = 1
+      prompt = self._initial_prompt(result_history)
+      client = self.llm.get_chat_client(model=self.llm.get_model())
+      while prompt and cur_round < self.max_round:
+        response = self.chat_llm(cur_round,
+                                 client=client,
+                                 prompt=prompt,
+                                 trial=last_result.trial)
+        prompt = self._container_tool_reaction(cur_round, response,
+                                               build_result)
+        cur_round += 1
+    finally:
+      logger.info('Stopping and removing the inspect container %s',
+                  self.inspect_tool.container_id,
+                  trial=last_result.trial)
+      self.inspect_tool.terminate()
+
+    return build_result
+
+
+class BuildSystemBuildScriptAgent(BuildScriptAgent):
+  """Generate a working Dockerfile and build script from scratch
+  with build system."""
+
+  def __init__(self,
+               trial: int,
+               llm: LLM,
+               args: argparse.Namespace,
+               github_url: str,
+               language: str,
+               tools: Optional[list[BaseTool]] = None,
+               name: str = ''):
+    """Initialises the agent and the build file names it searches for.
+
+    |self.target_files| maps each known build configuration file name to an
+    initially empty list of discovered paths."""
+    super().__init__(trial, llm, args, github_url, language, tools, name)
+    build_file_names = (
+        'Makefile',
+        'configure.ac',
+        'Makefile.am',
+        'autogen.sh',
+        'bootstrap.sh',
+        'CMakeLists.txt',
+        'Config.in',
+    )
+    # Every name gets its own list so discovered paths are tracked per name.
+    self.target_files = {file_name: [] for file_name in build_file_names}
+
+  def _discover_build_configurations(self) -> bool:
+    """Helper to discover the build configurations of a repository.
+
+    Clones the target repository into the work directory unless a directory
+    for it already exists, records the paths of the known build
+    configuration files in |self.target_files| and stores their content in
+    |self.build_files|, keyed by path with the clone directory removed.
+
+    Returns:
+      True if the content of at least one build configuration file is stored.
+
+    Raises:
+      subprocess.CalledProcessError: if the git clone fails.
+    """
+    # Clone target repository
+    target_path = os.path.join(self.args.work_dirs,
+                               self.github_url.split('/')[-1])
+    if not os.path.isdir(target_path):
+      # Pass an argument list instead of a shell string so the URL and the
+      # path are never interpreted by a shell; '--' ends option parsing.
+      subprocess.check_call([
+          'git', 'clone', '--recurse-submodules', '--', self.github_url,
+          target_path
+      ])
+
+    # Locate common build configuration files
+    for root_dir, _, files in os.walk(target_path):
+      for file in files:
+        if file in self.target_files:
+          self.target_files[file].append(os.path.join(root_dir, file))
+
+    # Extract content of build files
+    for build_file_paths in self.target_files.values():
+      for build_file in build_file_paths:
+        # Read as UTF-8 and replace undecodable bytes so that one oddly
+        # encoded file cannot abort the whole discovery.
+        with open(build_file, 'r', encoding='utf-8', errors='replace') as f:
+          self.build_files[build_file.replace(target_path, '')] = f.read()
+
+    return len(self.build_files) > 0
+
+  def _initial_prompt(self, results: list[Result]) -> Prompt:  # pylint: disable=unused-argument
+    """Builds the first prompt sent to the LLM.
+
+    The problem text is the LLM problem template filled with the discovered
+    build files, the template Dockerfile for the target repository, the
+    current harness code and the harness file name. The priming template is
+    added before the problem."""
+    prompt = self.llm.prompt_type()(None)
+    repo_name = self.github_url.split('/')[-1]
+
+    # Render one section per discovered build configuration file.
+    build_sections = [
+        templates.LLM_BUILD_FILE_TEMPLATE.replace('{PATH}', file_path).replace(
+            '{CONTENT}', file_content)
+        for file_path, file_content in self.build_files.items()
+    ]
+
+    # Fill in the template Dockerfile; substitutions are applied in order.
+    dockerfile = templates.CLEAN_OSS_FUZZ_DOCKER
+    dockerfile_values = (
+        ('{additional_packages}', ''),
+        ('{repo_url}', self.github_url),
+        ('{project_repo_dir}', repo_name),
+    )
+    for placeholder, value in dockerfile_values:
+      dockerfile = dockerfile.replace(placeholder, value)
+
+    # Fill in the problem template; substitutions are applied in order.
+    problem = templates.LLM_PROBLEM
+    problem_values = (
+        ('{BUILD_FILES}', '\n'.join(build_sections)),
+        ('{DOCKERFILE}', dockerfile),
+        ('{FUZZER}', self.harness_code),
+        ('{FUZZING_FILE}', self.harness_path.split('/')[-1]),
+    )
+    for placeholder, value in problem_values:
+      problem = problem.replace(placeholder, value)
+
+    prompt.add_priming(templates.LLM_PRIMING)
+    prompt.add_problem(problem)
+
+    return prompt
+
+  def execute(self, result_history: list[Result]) -> BuildResult:
+    """Executes the agent based on previous result.
+
+    When no known build configuration file is discovered, a fresh
+    BuildResult is returned right away; otherwise execution is handed over
+    to the parent class."""
+    if not self._discover_build_configurations():
+      last_result = result_history[-1]
+      # The agent name is passed as a positional log argument, so the
+      # message needs a matching %s placeholder (as the other log calls in
+      # this file have); without it the argument cannot be formatted.
+      logger.info('No known build configuration for %s.',
+                  self.name,
+                  trial=last_result.trial)
+      return BuildResult(benchmark=last_result.benchmark,
+                         trial=last_result.trial,
+                         work_dirs=last_result.work_dirs,
+                         author=self,
+                         chat_history={self.name: ''})
+
+    return super().execute(result_history)