|
| 1 | +# Copyright 2016 Google Inc. All Rights Reserved. |
| 2 | +# |
| 3 | +# This script is used to help the compiler wrapper in the Android build system |
| 4 | +# bisect for bad object files. |
| 5 | +"""Utilities for bisection of Android object files. |
| 6 | +
|
| 7 | +This module contains a set of utilities to allow bisection between |
| 8 | +two sets (good and bad) of object files. Mostly used to find compiler |
| 9 | +bugs. |
| 10 | +
|
| 11 | +Reference page: |
| 12 | +https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper |
| 13 | +
|
| 14 | +Design doc: |
| 15 | +https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM |
| 16 | +""" |
| 17 | + |
| 18 | +from __future__ import print_function |
| 19 | + |
| 20 | +import contextlib |
| 21 | +import fcntl |
| 22 | +import os |
| 23 | +import shutil |
| 24 | +import subprocess |
| 25 | +import sys |
| 26 | + |
# Names of the bisection stages that bisect_driver dispatches on.
VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']
# Names of the two cache sub-directories kept inside the bisection directory.
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'
# Path (relative to the bisection directory) of the good cache's object list.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# Optional behaviours, each switched on by exporting the variable as '1'.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE') == '1'
| 34 | + |
| 35 | + |
class Error(Exception):
    """Base exception raised by the bisection compiler wrapper."""
| 39 | + |
| 40 | + |
@contextlib.contextmanager
def lock_file(path, mode):
    """Open a file and hold an advisory lock on it for the "with" body.

    The lock only affects other processes that also take it through this
    function. Pure read modes ('r', 'rb', 'rt', ...) take a shared lock, so
    many readers may proceed concurrently. Any mode that can modify the file
    ('w', 'a', 'x' or anything containing '+') takes an exclusive lock, so
    only one writer runs at a time.

    This function is a context manager and is meant to be used with the
    "with" statement. The body of the "with" runs at the "yield"; when the
    body finishes (normally or by raising) buffered data is flushed and the
    lock is released before the file is closed.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The open, locked file object.
    """
    # An exclusive lock needs a descriptor opened for writing, so request it
    # only for modes that can write; every other mode shares the lock.
    if set(mode) & set('wax+'):
        lock_type = fcntl.LOCK_EX
    else:
        lock_type = fcntl.LOCK_SH

    with open(path, mode) as f:
        # Acquire outside the try block: if acquisition fails there is no
        # lock to release.
        fcntl.lockf(f, lock_type)
        try:
            yield f
        finally:
            try:
                # Flush while the lock is still held so other lockers never
                # observe the file before buffered writes have landed, even
                # when the "with" body raised.
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)
| 75 | + |
| 76 | + |
def log_to_file(path, execargs, link_from=None, link_to=None):
    """Append a record of one compiler invocation to a log file.

    Each record holds the current working directory and the space-joined
    command line. When both link_from and link_to are given, a second line
    "link_from -> link_to" records the from-to relationship between files.

    Args:
      path: log file to append to (opened through lock_file).
      execargs: compiler execution arguments.
      link_from: optional source side of the relationship.
      link_to: optional destination side of the relationship.
    """
    has_link = bool(link_from and link_to)
    with lock_file(path, 'a') as log:
        log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs)))
        if not has_link:
            return
        log.write('%s -> %s\n' % (link_from, link_to))
| 87 | + |
| 88 | + |
def exec_and_return(execargs):
    """Run a command and hand back its exit status.

    The command's stdout and stderr are neither captured nor inspected.

    Args:
      execargs: argument list of the command to run.

    Returns:
      The exit status reported by subprocess.call.
    """
    exit_status = subprocess.call(execargs)
    return exit_status
| 96 | + |
| 97 | + |
def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration listing
    the full set of bad objects and full set of good objects. We use this to
    determine where an object file should be linked from (good or bad).

    Args:
      obj_file: object file path to look up in the bad set.

    Returns:
      BAD_CACHE if obj_file is a line of the file named by the BISECT_BAD_SET
      environment variable, GOOD_CACHE otherwise (including when grep itself
      fails, e.g. because the file cannot be read).

    Raises:
      Error: BISECT_BAD_SET is not set in the environment.
    """
    bad_set_file = os.environ.get('BISECT_BAD_SET')
    if bad_set_file is None:
        raise Error('BISECT_BAD_SET is not set; cannot decide which cache '
                    '%s belongs to.' % obj_file)

    # -F: treat the path as a literal string, not a regex (paths contain '.'
    #     and may contain other metacharacters such as '+' or '[').
    # -x: only whole-line matches count.  -q: no output, just the status.
    # -e: keeps a pattern starting with '-' from being read as an option.
    ret = subprocess.call(
        ['grep', '-F', '-x', '-q', '-e', obj_file, '--', bad_set_file])
    if ret == 0:
        return BAD_CACHE
    return GOOD_CACHE
| 111 | + |
| 112 | + |
def makedirs(path):
    """Create a directory and its missing parents; an existing one is fine.

    Args:
      path: directory path to create.

    Raises:
      OSError: creation failed and path is not an existing directory.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Already being a directory is the one failure we tolerate.
        if os.path.isdir(path):
            return
        raise
| 120 | + |
| 121 | + |
def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute object path from execution args (-o argument). If no object is
      being outputted, if '-o' has no value after it, or if the output doesn't
      end in ".o", then return empty string.
    """
    try:
        obj_path = execargs[execargs.index('-o') + 1]
    except (ValueError, IndexError):
        # ValueError: no '-o' at all.  IndexError: '-o' is the last argument,
        # so there is no output path to read.
        return ''

    if not obj_path.endswith('.o'):
        # TODO: what suffixes do we need to contemplate
        # TODO: add this as a warning
        # TODO: need to handle -r compilations
        return ''

    return os.path.abspath(obj_path)
| 142 | + |
| 143 | + |
def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute path of the dependency file. This is the value of -MF when it
      is given, otherwise the object path (-o argument) with its ".o" suffix
      replaced by ".d". If no dependency file is being outputted (neither -MD
      nor -MMD is present), '-MF' has no value after it, or no object path can
      be determined, then return empty string.
    """
    if '-MD' not in execargs and '-MMD' not in execargs:
        return ''

    # If -MF given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if '-MF' in execargs:
        value_index = execargs.index('-MF') + 1
        if value_index >= len(execargs):
            # '-MF' is the last argument: there is no path to report.
            return ''
        return os.path.abspath(execargs[value_index])

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    return full_obj_path[:-len('.o')] + '.d'
| 166 | + |
| 167 | + |
def get_dwo_path(execargs):
    """Work out where the compiler writes the .dwo file, if it writes one.

    Args:
      execargs: compiler execution arguments.

    Returns:
      The absolute object path with its ".o" suffix replaced by ".dwo" when
      -gsplit-dwarf is among the arguments and an object path can be
      determined; empty string otherwise.
    """
    if '-gsplit-dwarf' in execargs:
        obj_path = get_obj_path(execargs)
        if obj_path:
            # obj_path is known to end in ".o"; swap that suffix for ".dwo".
            return obj_path[:-2] + '.dwo'
    return ''
| 183 | + |
| 184 | + |
def in_object_list(obj_name, list_filename):
    """Tell whether an object file name appears in an object list file.

    Args:
      obj_name: object file name to look for; a falsy value is never found.
      list_filename: file holding one object name per line (read through
        lock_file).

    Returns:
      True if some line, stripped of surrounding whitespace, equals obj_name;
      False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, 'r') as list_file:
        return any(entry.strip() == obj_name for entry in list_file)
| 196 | + |
| 197 | + |
def get_side_effects(execargs):
    """Collect the extra files the compiler generates besides the object.

    Args:
      execargs: compiler execution arguments.

    Returns:
      List of paths of the side-effect files: the dependency file first, then
      the dwo file, each included only when its path is non-empty.
    """
    candidates = (get_dep_path(execargs), get_dwo_path(execargs))
    return [side_effect for side_effect in candidates if side_effect]
| 217 | + |
| 218 | + |
def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Copy a compiler output file (.o/.d/.dwo) into a bisection cache.

    The destination directory is created, the copy is recorded in the cache's
    _POPULATE_LOG, and then the file is copied if it exists on disk.

    Args:
      execargs: compiler execution arguments (written to the log).
      bisect_dir: bisection directory.
      cache: name of the cache to store into.
      abs_file_path: absolute path of the file to cache.
    """
    # abs_file_path is absolute, so os.path.join would throw away the cache
    # prefix; plain concatenation nests the full path under the cache.
    destination = os.path.join(bisect_dir, cache) + abs_file_path
    makedirs(os.path.dirname(destination))

    populate_log = os.path.join(bisect_dir, cache, '_POPULATE_LOG')
    log_to_file(populate_log, execargs, abs_file_path, destination)

    try:
        source_present = os.path.exists(abs_file_path)
        if source_present:
            shutil.copy2(abs_file_path, destination)
    except Exception:
        sys.stderr.write('Could not cache file %s\n' % abs_file_path)
        raise
| 235 | + |
def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Any file already at abs_file_path is removed first. The cached copy is
    then hard-linked into place, falling back to a plain copy when linking
    fails.

    Args:
      bisect_dir: bisection directory.
      cache: name of the cache to restore from.
      abs_file_path: absolute path the file should be restored to.

    Raises:
      Error: the file is not present in the given cache.
    """
    # abs_file_path is absolute, so os.path.join would throw away the cache
    # prefix; use + instead.
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        # Report the missing path first, then the cache name, matching the
        # order of the placeholders in the message.
        raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
                     'will now crash.' % (cached_path, cache)))

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    try:
        os.link(cached_path, abs_file_path)
    except OSError:
        # Hard links can fail (e.g. across filesystems); copy instead.
        shutil.copyfile(cached_path, abs_file_path)
| 250 | + |
| 251 | + |
def bisect_populate(execargs, bisect_dir, population_name):
    """Run the compiler and record its outputs in one of the bisect caches.

    The compiler is executed first. If it succeeds and produced an object
    file, the object is copied into the cache, its path is appended to the
    cache's _LIST file, and every compiler side-effect file is copied into
    the cache as well.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The compiler's non-zero exit status if it failed, otherwise None.
    """
    exit_status = exec_and_return(execargs)
    if exit_status:
        return exit_status

    obj_path = get_obj_path(execargs)
    # Only normal compiler calls (those producing an object) are cached.
    if obj_path:
        cache_file(execargs, bisect_dir, population_name, obj_path)

        list_path = os.path.join(bisect_dir, population_name, '_LIST')
        with lock_file(list_path, 'a') as object_list:
            object_list.write('%s\n' % obj_path)

        for extra_output in get_side_effects(execargs):
            cache_file(execargs, bisect_dir, population_name, extra_output)
| 282 | + |
| 283 | + |
def bisect_triage(execargs, bisect_dir):
    """Stand in for the compiler during triage, serving outputs from a cache.

    Compilations that do not produce an object file are passed straight to
    the compiler. For an object recorded in the good cache's list, the cache
    chosen by which_cache supplies the object and its side-effect files. With
    WRAPPER_SAFE_MODE the compiler is run first and only the object file is
    then replaced from the cache.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.

    Returns:
      The compiler's exit status when the invocation is passed through to the
      compiler, or when the compiler fails in safe mode; None otherwise.

    Raises:
      Error: the object is not in the object list and CONTINUE_ON_MISSING is
        not enabled.
    """
    obj_path = get_obj_path(execargs)
    list_path = os.path.join(bisect_dir, LIST_FILE)

    # Output is not an object file: nothing to bisect, run the compiler.
    if not obj_path:
        return exec_and_return(execargs)

    # An object that was never cached should not happen; either fail loudly
    # or, if the user opted in, log it and fall back to the compiler.
    if not in_object_list(obj_path, list_path):
        if not CONTINUE_ON_MISSING:
            raise Error(('%s is missing from cache! To ignore export '
                         'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
                         'details on this option.' % obj_path))
        missing_log = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
        log_to_file(missing_log, execargs, '? compiler', obj_path)
        return exec_and_return(execargs)

    source_cache = which_cache(obj_path)

    # Safe mode: really run the compiler so every side effect is genuine,
    # then overwrite just the object with the cached one. Safe but very slow.
    if WRAPPER_SAFE_MODE:
        exit_status = exec_and_return(execargs)
        if exit_status:
            return exit_status
        os.remove(obj_path)
        restore_file(bisect_dir, source_cache, obj_path)
        return None

    # Recreate the compiler's side effects so Make believes it actually ran.
    for extra_output in get_side_effects(execargs):
        restore_file(bisect_dir, source_cache, extra_output)

    # If Make pruned/cleaned the object file, link it over from the cache
    # again.
    if not os.path.exists(obj_path):
        restore_file(bisect_dir, source_cache, obj_path)
    return None
| 326 | + |
| 327 | + |
def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
      bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      Whatever the selected stage returns: the compiler's exit status when
      the stage reports one, otherwise None. Propagating it lets the caller
      notice a failed compilation instead of always seeing None.

    Raises:
      ValueError: bisect_stage is not a recognised stage name.
    """
    if bisect_stage == 'POPULATE_GOOD':
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    if bisect_stage == 'POPULATE_BAD':
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    if bisect_stage == 'TRIAGE':
        return bisect_triage(execargs, bisect_dir)
    raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)
0 commit comments