Last active
July 6, 2023 03:44
-
-
Save yrom/e92ce46e1288a962fdb098f49d6f80f7 to your computer and use it in GitHub Desktop.
Convert the NDK script hwasan_symbolize from Python 2 to Python 3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
index dd5f859..739e7ed 100755 | |
--- a/hwasan_symbolize | |
+++ b/hwasan_symbolize | |
@@ -1,4 +1,4 @@ | |
-#!/usr/bin/env python | |
+#!/usr/bin/env python3 | |
#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===# | |
# | |
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
@@ -10,6 +10,7 @@ | |
# HWAddressSanitizer offline symbolization script. | |
# | |
#===------------------------------------------------------------------------===# | |
+import io | |
import glob | |
import os | |
import re | |
@@ -21,6 +22,7 @@ import argparse | |
last_access_address = None | |
last_access_tag = None | |
+ | |
class Symbolizer: | |
def __init__(self, path, binary_prefixes, paths_to_cut): | |
self.__pipe = None | |
@@ -36,20 +38,21 @@ class Symbolizer: | |
def __open_pipe(self): | |
if not self.__pipe: | |
self.__pipe = subprocess.Popen([self.__path, "-inlining", "-functions"], | |
+ encoding='utf-8', | |
stdin=subprocess.PIPE, stdout=subprocess.PIPE) | |
- class __EOF: | |
+ class __EOF(Exception): | |
pass | |
def __write(self, s): | |
- print >>self.__pipe.stdin, s | |
+ print(s, file=self.__pipe.stdin, flush=True) | |
if self.__log: | |
- print >>sys.stderr, ("#>> |%s|" % (s,)) | |
+ print(("#>> |%s|" % (s)), file=sys.stderr) | |
def __read(self): | |
s = self.__pipe.stdout.readline().rstrip() | |
if self.__log: | |
- print >>sys.stderr, ("# << |%s|" % (s,)) | |
+ print(("# << |%s|" % (s)), file=sys.stderr) | |
if s == '': | |
raise Symbolizer.__EOF | |
return s | |
@@ -75,7 +78,7 @@ class Symbolizer: | |
if os.path.exists(full_path): | |
return full_path | |
if name not in self.__warnings: | |
- print >>sys.stderr, "Could not find symbols for", name | |
+ print("Could not find symbols for", name, file=sys.stderr) | |
self.__warnings.add(name) | |
return None | |
@@ -117,8 +120,9 @@ class Symbolizer: | |
except Symbolizer.__EOF: | |
pass | |
+ | |
def symbolize_line(line, symbolizer_path): | |
- #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) | |
+ # 0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) | |
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE) | |
if match: | |
frameno = match.group(2) | |
@@ -128,16 +132,17 @@ def symbolize_line(line, symbolizer_path): | |
frames = list(symbolizer.iter_call_stack(binary, addr)) | |
if len(frames) > 0: | |
- print "%s#%s%s%s in %s" % (match.group(1).encode('utf-8'), match.group(2).encode('utf-8'), | |
- match.group(3).encode('utf-8'), frames[0][0], frames[0][1]) | |
+ print("%s#%s%s%s in %s" % (match.group(1), match.group(2), | |
+ match.group(3), frames[0][0], frames[0][1])) | |
for i in range(1, len(frames)): | |
space1 = ' ' * match.end(1) | |
space2 = ' ' * (match.start(4) - match.end(1) - 2) | |
- print "%s->%s%s in %s" % (space1, space2, frames[i][0], frames[i][1]) | |
+ print("%s->%s%s in %s" % (space1, space2, frames[i][0], frames[i][1])) | |
else: | |
- print line.rstrip().encode('utf-8') | |
+ print(line.rstrip()) | |
else: | |
- print line.rstrip().encode('utf-8') | |
+ print(line.rstrip()) | |
+ | |
def save_access_address(line): | |
global last_access_address, last_access_tag | |
@@ -148,6 +153,7 @@ def save_access_address(line): | |
if match: | |
last_access_tag = int(match.group(2), 16) | |
+ | |
def process_stack_history(line, symbolizer, ignore_tags=False): | |
if last_access_address is None or last_access_tag is None: | |
return | |
@@ -177,13 +183,14 @@ def process_stack_history(line, symbolizer, ignore_tags=False): | |
tag_offset = local[5] | |
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag): | |
continue | |
- print '' | |
- print 'Potentially referenced stack object:' | |
- print ' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]) | |
- print ' at %s' % (local[1],) | |
+ print('') | |
+ print('Potentially referenced stack object:') | |
+ print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0])) | |
+ print(' at %s' % (local[1],)) | |
return True | |
return False | |
+ | |
parser = argparse.ArgumentParser() | |
parser.add_argument('-d', action='store_true') | |
parser.add_argument('-v', action='store_true') | |
@@ -195,20 +202,22 @@ parser.add_argument('args', nargs=argparse.REMAINDER) | |
args = parser.parse_args() | |
# Unstripped binaries location. | |
-binary_prefixes = args.symbols or [] | |
+binary_prefixes = args.symbols | |
if not binary_prefixes: | |
+ binary_prefixes = [] | |
if 'ANDROID_PRODUCT_OUT' in os.environ: | |
product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols') | |
binary_prefixes.append(product_out) | |
for p in binary_prefixes: | |
if not os.path.isdir(p): | |
- print >>sys.stderr, "Symbols path does not exist or is not a directory:", p | |
+ print("Symbols path does not exist or is not a directory:", p, file=sys.stderr) | |
sys.exit(1) | |
# Source location. | |
-paths_to_cut = args.source or [] | |
+paths_to_cut = args.source | |
if not paths_to_cut: | |
+ paths_to_cut = [] | |
paths_to_cut.append(os.getcwd() + '/') | |
if 'ANDROID_BUILD_TOP' in os.environ: | |
paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/') | |
@@ -245,6 +254,7 @@ if not symbolizer_path: | |
symbolizer_path = p | |
break | |
+ | |
def extract_version(s): | |
idx = s.rfind('-') | |
if idx == -1: | |
@@ -252,33 +262,34 @@ def extract_version(s): | |
x = float(s[idx + 1:]) | |
return x | |
+ | |
if not symbolizer_path: | |
for path in os.environ["PATH"].split(os.pathsep): | |
candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*')) | |
if len(candidates) > 0: | |
- candidates.sort(key = extract_version, reverse = True) | |
+ candidates.sort(key=extract_version, reverse=True) | |
symbolizer_path = candidates[0] | |
break | |
if not os.path.exists(symbolizer_path): | |
- print >>sys.stderr, "Symbolizer path does not exist:", symbolizer_path | |
+ print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr) | |
sys.exit(1) | |
if args.v: | |
- print "Looking for symbols in:" | |
+ print("Looking for symbols in:") | |
for s in binary_prefixes: | |
- print " %s" % (s,) | |
- print "Stripping source path prefixes:" | |
+ print(" %s" % (s,)) | |
+ print("Stripping source path prefixes:") | |
for s in paths_to_cut: | |
- print " %s" % (s,) | |
- print "Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,) | |
+ print(" %s" % (s,)) | |
+ print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path)) | |
+ print() | |
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut) | |
symbolizer.enable_logging(args.d) | |
-for line in sys.stdin: | |
- line = line.decode('utf-8') | |
+input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') | |
+for line in input_stream: | |
save_access_address(line) | |
if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags): | |
continue |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===# | |
# | |
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
# | |
#===------------------------------------------------------------------------===# | |
# | |
# HWAddressSanitizer offline symbolization script. | |
# | |
#===------------------------------------------------------------------------===# | |
import io | |
import glob | |
import os | |
import re | |
import sys | |
import string | |
import subprocess | |
import argparse | |
# Report state carried from one input line to the next. Both values stay
# None until save_access_address() has seen the matching report lines.
last_access_tag = None
last_access_address = None
class Symbolizer:
    """Query a long-lived llvm-symbolizer child process over pipes.

    The child is started lazily on the first query and then reused. Each
    query writes one command line ("CODE ..." or "FRAME ...") to the child's
    stdin and reads response lines until an empty line or end of stream.
    """

    def __init__(self, path, binary_prefixes, paths_to_cut):
        """Store the configuration; the child process is not started here.

        Args:
            path: path of the llvm-symbolizer executable to launch.
            binary_prefixes: directories searched for binaries with symbols.
            paths_to_cut: literal source-path prefixes stripped from the
                file names reported by the symbolizer.
        """
        self.__pipe = None
        self.__path = path
        self.__binary_prefixes = binary_prefixes
        self.__paths_to_cut = paths_to_cut
        self.__log = False
        self.__warnings = set()

    def enable_logging(self, enable):
        """Turn echoing of symbolizer traffic to stderr on or off."""
        self.__log = enable

    def __open_pipe(self):
        """Start the symbolizer child process unless it is already running."""
        if not self.__pipe:
            self.__pipe = subprocess.Popen(
                [self.__path, "-inlining", "-functions"],
                encoding='utf-8',
                stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    class __EOF(Exception):
        """Raised by __read when the current response has ended."""
        pass

    def __write(self, s):
        """Send one command line to the symbolizer."""
        # Flush on every request: the reply is read right after the write,
        # so the command must not linger in the text buffer.
        print(s, file=self.__pipe.stdin, flush=True)
        if self.__log:
            print("#>> |%s|" % (s,), file=sys.stderr)

    def __read(self):
        """Return the next response line with trailing whitespace removed.

        Raises:
            Symbolizer.__EOF: when the line is empty after stripping, which
                covers both a blank separator line and end of stream.
        """
        s = self.__pipe.stdout.readline().rstrip()
        if self.__log:
            print("# << |%s|" % (s,), file=sys.stderr)
        if s == '':
            raise Symbolizer.__EOF
        return s

    def __process_source_path(self, file_name):
        """Shorten a "file:line" string reported by the symbolizer.

        Everything up to and including each configured prefix is removed,
        then a few runtime and CRT locations are replaced by fixed labels.
        """
        for path_to_cut in self.__paths_to_cut:
            # The prefix is a literal path, not a pattern. Escape it so that
            # characters such as '+', '(' or '.' in directory names neither
            # break the regex nor match unintended text.
            file_name = re.sub(".*" + re.escape(path_to_cut), "", file_name)
        file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
        file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
        file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
        return file_name

    def __process_binary_name(self, name):
        """Map a binary path from the report to a local file with symbols.

        Returns:
            The first existing candidate under the configured prefixes, or
            None when nothing is found. A warning is printed to stderr once
            per distinct name.
        """
        if name.startswith('/'):
            name = name[1:]
        for p in self.__binary_prefixes:
            full_path = os.path.join(p, name)
            if os.path.exists(full_path):
                return full_path
        # Try stripping extra path components as the last resort.
        for p in self.__binary_prefixes:
            full_path = os.path.join(p, os.path.basename(name))
            if os.path.exists(full_path):
                return full_path
        if name not in self.__warnings:
            print("Could not find symbols for", name, file=sys.stderr)
            self.__warnings.add(name)
        return None

    def iter_locals(self, binary, addr):
        """Yield the local variables reported for the frame at addr.

        Sends a "FRAME" query. Each yielded item is a tuple
        (function_name, file_line, local_name, offset, size, tag_offset);
        the last three are ints, or None where the symbolizer printed '??'.
        Nothing is yielded when no binary with symbols can be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("FRAME %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                local_name = self.__read()
                file_line = self.__read()
                extra = self.__read().split()

                file_line = self.__process_source_path(file_line)
                offset = None if extra[0] == '??' else int(extra[0])
                size = None if extra[1] == '??' else int(extra[1])
                tag_offset = None if extra[2] == '??' else int(extra[2])
                yield (function_name, file_line, local_name, offset, size, tag_offset)
        except Symbolizer.__EOF:
            # End of this response; the generator simply finishes.
            pass

    def iter_call_stack(self, binary, addr):
        """Yield (function_name, file_line) pairs for the code at addr.

        Sends a "CODE" query and yields one pair per two response lines.
        Nothing is yielded when no binary with symbols can be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("CODE %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                file_line = self.__read()
                file_line = self.__process_source_path(file_line)
                yield (function_name, file_line)
        except Symbolizer.__EOF:
            # End of this response; the generator simply finishes.
            pass
def symbolize_line(line, symbolizer_path):
    """Print line, symbolized when it is a stack-frame line.

    A frame line carries "#<n> 0x<pc> (<binary>+0x<offset>)". For such a
    line the module-level `symbolizer` is queried; the first resolved frame
    replaces the address, and any further frames (inlined callers) are
    printed beneath it with a "->" marker. Lines that do not match, or that
    yield no frames, are echoed with trailing whitespace removed.

    Args:
        line: one line of report text.
        symbolizer_path: unused; kept so existing callers keep working.
    """
    # Example frame line:
    #   #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not match:
        print(line.rstrip())
        return

    binary = match.group(5)
    addr = int(match.group(6), 16)
    frames = list(symbolizer.iter_call_stack(binary, addr))
    if not frames:
        print(line.rstrip())
        return

    print("%s#%s%s%s in %s" % (match.group(1), match.group(2),
                               match.group(3), frames[0][0], frames[0][1]))
    # Padding that lines continuation frames up under the first one; it
    # depends only on the match, so compute it once.
    space1 = ' ' * match.end(1)
    space2 = ' ' * (match.start(4) - match.end(1) - 2)
    for frame in frames[1:]:
        print("%s->%s%s in %s" % (space1, space2, frame[0], frame[1]))
def save_access_address(line):
    """Record the faulting address and pointer tag found in a report line.

    Updates the module globals `last_access_address` and `last_access_tag`
    when line contains the corresponding report text; otherwise leaves them
    unchanged.
    """
    global last_access_address, last_access_tag

    address_hit = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
    if address_hit is not None:
        last_access_address = int(address_hit.group(2), 16)

    tag_hit = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
    if tag_hit is not None:
        last_access_tag = int(tag_hit.group(2), 16)
def process_stack_history(line, symbolizer, ignore_tags=False):
    """Handle a stack-history line of the report, if line is one.

    For a "record_addr:... record:... (binary+0xoff)" line, asks the
    symbolizer for the locals of that frame and prints every local whose
    extent covers the last recorded access address (and, unless ignore_tags
    is set, whose tag matches the last recorded pointer tag).

    Args:
        line: one line of report text.
        symbolizer: object providing iter_locals(binary, addr).
        ignore_tags: when true, skip the tag comparison.

    Returns:
        True when the line was consumed here (the caller should not print
        it), False otherwise. False is also returned while no access address
        and tag have been recorded yet.
    """
    if last_access_address is None or last_access_tag is None:
        # Nothing to correlate against yet. Return an explicit bool so every
        # path of this function has the same result type.
        return False
    if re.match(r'Previously allocated frames:', line, re.UNICODE):
        return True

    fp_mask = (1 << 20) - 1
    # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
    match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not match:
        return False

    record_addr = int(match.group(2), 16)
    record = int(match.group(3), 16)
    binary = match.group(4)
    addr = int(match.group(5), 16)
    base_tag = (record_addr >> 3) & 0xFF
    fp = (record >> 48) << 4

    for local in symbolizer.iter_locals(binary, addr):
        frame_offset = local[3]
        size = local[4]
        if frame_offset is None or size is None:
            continue
        # Offset of the access inside this local, compared modulo the bits
        # kept by fp_mask.
        obj_offset = (last_access_address - fp - frame_offset) & fp_mask
        if obj_offset >= size:
            continue
        tag_offset = local[5]
        if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
            continue
        print('')
        print('Potentially referenced stack object:')
        print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
        print(' at %s' % (local[1],))
    return True
# Command-line interface.
parser = argparse.ArgumentParser()
for switch in ('-d', '-v', '--ignore-tags'):
    parser.add_argument(switch, action='store_true')
for repeatable in ('--symbols', '--source'):
    parser.add_argument(repeatable, action='append')
parser.add_argument('--symbolizer')
parser.add_argument('args', nargs=argparse.REMAINDER)
args = parser.parse_args()

# Directories holding unstripped binaries. Without --symbols, fall back to
# $ANDROID_PRODUCT_OUT/symbols when that variable is set.
binary_prefixes = args.symbols or []
if not binary_prefixes and 'ANDROID_PRODUCT_OUT' in os.environ:
    product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
    binary_prefixes.append(product_out)

# Every symbols directory must exist; stop at the first one that does not.
for p in binary_prefixes:
    if os.path.isdir(p):
        continue
    print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
    sys.exit(1)

# Source prefixes to strip. Without --source, use the current directory and,
# when set, $ANDROID_BUILD_TOP.
paths_to_cut = args.source or []
if not paths_to_cut:
    paths_to_cut.append(os.getcwd() + '/')
    if 'ANDROID_BUILD_TOP' in os.environ:
        paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
# Locate the llvm-symbolizer binary. The first source that yields a value
# wins:
#   1. the --symbolizer flag
#   2. $LLVM_SYMBOLIZER_PATH, else $HWASAN_SYMBOLIZER_PATH
#   3. an unsuffixed "llvm-symbolizer" in the directory of sys.argv[0]
#   4. inside an Android tree, the prebuilt binary at a known path
#   5. "llvm-symbolizer" in a $PATH directory (versioned names are tried
#      later in the script)
symbolizer_path = args.symbolizer

if not symbolizer_path:
    for env_name in ('LLVM_SYMBOLIZER_PATH', 'HWASAN_SYMBOLIZER_PATH'):
        if env_name in os.environ:
            symbolizer_path = os.environ[env_name]
            break

if not symbolizer_path:
    s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
    if os.path.exists(s):
        symbolizer_path = s

if not symbolizer_path and 'ANDROID_BUILD_TOP' in os.environ:
    s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
    if os.path.exists(s):
        symbolizer_path = s

if not symbolizer_path:
    for path in os.environ["PATH"].split(os.pathsep):
        p = os.path.join(path, 'llvm-symbolizer')
        if not os.path.exists(p):
            continue
        symbolizer_path = p
        break
def extract_version(s):
    """Return a numeric sort key taken from the text after the last '-' in s.

    Used to rank "llvm-symbolizer-<version>" candidates. A suffix that is a
    plain number ("14", "3.9") is returned as a float. For other suffixes
    the leading "major[.minor]" digits are used ("14.0.6" -> 14.0), and 0 is
    returned when the suffix does not start with a digit or s has no '-'.
    """
    idx = s.rfind('-')
    if idx == -1:
        return 0
    suffix = s[idx + 1:]
    try:
        return float(suffix)
    except ValueError:
        # Suffixes such as "14.0.6" or "git" are not valid floats; rank them
        # by their leading number rather than aborting the whole sort.
        leading = re.match(r'[0-9]+(?:\.[0-9]+)?', suffix)
        return float(leading.group(0)) if leading else 0
# Last resort: a versioned "llvm-symbolizer-<ver>" from the first $PATH
# directory that has any, choosing the highest version there.
if not symbolizer_path:
    for path in os.environ["PATH"].split(os.pathsep):
        candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
        if len(candidates) > 0:
            candidates.sort(key=extract_version, reverse=True)
            symbolizer_path = candidates[0]
            break

# Every lookup may have failed, leaving symbolizer_path unset. Report that
# explicitly: os.path.exists(None) would raise TypeError instead of giving
# the user a usable message.
if not symbolizer_path:
    print("Could not find llvm-symbolizer; pass --symbolizer or set LLVM_SYMBOLIZER_PATH",
          file=sys.stderr)
    sys.exit(1)
if not os.path.exists(symbolizer_path):
    print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
    sys.exit(1)

if args.v:
    print("Looking for symbols in:")
    for s in binary_prefixes:
        print(" %s" % (s,))
    print("Stripping source path prefixes:")
    for s in paths_to_cut:
        print(" %s" % (s,))
    print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
    print()

symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
symbolizer.enable_logging(args.d)

# Read the report from stdin as UTF-8 regardless of the locale encoding.
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
for line in input_stream:
    save_access_address(line)
    # Stack-history lines are consumed (and possibly annotated) there;
    # everything else is symbolized or echoed unchanged.
    if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
        continue
    symbolize_line(line, symbolizer_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python3 hwasan_symbolize.py3 -v --symbolizer /path/to/bin/llvm-symbolizer --symbols /path/to/symbols < crashes