From 615d44f8c1aa29ecd3294b6e43c6e38fc2ccc270 Mon Sep 17 00:00:00 2001 From: Ivan Gotovchits Date: Wed, 28 Feb 2018 12:43:32 -0500 Subject: [PATCH 1/9] minor fixes to the readme file --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e6318c5..44d2e4f 100644 --- a/README.md +++ b/README.md @@ -2,24 +2,25 @@ BAP python bindings # Installing -Install python bindings with pip (after you installed `bap`): +Install python bindings with pip (after you have installed `bap`): ```bash $ pip install bap ``` Alternatively you can just copy paste files into your project, or clone it -with git-subtree, or whatever... +with git-subtree. ## Installing low-level bindings -An optional low-level interface, called [rpc] depends on requests, so -install [requests] package from pip and `bap-server` from opam: +An optional low-level interface, called [rpc] depends on the requests +library and the bap-server package. To use it, you need to install +them from pip and opam correspondigly: ```bash $ pip install bap[rpc] -$ opam install bap +$ opam install bap-server ``` ## Installing development version @@ -34,8 +35,7 @@ pip install git+git://github.com/BinaryAnalysisPlatform/bap-python.git ```python >>> import bap ->>> proj = bap.run('/bin/true', ['--symbolizer=ida']) ->>> text = proj.sections['.text'] +>>> proj = bap.run('/bin/true') >>> main = proj.program.subs.find('main') >>> entry = main.blks[0] >>> next = main.blks.find(entry.jmps[0].target.arg) From c1b609f468251f2d99c2e4d13409b7070b356167 Mon Sep 17 00:00:00 2001 From: Ivan Gotovchits Date: Mon, 4 Jun 2018 12:03:57 -0400 Subject: [PATCH 2/9] fixes parsing section and region statements Note: this is a backport from bap-1.3.1 package, as this fix is already there, but wasn't committed to the master branch of the upstream repository. The fix enables the special handling for the Section and Region types, which use hex numbers without the 0x prefix. 
Ideally, we shouldn't generate such input, but since historically this happened, we need to make our parser robust enough to be able to chew such representation also. --- src/bap/noeval_parser.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/bap/noeval_parser.py b/src/bap/noeval_parser.py index c22bb64..b1940a7 100755 --- a/src/bap/noeval_parser.py +++ b/src/bap/noeval_parser.py @@ -2,23 +2,32 @@ ''' Parser for ADT string from bap that does not use eval -The nieve eval-based version runs into out-of-memory conditions on large files +The naive eval-based version runs into out-of-memory conditions on large files ''' import gc import sys import time -# NOTE: uses bap.bir, but cannot import at module level (circular references) +from subprocess import check_output + +# bap.1.3 breaks the format of the following types. it prints hexes +# without prefixing them with the `0x` escape. To fix it without +# fixing bap, we will treat integers inside this parents as +# hexadecimals if there is no prefix. 
+BROKEN_TYPES = [ + 'Section', + 'Region' +] +# NOTE: uses bap.bir, but cannot import at module level (circular references) -def toint(string, start, end): +def toint(string, start, end, base=10): ''' Convert substring string[start:end] to integer/long without eval Note: may contain leading whitespace ''' istr = string[start:end].lstrip() - if sys.version_info > (3,): # then longs don't exist if istr.endswith('L'): istr = istr.rstrip('L') @@ -31,7 +40,7 @@ def toint(string, start, end): if istr.startswith('0x'): return of_str(istr, 16) else: - return of_str(istr) + return of_str(istr, base) def setup_progress(totalitems): ''' @@ -159,17 +168,19 @@ def _parse_end(in_c, in_s, i, objs, stk): raise ParserInputError('Mismatched input stream') j = stk[-1] parent = objs[j] + ptyp = parent['typ'] assert isinstance(parent, dict) assert parent, 'parent is empty' - assert parent['typ'] != 'int', 'parent wrong type: %r' % (parent['typ']) + assert ptyp != 'int', 'parent wrong type: %r' % (parent['typ']) assert 'children' in parent if top: # add to parent if non empty # make real int before appending if top['typ'] == 'd': # int try: - top = toint(in_s, k, i) + base = 16 if ptyp in BROKEN_TYPES else 10 + top = toint(in_s, k, i, base) except ValueError: - raise ParserInputError("Integer expected between [%d..%d)" % (top, i)) + raise ParserInputError("Integer expected between [%d..%d)" % (k, i)) parent['children'].append(top) if in_c == ',': # add blank object and move on # next obj @@ -179,7 +190,6 @@ def _parse_end(in_c, in_s, i, objs, stk): return i else: # we are ending a tuple/list/app do it # maybe handle apply (num and seq are earlier) - ptyp = parent['typ'] if ptyp == '[': if in_c != ']': raise ParserInputError('close %r and open %r mismatch' % (in_c, ptyp)) @@ -325,4 +335,3 @@ def parser(input_str, disable_gc=False, logger=None): 'format': 'adt', 'load': parser } - From 5c38a964846cec740536a8904f5c06391715772d Mon Sep 17 00:00:00 2001 From: Neil Zhao Date: Sun, 8 Sep 2019 
01:41:49 -0500 Subject: [PATCH 3/9] fix the arguments for Values --- src/bap/bir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bap/bir.py b/src/bap/bir.py index fd28140..a531fb0 100644 --- a/src/bap/bir.py +++ b/src/bap/bir.py @@ -253,7 +253,8 @@ class Values(Map) : It is a mapping from the tid of a preceeding block, to an expression that denotes a value. """ - pass + def __init__(self, *args): + super().__init__([ADT(p) for p in args[0]]) class Tid(ADT) : """Tid(id,name=None) term unique identifier. From 9c811495c9a6ed3ac495233862beffecfe463cb2 Mon Sep 17 00:00:00 2001 From: Neil Zhao Date: Mon, 9 Sep 2019 14:12:13 -0500 Subject: [PATCH 4/9] make it work for both py2 and py3 --- src/bap/bir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bap/bir.py b/src/bap/bir.py index a531fb0..a779113 100644 --- a/src/bap/bir.py +++ b/src/bap/bir.py @@ -254,7 +254,8 @@ class Values(Map) : to an expression that denotes a value. """ def __init__(self, *args): - super().__init__([ADT(p) for p in args[0]]) + super(Map,self).__init__(args) + self.elements = dict(args[0]) class Tid(ADT) : """Tid(id,name=None) term unique identifier. From c13659f82863dc3a2c9251170bac35d92bcacb5f Mon Sep 17 00:00:00 2001 From: Neil Zhao Date: Mon, 9 Sep 2019 14:45:18 -0500 Subject: [PATCH 5/9] turn off pylint warning --- src/bap/bir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bap/bir.py b/src/bap/bir.py index a779113..d310c3c 100644 --- a/src/bap/bir.py +++ b/src/bap/bir.py @@ -254,7 +254,7 @@ class Values(Map) : to an expression that denotes a value. 
""" def __init__(self, *args): - super(Map,self).__init__(args) + super(Map, self).__init__(args) # pylint: disable=bad-super-call self.elements = dict(args[0]) class Tid(ADT) : From ce86eb9fa259f9cbc31ec772a896661f5b8340be Mon Sep 17 00:00:00 2001 From: tnballo Date: Tue, 17 Nov 2020 15:21:27 -0500 Subject: [PATCH 6/9] Update low-level RPC API for Py3 --- README.md | 2 +- src/bap/rpc.py | 19 +++++++++---------- tests/test_low_level_interface.py | 13 +++++++++++++ 3 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 tests/test_low_level_interface.py diff --git a/README.md b/README.md index 44d2e4f..70ac78b 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Installation section). ```python >>> import bap - >>> print '\n'.join(insn.asm for insn in bap.disasm("\x48\x83\xec\x08")) + >>> print '\n'.join(insn.asm for insn in bap.disasm(b"\x48\x83\xec\x08")) decl %eax subl $0x8, %esp ``` diff --git a/src/bap/rpc.py b/src/bap/rpc.py index 8ac0c1b..d182517 100644 --- a/src/bap/rpc.py +++ b/src/bap/rpc.py @@ -171,7 +171,7 @@ def load_symbols(self): def get_symbol(self, name, d=None): try: - return (s for s in self.symbols if s.name == name).next() + return next(s for s in self.symbols if s.name == name) except StopIteration: return d @@ -214,8 +214,8 @@ def __init__(self, mem, parent): def load_data(self): try: - url = (urlparse(url) for url in self.links - if urlparse(url).scheme == 'mmap').next() + url = next(urlparse(url) for url in self.links + if urlparse(url).scheme == 'mmap') qs = parse_qs(url.query) offset = int(qs['offset'][0]) with open(url.path, "rw+b") as f: @@ -266,8 +266,8 @@ def __init__(self, server={}): self.last_id = 0 for attempt in range(RETRIES): try: - self.capabilities = self.call({'init' : { - 'version' : '0.1'}}).next()['capabilities'] + self.capabilities = next(self.call({'init' : { + 'version' : '0.1'}}))['capabilities'] break except Exception: if attempt + 1 == RETRIES: @@ -278,7 +278,7 @@ def __init__(self, server={}): if 
not "capabilities" in self.__dict__: raise RuntimeError("Failed to connect to BAP server") self.data = {} - self.temp = NamedTemporaryFile('rw+b', prefix="bap-") + self.temp = NamedTemporaryFile('w+b', prefix="bap-") def insns(self, src, **kwargs): req = {'resource' : src} @@ -300,7 +300,7 @@ def load_file(self, name): 'url' : 'file://' + name}}) def get_resource(self, name): - return self.call({'get_resource' : name}).next() + return next(self.call({'get_resource' : name})) def load_chunk(self, data, **kwargs): kwargs.setdefault('url', self.mmap(data)) @@ -341,14 +341,13 @@ def mmap(self, data): return url def _load_resource(self, res): - rep = self.call(res).next() + rep = next(self.call(res)) if 'error' in rep: raise ServerError(rep) return Id(rep['resource']) - def jsons(r, p=0): - dec = json.JSONDecoder(encoding='utf-8') + dec = json.JSONDecoder() while True: obj,p = dec.scan_once(r.text,p) yield obj diff --git a/tests/test_low_level_interface.py b/tests/test_low_level_interface.py new file mode 100644 index 0000000..261097f --- /dev/null +++ b/tests/test_low_level_interface.py @@ -0,0 +1,13 @@ +import unittest +import bap + +class TestLowLevelInterface(unittest.TestCase): + + def test_low_level_interface(self): + asm_str = '\n'.join(insn.asm for insn in bap.disasm(b"\x48\x83\xec\x08")) + self.assertIsNotNone(asm_str) + self.assertIn("\tdecl\t%eax", asm_str) + self.assertIn("\tsubl\t$0x8, %esp", asm_str) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From a0dea5309d2f369245f7ba990a7fd63782996a3a Mon Sep 17 00:00:00 2001 From: George Macon Date: Fri, 20 Jan 2023 16:09:43 -0500 Subject: [PATCH 7/9] Import collection ABCs from new path Importing the Iterable, Sequence, and Mapping ABCs directly from collections was deprecated in Python 3.3 and the aliases were removed in Python 3.10. Attempt to import from the new location, but if it fails because the current Python is older than 3.3, fall back to the old location. 
--- src/bap/adt.py | 5 ++++- src/bap/bir.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bap/adt.py b/src/bap/adt.py index bb9311f..02129d9 100755 --- a/src/bap/adt.py +++ b/src/bap/adt.py @@ -182,7 +182,10 @@ def count_authors(library): """ -from collections import Iterable,Sequence,Mapping +try: + from collections.abc import Iterable,Sequence,Mapping +except ImportError: + from collections import Iterable,Sequence,Mapping class ADT(object): """Algebraic Data Type. diff --git a/src/bap/bir.py b/src/bap/bir.py index d310c3c..9606005 100644 --- a/src/bap/bir.py +++ b/src/bap/bir.py @@ -2,7 +2,10 @@ """BIR - BAP Intermediate Representation""" -from collections import Sequence,Mapping +try: + from collections.abc import Sequence,Mapping +except ImportError: + from collections import Sequence,Mapping from .adt import * from .bil import * from . import noeval_parser From 95e606daadfb459d8f7367e2c6a19cf9c454f379 Mon Sep 17 00:00:00 2001 From: Anton Kochkov Date: Sat, 2 Dec 2023 05:52:16 +0800 Subject: [PATCH 8/9] Smol fixes (#15) * Fix a typo * Disable pylint false positive --- src/bap/rpc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bap/rpc.py b/src/bap/rpc.py index d182517..a6a5663 100644 --- a/src/bap/rpc.py +++ b/src/bap/rpc.py @@ -112,11 +112,11 @@ def load(self): if self.msg is None: self.msg = self.bap.get_resource(self.ident) if not self._name in self.msg: - if 'error' in msg: + if 'error' in self.msg: raise ServerError(response) else: msg = "Expected {0} msg but got {1}".format( - self._name, msg) + self._name, self.msg) raise RuntimeError(msg) def get(self, child): @@ -126,7 +126,7 @@ def get(self, child): class Project(Resource): def __init__(self, ident, bap): - super(Image,self).__init__('program', ident, bap) + super(Image,self).__init__('program', ident, bap) # pylint: disable=bad-super-call def load_program(self): self.program = bir.loads(self.get('program')) From 
ac0d9f75cefedb43dd94a182188c5506648c8bc0 Mon Sep 17 00:00:00 2001 From: George Macon Date: Fri, 1 Dec 2023 16:53:13 -0500 Subject: [PATCH 9/9] Add properties for Attr and Annotation (#20) --- src/bap/bir.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/bap/bir.py b/src/bap/bir.py index 9606005..e33e2b2 100644 --- a/src/bap/bir.py +++ b/src/bap/bir.py @@ -248,7 +248,16 @@ class Attrs(Map) : class Attr(ADT) : """Attribute is a pair of attribute name and value, both represented with str""" - pass + + @property + def name(self): + """name of attribute""" + return self.arg[0] + + @property + def value(self): + """value of attribute""" + return self.arg[1] class Values(Map) : """A set of possible values, taken by a phi-node. @@ -362,7 +371,15 @@ class Annotation(ADT) : Each annotation denotes an association between a memory region and some arbitrary property, denoted with an attribute. """ - pass + @property + def region(self): + """memory region""" + return self.arg[0] + + @property + def attr(self): + """memory region attribute""" + return self.arg[1] def parse_addr(str): return int(str.split(':')[0],16)