Skip to content

Commit c1719aa

Browse files
committed
Initial proof-of-concept bindings
1 parent c828ac6 commit c1719aa

File tree

8 files changed

+407
-12
lines changed

8 files changed

+407
-12
lines changed

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,23 @@ Install the package via:
1010
```bash
1111
pip install codeql-python
1212
```
13+
14+
## Usage
15+
16+
```python
17+
import codeql
18+
19+
db = codeql.Database('path/to/db.zip')
20+
21+
# Queries return a CSV-like array of arrays
22+
results = db.query('select "Hello"')
23+
assert(results[1][0] == 'Hello')
24+
25+
# Queries with external libraries are supported as well
26+
codeql.set_search_path('path/to/codeql')
27+
results = db.query('''
28+
import cpp
29+
from BlockStmt block
30+
select block
31+
''')
32+
```

codeql/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
"""
66

77
# Imports
8-
from .codeql import *
9-
10-
# Prevent polluting namespace
11-
del codeql
8+
from .bqrs import *
9+
from .database import *
10+
from .query import *

codeql/bqrs.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
CodeQL for Python.
5+
"""
6+
7+
import csv
8+
import io
9+
import os
10+
import shutil
11+
import tempfile
12+
13+
from .common import *
14+
15+
class BQRS(object):
    """
    Wrapper around the `codeql bqrs` subcommands for a single results file.
    """

    def __init__(self, path):
        """
        Arguments:
        path -- Location of the query results file
        """
        self.path = path

    # Helpers
    def run_command(self, command, options=None, post=None):
        """
        Run `codeql bqrs <command> [options] <path> [post]`.

        Arguments:
        command -- Name of the bqrs subcommand, e.g. 'decode'
        options -- Arguments placed before the path of this results file
        post -- Arguments placed after the path of this results file

        Returns the value returned by the shared `run` helper.
        """
        # None defaults avoid sharing one mutable list between calls.
        options = [] if options is None else list(options)
        post = [] if post is None else list(post)
        # `post` must be forwarded, otherwise `diff` never receives the
        # second results file.
        return run(['bqrs', command] + options + [self.path] + post)

    def parse(self):
        """
        Decode the results into a temporary CSV file and read it back.

        Returns a list of rows, each row being a list of strings, exactly as
        produced by csv.reader (no row is skipped).
        """
        path = temporary_file(suffix='.csv')
        self.decode(format='csv', output=path)
        # newline='' leaves line-ending handling to the csv module, so quoted
        # fields containing newlines are read back intact.
        with open(path, 'r', newline='') as f:
            return list(csv.reader(f, delimiter=','))

    # Interface
    def info(self, format=None):
        """
        Display metadata for a BQRS file.

        This command displays an overview of the data contained in the compact
        binary BQRS file that is the result of executing a query. It shows the
        names and sizes of each result set (table) in the BQRS file, and the
        column types of each result set.

        It can also optionally precompute offsets for using the pagination
        options of codeql bqrs decode. This is mainly useful for IDE plugins.

        Arguments:
        format -- Output format, forwarded as --format=<format> when given
        """
        options = ['-v']
        if format:
            options += [f'--format={format:s}']
        self.run_command('info', options)

    def decode(self, format=None, output=None):
        """
        Convert result data from BQRS into other forms.

        The decoded output will be written to standard output, unless the
        --output option is specified.

        Arguments:
        format -- Output format, forwarded as --format=<format> when given
        output -- Path of the output file, forwarded as -o <output> when given
        """
        options = []
        if format:
            options += [f'--format={format:s}']
        if output:
            options += ['-o', output]
        self.run_command('decode', options)

    def diff(self, other):
        """
        Compute the difference between two result sets.

        Arguments:
        other -- Another BQRS instance, or the path of another results file
        """
        # isinstance also accepts subclasses of BQRS.
        if isinstance(other, BQRS):
            other = other.path
        self.run_command('diff', post=[other])

codeql/codeql.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

codeql/common.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
CodeQL for Python.
5+
"""
6+
7+
import os
8+
import subprocess
9+
import tempfile
10+
import uuid
11+
12+
# Configuration
13+
# Executable used as the first element of every command line built by run()
codeql_path = 'codeql'
14+
# Search path string stored by set_search_path(); None until it is called
search_path = None
15+
# Not referenced by any code visible in this module
library_path = None
16+
17+
# Temporaries
18+
# Holds the tempfile.TemporaryDirectory created lazily by temporary_root()
temp_path = None
19+
20+
def temporary_root():
    """
    Return the path of the shared temporary directory.

    The directory is created on the first call and the owning
    tempfile.TemporaryDirectory object is cached in the module-level
    `temp_path`, so later calls return the same location.
    """
    global temp_path
    if temp_path is not None:
        return temp_path.name
    temp_path = tempfile.TemporaryDirectory(prefix="codeql-python_")
    return temp_path.name
25+
26+
def temporary_path(prefix, suffix):
    """
    Build a fresh, unique path inside the shared temporary directory.

    Arguments:
    prefix -- Text placed before the random part of the name (skipped if falsy)
    suffix -- Text placed after the random part of the name (skipped if falsy)

    Nothing is created on disk; only the path string is returned.
    """
    pieces = [prefix or '', uuid.uuid4().hex, suffix or '']
    return os.path.join(temporary_root(), ''.join(pieces))
34+
35+
def temporary_dir(create=True, prefix=None, suffix=None):
    """
    Return a unique directory path inside the shared temporary directory.

    Arguments:
    create -- When true, the directory is created before returning
    prefix -- Optional text placed before the random part of the name
    suffix -- Optional text placed after the random part of the name
    """
    location = temporary_path(prefix, suffix)
    if not create:
        return location
    os.mkdir(location)
    return location
40+
41+
def temporary_file(create=True, prefix=None, suffix=None):
    """
    Return a unique file path inside the shared temporary directory.

    Arguments:
    create -- When true, an empty file is created at the returned path
    prefix -- Optional text placed before the random part of the name
    suffix -- Optional text placed after the random part of the name
    """
    location = temporary_path(prefix, suffix)
    if create:
        # Opening in append mode and closing right away just touches the file.
        with open(location, 'a'):
            pass
    return location
46+
47+
# Environment
48+
def set_search_path(path):
    """
    Store the search path in the module-level `search_path`.

    Arguments:
    path -- Either a single string, which is stored unchanged, or a list or
            tuple of entries, which are joined with the platform's path-list
            separator (os.pathsep: ';' on Windows, ':' elsewhere).
    """
    global search_path
    # isinstance accepts tuples and list subclasses too; str() lets entries
    # such as pathlib.Path objects be joined.
    if isinstance(path, (list, tuple)):
        path = os.pathsep.join(map(str, path))
    search_path = path
54+
55+
def run(args):
    """
    Invoke the configured CodeQL executable with the given arguments.

    Arguments:
    args -- Iterable of arguments; each one is converted with str()

    Returns the subprocess.CompletedProcess of the invocation. The command's
    standard output is discarded, and the exit status is not checked here.
    """
    command = [codeql_path]
    command.extend(str(arg) for arg in args)
    return subprocess.run(command, stdout=subprocess.DEVNULL)

codeql/database.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
CodeQL for Python.
5+
"""
6+
7+
import os
8+
import shutil
9+
import tempfile
10+
11+
import codeql
12+
from .common import *
13+
14+
# Constants
15+
# Template for the qlpack.yml that Database.query() writes; the {} placeholder
# is filled in with str.format().
CODEQL_QLPACK = '''
16+
name: codeql-python
17+
version: 0.0.0
18+
libraryPathDependencies: {}
19+
'''
20+
21+
class Database(object):
    """
    Handle to a CodeQL database stored at `path`.

    Most methods forward to the matching `codeql database <command>`
    invocation through the shared `run` helper.
    """

    def __init__(self, path, temp=False):
        """
        Arguments:
        path -- Path of the database
        temp -- Remove database path in destructor
        """
        self.path = path
        self.temp = temp

    def __del__(self):
        if self.temp:
            # Best-effort cleanup: the directory may already be gone, and a
            # destructor has no caller that could handle the error.
            shutil.rmtree(self.path, ignore_errors=True)

    # Helpers
    def run_command(self, command, options=None, post=None):
        """
        Run `codeql database <command> [options] <path> [post]`.

        Arguments:
        command -- Name of the database subcommand, e.g. 'upgrade'
        options -- Arguments placed before the database path
        post -- Arguments placed after the database path

        Returns the value returned by the shared `run` helper.
        """
        # None defaults avoid sharing one mutable list between calls.
        options = [] if options is None else list(options)
        post = [] if post is None else list(post)
        return run(['database', command] + options + [self.path] + post)

    @staticmethod
    def create_from_cpp(code, command=None):
        """
        Create a database from a snippet of C/C++ source code.

        Arguments:
        code -- Source text, written to `source.cpp` in a new temporary
                directory
        command -- Compiler invocation as a list of arguments; the path of
                   the written source file is appended to a copy of it.
                   Defaults to the first compiler of a fixed candidate list
                   that shutil.which can find, followed by '-c'.

        Raises RuntimeError when no command is given and none of the
        candidate compilers is found.
        """
        # Get default compiler
        if command is None:
            compilers = ['cxx', 'clang++', 'g++', 'cc', 'clang', 'gcc']
            compiler = next(
                (c for c in compilers if shutil.which(c) is not None), None)
            if compiler is None:
                raise RuntimeError(
                    'No C/C++ compiler found; tried: ' + ', '.join(compilers))
            command = [compiler, '-c']
        # Create database
        directory = temporary_dir()
        fpath = os.path.join(directory, 'source.cpp')
        with open(fpath, 'w') as f:
            f.write(code)
        # Concatenate instead of append so the caller's list is not modified.
        command = command + [fpath]
        return Database.create('cpp', directory, command)

    def query(self, ql):
        """
        Syntactic sugar to execute a CodeQL snippet and parse the results.

        Arguments:
        ql -- Text of the query, written to `query.ql` in a per-instance
              temporary directory

        Returns the rows produced by the `parse` method of the query result.
        """
        # Prepare query directory (once per instance)
        if not hasattr(self, 'qldir'):
            self.qldir = temporary_dir()
            qlpack_path = os.path.join(self.qldir, 'qlpack.yml')
            with open(qlpack_path, mode='w') as f:
                # NOTE: the library pack is hard-coded to 'codeql-cpp'.
                f.write(CODEQL_QLPACK.format('codeql-cpp'))
        # Perform query
        query_path = os.path.join(self.qldir, 'query.ql')
        with open(query_path, mode='w') as f:
            f.write(ql)
        query = codeql.Query(query_path)
        bqrs = query.run(database=self)
        return bqrs.parse()

    # Interface
    @staticmethod
    def create(language, source, command=None, location=None):
        """
        Create a CodeQL database instance for a source tree that can be analyzed
        using one of the CodeQL products.

        Arguments:
        language -- The language that the new database will be used to analyze.
        source -- The root source code directory.
            In many cases, this will be the checkout root. Files within it are
            considered to be the primary source files for this database.
            In some output formats, files will be referred to by their relative path
            from this directory.
        command -- For compiled languages, build commands that will cause the
            compiler to be invoked on the source code to analyze. These commands
            will be executed under an instrumentation environment that allows
            analysis of generated code and (in some cases) standard libraries.
            May be a single string, or a list/tuple of arguments which is
            joined with spaces (arguments containing a space are wrapped in
            double quotes).
        location -- Path to generated database. Defaults to a new temporary
            directory.
        """
        # Syntactic sugar: Default location to temporary directory
        if location is None:
            location = temporary_dir()

        # Create and submit command
        args = ['database', 'create', '-l', language, '-s', source]
        if command is not None:
            if isinstance(command, (list, tuple)):
                command = ' '.join(
                    f'"{x}"' if ' ' in x else x for x in command)
            args += ['-c', command]
        args.append(location)
        run(args)

        # Return database instance
        return Database(location)

    def analyze(self, queries, format, output):
        """
        Analyze a database, producing meaningful results in the context of the
        source code.

        Run a query suite (or some individual queries) against a CodeQL
        database, producing results, styled as alerts or paths, in SARIF or
        another interpreted format.

        This command combines the effect of the codeql database run-queries
        and codeql database interpret-results commands. If you want to run
        queries whose results don't meet the requirements for being interpreted
        as source-code alerts, use codeql database run-queries or codeql query
        run instead, and then codeql bqrs decode to convert the raw results to a
        readable notation.

        Arguments:
        queries -- A single query, or a list/tuple of queries, passed after
                   the database path
        format -- Result format, forwarded as --format=<format>
        output -- Output path, forwarded as -o <output>
        """
        # `from .common import *` copied `search_path` when this module was
        # imported, so that copy never sees set_search_path(); read the live
        # value from the common module instead.
        from . import common

        # Support single query or list of queries
        if isinstance(queries, (list, tuple)):
            queries = list(queries)
        else:
            queries = [queries]
        # Prepare options
        options = [f'--format={format}', '-o', output]
        if common.search_path is not None:
            options += ['--search-path', common.search_path]
        # Dispatch command
        self.run_command('analyze', options, post=queries)

    def upgrade(self):
        """
        Upgrade a database so it is usable by the current tools.

        This rewrites a CodeQL database to be compatible with the QL libraries
        that are found on the QL pack search path, if necessary.

        If an upgrade is necessary, it is irreversible. The database will
        subsequently be unusable with the libraries that were current when it
        was created.
        """
        self.run_command('upgrade')

    def cleanup(self):
        """
        Compact a CodeQL database on disk.

        Delete temporary data, and generally make a database as small as
        possible on disk without degrading its future usefulness.
        """
        self.run_command('cleanup')

    def bundle(self, output):
        """
        Create a relocatable archive of a CodeQL database.

        A command that zips up the useful parts of the database. This will only
        include the mandatory components, unless the user specifically requests
        that results, logs, TRAP, or similar should be included.

        Arguments:
        output -- Path of the archive, forwarded as -o <output>
        """
        options = ['-o', output]
        self.run_command('bundle', options)

0 commit comments

Comments
 (0)