1+ #!/usr/bin/env python2
2+ import pefile
3+ import struct
4+ import re
5+
# Pattern for a GUID written as 8-4-4-4-12 hexadecimal digits (upper or lower case).
# It is unanchored, so with .match() it accepts any text that merely starts with a GUID.
guid_regex = re.compile("[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}")
7+
8+
def format_guid_from_hex(hex_string):
    """Split a run of hex digits into dash-separated 8-4-4-4-rest groups.

    The digits are kept in the order given; input shorter than 32 characters
    simply yields short or empty groups rather than an error.

    NOTE(review): no per-field byte swapping is applied, so the text follows
    the raw byte order of the input -- confirm this is the intended rendering
    for GUIDs stored in mixed-endian form.

    :param hex_string: hexadecimal text, normally 32 characters long
    :return: the same characters with four dashes inserted
    """
    boundaries = (0, 8, 12, 16, 20)
    groups = [hex_string[start:stop] for start, stop in zip(boundaries, boundaries[1:])]
    # The last group takes whatever is left after the first 20 characters.
    groups.append(hex_string[boundaries[-1]:])
    return "{0}-{1}-{2}-{3}-{4}".format(*groups)
12+
13+
def read_blob(blob):
    """Pull the text out of a short length-prefixed blob.

    The first byte is taken as the payload length when its high bit is clear.
    From that payload the first two bytes, the last two bytes and then one
    more leading byte are dropped (presumably the custom-attribute prolog,
    the trailing named-argument count and the string's own length byte --
    confirm against ECMA-335 II.23.3).

    :param blob: data starting at the blob's length prefix
    :return: the remaining text, or "" when the blob is empty, too short, or
             uses a multi-byte length prefix (high bit of the first byte set)
    """
    if not blob:
        return ""

    declared_length = ord(blob[0])
    if declared_length & 0x80:
        # Multi-byte length prefixes are not decoded.
        return ""

    payload = blob[1:1 + declared_length]
    framed_text = payload[2:-2]
    # Slicing an empty string is still empty, so no separate emptiness check is needed.
    return framed_text[1:]
27+
28+
def is_dot_net_assembly(pe):
    """Tell whether a parsed PE file declares a CLR runtime header.

    :param pe: object exposing ``OPTIONAL_HEADER.DATA_DIRECTORY`` as a sequence
               whose entries have a ``VirtualAddress`` attribute
    :return: True when data directory entry 14 has a non-zero VirtualAddress;
             False otherwise, including when the directory has fewer than 15 entries
    """
    # Entry 14 is the COM descriptor (CLR header) slot of the PE data directory.
    clr_directory_index = 14
    try:
        clr_directory = pe.OPTIONAL_HEADER.DATA_DIRECTORY[clr_directory_index]
    except IndexError:
        # A header may declare fewer directories than the usual 16; such a file has no CLR header.
        return False
    return clr_directory.VirtualAddress != 0
31+
32+
# Signature that opens a CLR metadata root: the four bytes "BSJB".
_METADATA_SIGNATURE = "\x42\x53\x4a\x42"

# Number of the CustomAttribute table inside the "#~" stream.
_CUSTOM_ATTRIBUTE_TABLE = 0x0c


def _read_metadata_streams(metadata):
    """Parse the stream headers of a metadata root that starts at offset 0 of ``metadata``.

    :return: dict mapping each stream name (e.g. "#~", "#GUID", "#Blob") to its bytes
    :raises struct.error: when a fixed-size header field is truncated
    :raises ValueError: when the data ends inside a stream name
    """
    # Layout: signature (4), major (2), minor (2), reserved (4), version length (4),
    # version string, flags (2), stream count (2), then the stream headers.
    version_length = struct.unpack("<I", metadata[12:16])[0]
    stream_count = struct.unpack("<H", metadata[version_length + 18:version_length + 20])[0]
    cursor = version_length + 20

    streams = {}
    for _ in xrange(stream_count):
        offset, size = struct.unpack("<II", metadata[cursor:cursor + 8])
        cursor += 8
        # Names are NUL-terminated and stored in 4-byte steps.
        name = ""
        while "\x00" not in name:
            chunk = metadata[cursor:cursor + 4]
            if not chunk:
                # Out of data before the terminator: stop rather than loop forever.
                raise ValueError("unterminated stream name")
            name += chunk
            cursor += 4
        streams[name.strip("\x00")] = metadata[offset:offset + size]
    return streams


def _find_typelib_id(tables_stream, blob_heap):
    """Return the first CustomAttribute value that starts with a GUID, or None.

    Row widths assume every table index and coded index is 2 bytes wide; only
    the heap index widths are taken from the stream header.

    :param tables_stream: contents of the "#~" stream
    :param blob_heap: contents of the "#Blob" heap
    :raises struct.error: when the stream ends before the data being read
    :raises TypeError: when the stream is too short to hold the heap-size byte
    """
    heap_sizes = ord(tables_stream[6:7])
    string_index_width = 4 if heap_sizes & 0x01 else 2
    guid_index_width = 4 if heap_sizes & 0x02 else 2
    blob_index_width = 4 if heap_sizes & 0x04 else 2

    # Row width of every table up to and including CustomAttribute.
    row_widths = [
        # 0x00 Module: Generation (2) + Name (string) + Mvid, EncId, EncBaseId (guid x3)
        2 + string_index_width + guid_index_width * 3,
        # 0x01 TypeRef: ResolutionScope (2) + TypeName, TypeNamespace (string x2)
        2 + string_index_width * 2,
        # 0x02 TypeDef: Flags (4) + TypeName, TypeNamespace (string x2) +
        #               Extends (2) + FieldList (2) + MethodList (2)
        10 + string_index_width * 2,
        0,  # 0x03 not modelled; counted as zero-width
        # 0x04 Field: Flags (2) + Name (string) + Signature (blob)
        2 + string_index_width + blob_index_width,
        0,  # 0x05 not modelled; counted as zero-width
        # 0x06 MethodDef: RVA (4) + ImplFlags (2) + Flags (2) + Name (string) +
        #                 Signature (blob) + ParamList (2)
        10 + string_index_width + blob_index_width,
        0,  # 0x07 not modelled; counted as zero-width
        # 0x08 Param: Flags (2) + Sequence (2) + Name (string)
        4 + string_index_width,
        # 0x09 InterfaceImpl: Class (2) + Interface (2)
        4,
        # 0x0a MemberRef: Class (2) + Name (string) + Signature (blob)
        2 + string_index_width + blob_index_width,
        # 0x0b Constant: Type (2) + Parent (2) + Value (blob)
        4 + blob_index_width,
        # 0x0c CustomAttribute: Parent (2) + Type (2) + Value (blob)
        4 + blob_index_width,
    ]

    # One bit per table. All 64 bits are expanded so that every table number
    # below can be indexed even when only low-numbered tables are present.
    valid_mask = struct.unpack("<Q", tables_stream[8:16])[0]
    cursor = 24
    row_counts = []
    for table in xrange(64):
        if (valid_mask >> table) & 1:
            row_counts.append(struct.unpack("<I", tables_stream[cursor:cursor + 4])[0])
            cursor += 4
        else:
            row_counts.append(0)

    # Skip the rows of every table stored ahead of CustomAttribute.
    for table in xrange(_CUSTOM_ATTRIBUTE_TABLE):
        cursor += row_widths[table] * row_counts[table]

    blob_index_format = "<H" if blob_index_width == 2 else "<I"
    row_width = row_widths[_CUSTOM_ATTRIBUTE_TABLE]
    for _ in xrange(row_counts[_CUSTOM_ATTRIBUTE_TABLE]):
        # The Value column follows the two 2-byte columns Parent and Type.
        value_column = tables_stream[cursor + 4:cursor + 4 + blob_index_width]
        blob_index = struct.unpack(blob_index_format, value_column)[0]
        candidate = read_blob(blob_heap[blob_index:])
        if guid_regex.match(candidate):
            return candidate
        cursor += row_width
    return None


def get_assembly_guids(assembly_path):
    """Extract the MVID and, when present, the Typelib GUID of a .NET assembly.

    The file is first checked with pefile. The metadata itself is then located
    by searching the raw bytes for the metadata signature instead of following
    the PE structures, so that malformed assemblies are still handled.

    :param assembly_path: path of the file to inspect
    :return: None when the file is not a PE or has no CLR header;
             {} when nothing could be extracted (including on any error);
             otherwise {"mvid": ...} plus "typelib_id" when a GUID-valued
             custom attribute is found. Values are lower-cased.
    """
    try:
        try:
            pe = pefile.PE(assembly_path)
        except pefile.PEFormatError:
            return None
        if not is_dot_net_assembly(pe):
            return None

        with open(assembly_path, "rb") as assembly_file:
            file_data = assembly_file.read()

        for match in re.finditer(_METADATA_SIGNATURE, file_data):
            # One tail copy at a time; stream offsets are relative to the signature.
            metadata = file_data[match.start():]
            try:
                streams = _read_metadata_streams(metadata)
            except Exception:
                # Not a usable metadata root (e.g. a stray signature); try the next hit.
                continue

            try:
                # The first 16 bytes of the GUID heap are taken as the MVID.
                mvid = format_guid_from_hex(streams["#GUID"][:16].encode("hex")).lower()
            except KeyError:
                return {}

            try:
                typelib_id = _find_typelib_id(streams["#~"], streams.get("#Blob", ""))
            except Exception:
                # The MVID is already known; a missing or damaged tables stream
                # only costs the Typelib GUID.
                typelib_id = None

            guids = {"mvid": mvid}
            if typelib_id is not None:
                guids["typelib_id"] = typelib_id.lower()
            return guids
    except Exception:
        # Best effort: unreadable or unexpected input yields an empty result.
        # KeyboardInterrupt and SystemExit are not Exception subclasses and propagate.
        return {}
    return {}
164+
165+
if __name__ == "__main__":
    # Command-line entry point: scan the given files/directories and print one
    # line per .NET assembly with its Typelib GUID, MVID and SHA-256.
    import hashlib
    import os
    import sys
    from argparse import ArgumentParser
    from collections import deque

    version = "1.0.0"
    banner = "%(prog)s v" + version + " by Brian Wallace (@botnet_hunter)"

    parser = ArgumentParser(
        prog=__file__,
        description="Extracts Typelib IDs and MVIDs from .NET assemblies.",
        epilog=banner
    )
    # Registered explicitly because the ArgumentParser(version=...) shortcut is
    # deprecated and missing from newer argparse; it used the same -v/--version flags.
    parser.add_argument('-v', '--version', action='version', version=banner)
    parser.add_argument('path', metavar='path', type=str, nargs='*', default=[],
                        help="Paths to files or directories to scan")
    parser.add_argument('-r', '--recursive', default=False, required=False, action='store_true',
                        help="Scan paths recursively")

    args = parser.parse_args()

    if not args.path:
        # Nothing to scan: show usage. (No stdin option is defined, so none is consulted.)
        parser.print_help()
        sys.exit()

    def scan_paths(paths, recursive):
        """Yield (path, get_assembly_guids(path)) for every file reachable from paths.

        Files are scanned directly. Directories contribute their files, and
        their sub-directories are queued only when recursive is true.
        """
        pending = deque(paths)
        while pending:
            current = os.path.abspath(pending.popleft())
            if os.path.isfile(current):
                yield current, get_assembly_guids(current)
            elif os.path.isdir(current):
                try:
                    # listdir instead of a glob pattern so that directory names
                    # containing glob metacharacters are listed correctly.
                    entries = os.listdir(current)
                except OSError:
                    # Unreadable directory: skip it and keep scanning.
                    continue
                for entry in entries:
                    if entry.startswith("."):
                        # Dot-files were never matched by the "*" pattern; keep skipping them.
                        continue
                    child = os.path.join(current, entry)
                    if os.path.isdir(child):
                        if recursive:
                            pending.append(child)
                    elif os.path.isfile(child):
                        yield child, get_assembly_guids(child)

    for file_path, result in scan_paths(args.path, args.recursive):
        # None means "not a .NET PE"; a result without an MVID has nothing to report.
        # Potentially should log the latter as they should at least have an MVID.
        if not result or "mvid" not in result:
            continue
        typelib_id = result.get("typelib_id", "None")

        with open(file_path, 'rb') as f:
            s = hashlib.sha256(f.read()).hexdigest()

        # Single-argument print(...) behaves the same as a print statement and as a function.
        print("{0}\t {1}\t {2}".format(typelib_id, result["mvid"], s))
0 commit comments