1- """Contains PackIndex and PackFile implementations"""
1+ """Contains PackIndexFile and PackFile implementations"""
22from util import (
33 LockedFD ,
44 LazyMixin ,
55 file_contents_ro ,
66 unpack_from
77 )
88
9+ from fun import (
10+ pack_object_header_info
11+ )
912from struct import (
1013 pack ,
1114 )
1215
13- __all__ = ('PackIndex ' , 'Pack ' )
16+ __all__ = ('PackIndexFile ' , 'PackFile ' )
1417
1518
16- class PackIndex (LazyMixin ):
19+ class PackIndexFile (LazyMixin ):
1720 """A pack index provides offsets into the corresponding pack, allowing to find
1821 locations for offsets faster."""
1922
@@ -26,7 +29,7 @@ class PackIndex(LazyMixin):
2629 _sha_list_offset = 8 + 1024
2730
2831 def __init__ (self , indexpath ):
29- super (PackIndex , self ).__init__ ()
32+ super (PackIndexFile , self ).__init__ ()
3033 self ._indexpath = indexpath
3134
3235 def _set_cache_ (self , attr ):
@@ -121,9 +124,9 @@ def _initialize(self):
121124 self ._fanout_table = self ._read_fanout ((self ._version == 2 ) * 8 )
122125
123126 if self ._version == 2 :
124- self ._crc_list_offset = self ._sha_list_offset + self .size * 20
125- self ._pack_offset = self ._crc_list_offset + self .size * 4
126- self ._pack_64_offset = self ._pack_offset + self .size * 4
127+ self ._crc_list_offset = self ._sha_list_offset + self .size () * 20
128+ self ._pack_offset = self ._crc_list_offset + self .size () * 4
129+ self ._pack_64_offset = self ._pack_offset + self .size () * 4
127130 # END setup base
128131
129132 def _read_fanout (self , byte_offset ):
@@ -139,21 +142,17 @@ def _read_fanout(self, byte_offset):
139142 #} END initialization
140143
141144 #{ Properties
142- @property
143145 def version (self ):
144146 return self ._version
145147
146- @property
147148 def size (self ):
148149 """:return: amount of objects referred to by this index"""
149150 return self ._fanout_table [255 ]
150151
151- @property
152152 def packfile_checksum (self ):
153153 """:return: 20 byte sha representing the sha1 hash of the pack file"""
154154 return self ._data [- 40 :- 20 ]
155155
156- @property
157156 def indexfile_checksum (self ):
158157 """:return: 20 byte sha representing the sha1 hash of this index file"""
159158 return self ._data [- 20 :]
@@ -186,6 +185,128 @@ def sha_to_index(self, sha):
186185 #} END properties
187186
188187
class PackFile(LazyMixin):
    """A pack file stores objects according to Version 2 of the git pack format.

    As the data is accessed through a read-only memory map, the maximum usable
    pack size is effectively bound to the 32 bit address space on 32 bit
    systems; on 64 bit systems this is no practical concern.

    :note: at some point this might be implemented using streams as well, or
        streams could serve as an alternate path whenever a memory map cannot
        be created for some reason - one clearly doesn't want to read 10GB at
        once in that case"""

    __slots__ = ('_packpath', '_data', '_size', '_version')

    # file offset at which the first object record starts
    # NOTE(review): the pack header itself is 4 (signature) + 4 (version)
    # + 4 (object count) = 12 bytes; the purpose of the additional + 8 is not
    # evident from this file - confirm against the pack format specification
    _first_object_offset = 3 * 4 + 8

    def __init__(self, packpath):
        self._packpath = packpath

    def _set_cache_(self, attr):
        """Lazily provide either the memory mapped pack data or the header fields."""
        if attr == '_data':
            lockfd = LockedFD(self._packpath)
            fileno = lockfd.open()
            self._data = file_contents_ro(fileno)
            lockfd.rollback()
            # TODO: figure out whether we should better keep the lock, or maybe
            # add a .keep file instead ?
            return
        # END data mapping

        # any other attribute comes from the 12 byte header: both _version and
        # _size are filled in with a single unpack
        type_id, self._version, self._size = unpack_from(">4sLL", self._data, 0)
        assert type_id == "PACK", "Pack file format is invalid: %r" % type_id
        assert self._version in (2, 3), "Cannot handle pack format version %i" % self._version

    def _iter_objects(self, start_offset, as_stream):
        """Handle the actual iteration of objects within this pack

        NOTE(review): this method is unfinished - the offset is never advanced
        inside the loop and nothing is yielded yet."""
        data_size = len(self._data)
        cur_offset = start_offset or self._first_object_offset

        while cur_offset < data_size:
            type_id, uncomp_size, data_offset = pack_object_header_info(buffer(self._data, cur_offset))

            # if type_id
        # END until we have read everything

    #{ Interface

    def size(self):
        """:return: The amount of objects stored in this pack"""
        return self._size

    def version(self):
        """:return: the version of this pack"""
        return self._version

    def checksum(self):
        """:return: 20 byte sha1 hash on all object sha's contained in this file"""
        return self._data[-20:]

    #} END interface

    #{ Read-Database like Interface

    def info(self, offset):
        """Retrieve information about the object at the given file-absolute offset

        :param offset: byte offset
        :return: OPackInfo instance, the actual type differs depending on the
            type_id attribute"""
        raise NotImplementedError()

    def stream(self, offset):
        """Retrieve an object at the given file-relative offset as stream along
        with its information

        :param offset: byte offset
        :return: OPackStream instance, the actual type differs depending on the
            type_id attribute"""
        raise NotImplementedError()

    #} END Read-Database like Interface
265+
266+
class PackFileEntity(object):
    """Ties a PackIndexFile to its corresponding PackFile, which together allow
    the contained objects to be resolved and iterated"""

    __slots__ = ('_index', '_pack')

    # hooks allowing subclasses to substitute their own file implementations
    IndexFileCls = PackIndexFile
    PackFileCls = PackFile

    def __init__(self, basename):
        # both files share the basename and differ only in their extension
        self._index = self.IndexFileCls("%s.idx" % basename)    # PackIndexFile instance
        self._pack = self.PackFileCls("%s.pack" % basename)     # corresponding PackFile instance

    def _iter_objects(self, as_stream):
        raise NotImplementedError()

    #{ Read-Database like Interface

    def info(self, sha):
        """Retrieve information about the object identified by the given sha

        :param sha: 20 byte sha1
        :return: OInfo instance"""
        raise NotImplementedError()

    def stream(self, sha):
        """Retrieve an object stream along with its information as identified by
        the given sha

        :param sha: 20 byte sha1
        :return: OStream instance"""
        raise NotImplementedError()

    #} END Read-Database like Interface

    #{ Interface

    def info_iter(self):
        """:return: Iterator over all objects in this pack. The iterator yields
            OInfo instances"""
        return self._iter_objects(as_stream=False)

    def stream_iter(self):
        """:return: iterator over all objects in this pack. The iterator yields
            OStream instances"""
        return self._iter_objects(as_stream=True)

    #} END interface
0 commit comments