@@ -21,6 +21,9 @@ def write32u(output, value):
2121 # or unsigned.
2222 output .write (struct .pack ("<L" , value ))
2323
def read32(input):
    """Read four bytes from *input* and decode them as one unsigned
    little-endian 32-bit integer.

    *input* is any object with a ``read(size)`` method.  If fewer than
    four bytes come back, ``struct.unpack`` raises ``struct.error``.
    """
    raw = input.read(4)
    (value,) = struct.unpack("<I", raw)
    return value
26+
2427def open (filename , mode = "rb" , compresslevel = 9 ):
2528 """Shorthand for GzipFile(filename, mode, compresslevel).
2629
@@ -161,9 +164,16 @@ def _init_write(self, filename):
161164 def _write_gzip_header (self ):
162165 self .fileobj .write ('\037 \213 ' ) # magic header
163166 self .fileobj .write ('\010 ' ) # compression method
164- fname = os .path .basename (self .name )
165- if fname .endswith (".gz" ):
166- fname = fname [:- 3 ]
167+ try :
168+ # RFC 1952 requires the FNAME field to be Latin-1. Do not
169+ # include filenames that cannot be represented that way.
170+ fname = os .path .basename (self .name )
171+ if not isinstance (fname , str ):
172+ fname = fname .encode ('latin-1' )
173+ if fname .endswith ('.gz' ):
174+ fname = fname [:- 3 ]
175+ except UnicodeEncodeError :
176+ fname = ''
167177 flags = 0
168178 if fname :
169179 flags = FNAME
@@ -181,28 +191,24 @@ def _init_read(self):
181191 self .crc = zlib .crc32 ("" ) & 0xffffffffL
182192 self .size = 0
183193
def _read_exact(self, n):
    """Return exactly *n* bytes read from ``self.fileobj``.

    A single ``read`` call may return fewer bytes than requested, so
    keep asking for the remainder until the full amount has arrived.

    Raises EOFError if the underlying file runs out of data first.
    """
    data = self.fileobj.read(n)
    missing = n - len(data)
    while missing > 0:
        chunk = self.fileobj.read(missing)
        if not chunk:
            # An empty read means the file ended before n bytes arrived.
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")
        data += chunk
        missing -= len(chunk)
    return data
193-
194194 def _read_gzip_header (self ):
195195 magic = self .fileobj .read (2 )
196196 if magic != '\037 \213 ' :
197197 raise IOError , 'Not a gzipped file'
198-
199- method , flag , self .mtime = struct .unpack ("<BBIxx" , self ._read_exact (8 ))
198+ method = ord ( self .fileobj .read (1 ) )
200199 if method != 8 :
201200 raise IOError , 'Unknown compression method'
201+ flag = ord ( self .fileobj .read (1 ) )
202+ self .mtime = read32 (self .fileobj )
203+ # extraflag = self.fileobj.read(1)
204+ # os = self.fileobj.read(1)
205+ self .fileobj .read (2 )
202206
203207 if flag & FEXTRA :
204208 # Read & discard the extra field, if present
205- self ._read_exact (struct .unpack ("<H" , self ._read_exact (2 )))
209+ xlen = ord (self .fileobj .read (1 ))
210+ xlen = xlen + 256 * ord (self .fileobj .read (1 ))
211+ self .fileobj .read (xlen )
206212 if flag & FNAME :
207213 # Read and discard a null-terminated string containing the filename
208214 while True :
@@ -216,7 +222,7 @@ def _read_gzip_header(self):
216222 if not s or s == '\000 ' :
217223 break
218224 if flag & FHCRC :
219- self ._read_exact (2 ) # Read & discard the 16-bit header CRC
225+ self .fileobj . read (2 ) # Read & discard the 16-bit header CRC
220226
221227 def write (self ,data ):
222228 self ._check_closed ()
@@ -232,9 +238,9 @@ def write(self,data):
232238 data = data .tobytes ()
233239
234240 if len (data ) > 0 :
235- self .size = self .size + len (data )
241+ self .fileobj .write (self .compress .compress (data ))
242+ self .size += len (data )
236243 self .crc = zlib .crc32 (data , self .crc ) & 0xffffffffL
237- self .fileobj .write ( self .compress .compress (data ) )
238244 self .offset += len (data )
239245
240246 return len (data )
@@ -250,16 +256,20 @@ def read(self, size=-1):
250256
251257 readsize = 1024
252258 if size < 0 : # get the whole thing
253- while self ._read (readsize ):
254- readsize = min (self .max_read_chunk , readsize * 2 )
255- size = self .extrasize
259+ try :
260+ while True :
261+ self ._read (readsize )
262+ readsize = min (self .max_read_chunk , readsize * 2 )
263+ except EOFError :
264+ size = self .extrasize
256265 else : # just get some more of it
257- while size > self .extrasize :
258- if not self ._read (readsize ):
259- if size > self .extrasize :
260- size = self .extrasize
261- break
262- readsize = min (self .max_read_chunk , readsize * 2 )
266+ try :
267+ while size > self .extrasize :
268+ self ._read (readsize )
269+ readsize = min (self .max_read_chunk , readsize * 2 )
270+ except EOFError :
271+ if size > self .extrasize :
272+ size = self .extrasize
263273
264274 offset = self .offset - self .extrastart
265275 chunk = self .extrabuf [offset : offset + size ]
@@ -274,7 +284,7 @@ def _unread(self, buf):
274284
275285 def _read (self , size = 1024 ):
276286 if self .fileobj is None :
277- return False
287+ raise EOFError , "Reached EOF"
278288
279289 if self ._new_member :
280290 # If the _new_member flag is set, we have to
@@ -285,7 +295,7 @@ def _read(self, size=1024):
285295 pos = self .fileobj .tell () # Save current position
286296 self .fileobj .seek (0 , 2 ) # Seek to end of file
287297 if pos == self .fileobj .tell ():
288- return False
298+ raise EOFError , "Reached EOF"
289299 else :
290300 self .fileobj .seek ( pos ) # Return to original position
291301
@@ -302,10 +312,9 @@ def _read(self, size=1024):
302312
303313 if buf == "" :
304314 uncompress = self .decompress .flush ()
305- self .fileobj .seek (- len (self .decompress .unused_data ), 1 )
306315 self ._read_eof ()
307316 self ._add_read_data ( uncompress )
308- return False
317+ raise EOFError , 'Reached EOF'
309318
310319 uncompress = self .decompress .decompress (buf )
311320 self ._add_read_data ( uncompress )
@@ -315,14 +324,13 @@ def _read(self, size=1024):
315324 # so seek back to the start of the unused data, finish up
316325 # this member, and read a new gzip header.
317326 # (The number of bytes to seek back is the length of the unused
318- # data)
319- self .fileobj .seek (- len (self .decompress .unused_data ), 1 )
327+ # data, minus 8 because _read_eof() will rewind a further 8 bytes )
328+ self .fileobj .seek ( - len (self .decompress .unused_data )+ 8 , 1 )
320329
321330 # Check the CRC and file size, and set the flag so we read
322331 # a new member on the next call
323332 self ._read_eof ()
324333 self ._new_member = True
325- return True
326334
327335 def _add_read_data (self , data ):
328336 self .crc = zlib .crc32 (data , self .crc ) & 0xffffffffL
@@ -333,11 +341,14 @@ def _add_read_data(self, data):
333341 self .size = self .size + len (data )
334342
335343 def _read_eof (self ):
336- # We've read to the end of the file.
344+ # We've read to the end of the file, so we have to rewind in order
345+ # to reread the 8 bytes containing the CRC and the file size.
337346 # We check that the computed CRC and size of the
338347 # uncompressed data match the stored values. Note that the size
339348 # stored is the true file size mod 2**32.
340- crc32 , isize = struct .unpack ("<II" , self ._read_exact (8 ))
349+ self .fileobj .seek (- 8 , 1 )
350+ crc32 = read32 (self .fileobj )
351+ isize = read32 (self .fileobj ) # may exceed 2GB
341352 if crc32 != self .crc :
342353 raise IOError ("CRC check failed %s != %s" % (hex (crc32 ),
343354 hex (self .crc )))
@@ -358,19 +369,21 @@ def closed(self):
358369 return self .fileobj is None
359370
def close(self):
    """Finish the gzip stream and release the underlying file objects.

    Calling this on an already-closed object (``self.fileobj`` is None)
    does nothing.  In WRITE mode the compressor's remaining output is
    written, followed by the CRC and the uncompressed size truncated to
    32 bits.  ``self.myfileobj``, when set, is closed even if writing
    the trailer raises.
    """
    stream = self.fileobj
    if stream is None:
        return
    # Mark the object closed first so a failure below cannot leave it
    # looking open.
    self.fileobj = None
    try:
        if self.mode == WRITE:
            stream.write(self.compress.flush())
            write32u(stream, self.crc)
            # self.size may exceed 2GB, or even 4GB; only the low
            # 32 bits are stored.
            write32u(stream, self.size & 0xffffffff)
    finally:
        owned = self.myfileobj
        if owned:
            self.myfileobj = None
            owned.close()
374387
375388 def __enter__ (self ):
376389 # __enter__ is defined in _jyio._IOBase (aka
@@ -381,20 +394,11 @@ def __enter__(self):
381394 self ._check_closed ()
382395 return self
383396
384- __iter__ = __enter__
385-
386- if not sys .platform .startswith ('java' ):
387- def flush (self ,zlib_mode = zlib .Z_SYNC_FLUSH ):
388- self ._check_closed ()
389- if self .mode == WRITE :
390- # Ensure the compressor's buffer is flushed
391- self .fileobj .write (self .compress .flush (zlib_mode ))
392- self .fileobj .flush ()
393- else :
394- # Java lacks Z_SYNC_FLUSH; thus Jython can't flush the
395- # compressobj until EOF
396- def flush (self ,zlib_mode = None ):
397- self ._check_closed ()
def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
    """Push buffered compressed data out to the underlying file.

    *zlib_mode* is passed straight to the compressor's ``flush``.
    ``self._check_closed()`` runs first.

    NOTE(review): indentation was lost in the diff this was taken from;
    the final ``self.fileobj.flush()`` is assumed to sit inside the
    WRITE branch -- confirm against the real file.
    """
    self._check_closed()
    if self.mode == WRITE:
        # Ensure the compressor's buffer is flushed
        pending = self.compress.flush(zlib_mode)
        self.fileobj.write(pending)
        self.fileobj.flush()
399403
400404 def fileno (self ):
0 commit comments