Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
WIP: Pyio readfrom
  • Loading branch information
cmaloney committed Feb 11, 2025
commit e3502910899112eb9e96aeb6ca4930e6e3640e8a
1 change: 1 addition & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(entrypoint)
STRUCT_FOR_ID(env)
STRUCT_FOR_ID(errors)
STRUCT_FOR_ID(estimate)
STRUCT_FOR_ID(event)
STRUCT_FOR_ID(eventmask)
STRUCT_FOR_ID(exc_type)
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Include/internal/pycore_unicodeobject_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

111 changes: 87 additions & 24 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,83 @@ def read1(self, size=-1):
"""
return self.read(size)

def readfrom(self, file, /, *, estimate=None, limit=None):
    """Efficiently read from *file* and return True iff end of file was hit.

    *file* is a file descriptor read with ``os.readinto()`` into this
    object's internal buffer at the current position.

    *estimate*, if given, is a hint of how many bytes are available; the
    buffer is pre-sized from it so most reads complete without resizing.
    *limit*, if given, caps the total number of bytes read; it must be a
    positive integer.

    Returns True if and only if a read into a non-zero length buffer
    returned 0 bytes (on most systems this indicates end of file /
    stream).  Returns False when *limit* was reached before end of file,
    or when a read would block after some data was already read.
    Returns None if the very first read raised BlockingIOError (no data
    available on a non-blocking file).

    Raises ValueError if the stream is closed or *limit* is not positive.
    """
    if self.closed:
        raise ValueError("read from closed file")

    if limit is not None:
        limit = int(limit)
        if limit <= 0:
            raise ValueError(f"limit must be larger than 0, got {limit}")

    # In order to detect end of file, need a read() of at least 1 byte
    # which returns size 0.  Oversize the buffer by 1 byte so the I/O
    # can usually be completed with two read() calls (one for all the
    # data, one for EOF) without needing to resize the buffer.
    if estimate is not None:
        target = int(estimate) + 1
        if limit is not None:
            target = min(target, limit)
        if len(self._buffer) - self._pos < target:
            self._buffer.resize(self._pos + target)

    bytes_read = 0
    try:
        while True:
            # Hit the cap, not EOF.
            if limit is not None and bytes_read >= limit:
                return False

            # Make sure there is space for at least one byte, so that a
            # 0-byte result from os.readinto() unambiguously means EOF.
            if self._pos >= len(self._buffer):
                self._buffer.resize(
                    len(self._buffer) + _new_buffersize(bytes_read))
            assert len(self._buffer) - self._pos >= 1, \
                "os.readinto buffer size 0 will result in erroneous EOF / returns 0"

            view = memoryview(self._buffer)[self._pos:]
            if limit is not None:
                # Never read more than the remaining allowance.
                view = view[:limit - bytes_read]

            n = os.readinto(file, view)
            if n == 0:
                return True  # EOF: read of >= 1 byte returned 0 bytes.
            self._pos += n
            bytes_read += n
    except BlockingIOError:
        if not bytes_read:
            return None
        return False
    finally:
        # Drop any unused oversize so the buffer holds exactly the bytes
        # written; otherwise getvalue() would return trailing NUL bytes.
        del self._buffer[self._pos:]

def write(self, b):
if self.closed:
raise ValueError("write to closed file")
Expand Down Expand Up @@ -1666,38 +1743,24 @@ def readall(self):
"""
self._checkClosed()
self._checkReadable()
if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
bufsize = DEFAULT_BUFFER_SIZE
else:
# In order to detect end of file, need a read() of at least 1
# byte which returns size 0. Oversize the buffer by 1 byte so the
# I/O can be completed with two read() calls (one for all data, one
# for EOF) without needing to resize the buffer.
bufsize = self._stat_atopen.st_size + 1

if self._stat_atopen.st_size > 65536:
estimate = None
if self._stat_atopen and self._stat_atopen.st_size >= 0:
estimate = self._stat_atopen.st_size
if estimate > 65536:
try:
pos = os.lseek(self._fd, 0, SEEK_CUR)
if self._stat_atopen.st_size >= pos:
bufsize = self._stat_atopen.st_size - pos + 1
estimate = estimate - pos if estimate > pos else 0
except OSError:
pass

result = bytearray(bufsize)
bytes_read = 0
bio = BytesIO()
try:
while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
bytes_read += n
if bytes_read >= len(result):
result.resize(_new_buffersize(bytes_read))
bio.readfrom(self._fd, estimate=estimate)
return bio.getvalue()
except BlockingIOError:
if not bytes_read:
return None
result = bio.getvalue()
return result if result else None

assert len(result) - bytes_read >= 1, \
"os.readinto buffer size 0 will result in erroneous EOF / returns 0"
result.resize(bytes_read)
return bytes(result)

def readinto(self, buffer):
"""Same as RawIOBase.readinto()."""
Expand Down
9 changes: 3 additions & 6 deletions Lib/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -1921,12 +1921,9 @@ def _execute_child(self, args, executable, preexec_fn, close_fds,

# Wait for exec to fail or succeed; possibly raising an
# exception (limited in size)
errpipe_data = bytearray()
while True:
part = os.read(errpipe_read, 50000)
errpipe_data += part
if not part or len(errpipe_data) > 50000:
break
bio = io.BytesIO()
bio.readfrom(errpipe_read, estimate=0, limit=50_000)
errpipe_data = bio.getvalue()
finally:
# be sure the FD is closed no matter what
os.close(errpipe_read)
Expand Down
Loading