Skip to content

Commit 0ad3049

Browse files
lieut-dataarp242
authored andcommitted
Handle pre-protocol errors to prevent memory exhaustion
When PostgreSQL cannot fork a backend process (e.g., an external process limit), it sends a plain text error like "Ecould not fork new process for connection: Resource temporarily unavailable" before any protocol handshake. The 'E' byte gets parsed as an ErrorResponse message type, but the following bytes are plain text, not a binary length field. This causes lib/pq to interpret ASCII text as a message length (e.g., "coul" = 0x636f756c = 1.6GB), leading to massive memory allocations and a likely OOM under connection pressure. This fix matches libpq's behavior in fe-connect.c: if an ErrorResponse has msgLength < 8 or > 30000, treat it as a pre-protocol plain text error and read it as a null-terminated string instead. There's a decent chance this is related to #638.
1 parent f1fae2e commit 0ad3049

4 files changed

Lines changed: 74 additions & 6 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,10 @@ newer. Previously PostgreSQL 8.4 and newer were supported.
4949
ignore ENOTDIR errors, and use APPDATA on Windows ([#1214]).
5050

5151
- Fix `sslmode=verify-ca` verifying the hostname anyway when connecting to a DNS
52-
name (rather than IP) ([#1226])
52+
name (rather than IP) ([#1226]).
53+
54+
- Correctly detect pre-protocol errors such as the server not being able to fork
55+
or running out of memory ([#1248]).
5356

5457
- Fix build with wasm ([#1184]), appengine ([#745]), and Plan 9 ([#1133]).
5558

@@ -117,6 +120,7 @@ newer. Previously PostgreSQL 8.4 and newer were supported.
117120
[#1240]: https://github.com/lib/pq/pull/1240
118121
[#1243]: https://github.com/lib/pq/pull/1243
119122
[#1246]: https://github.com/lib/pq/pull/1246
123+
[#1248]: https://github.com/lib/pq/pull/1248
120124

121125

122126
v1.10.9 (2023-04-26)

conn.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,9 +1047,21 @@ func (cn *conn) recvMessage(r *readBuf) (proto.ResponseCode, error) {
10471047
return 0, err
10481048
}
10491049

1050-
// read the type and length of the message that follows
1051-
t := x[0]
1050+
// Read the type and length of the message that follows.
1051+
t := proto.ResponseCode(x[0])
10521052
n := int(binary.BigEndian.Uint32(x[1:])) - 4
1053+
1054+
// When PostgreSQL cannot start a backend (e.g., an external process limit),
1055+
// it sends plain text like "Ecould not fork new process [..]", which
1056+
// doesn't use the standard encoding for the Error message.
1057+
//
1058+
// libpq checks "if ErrorResponse && (msgLength < 8 || msgLength > MAX_ERRLEN)",
1059+
// but check < 4 since n represents bytes remaining to be read after length.
1060+
if t == proto.ErrorResponse && (n < 4 || n > proto.MaxErrlen) {
1061+
msg, _ := cn.buf.ReadString('\x00')
1062+
return 0, fmt.Errorf("pq: server error: %s%s", string(x[1:]), strings.TrimSuffix(msg, "\x00"))
1063+
}
1064+
10531065
var y []byte
10541066
if n <= len(cn.scratch) {
10551067
y = cn.scratch[:n]
@@ -1062,10 +1074,9 @@ func (cn *conn) recvMessage(r *readBuf) (proto.ResponseCode, error) {
10621074
}
10631075
*r = y
10641076
if debugProto {
1065-
fmt.Fprintf(os.Stderr, "SERVER ← %-20s %5d %q\n",
1066-
proto.ResponseCode(t), n, y)
1077+
fmt.Fprintf(os.Stderr, "SERVER ← %-20s %5d %q\n", t, n, y)
10671078
}
1068-
return proto.ResponseCode(t), nil
1079+
return t, nil
10691080
}
10701081

10711082
// recv receives a message from the backend, returning an error if an error

conn_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2165,3 +2165,51 @@ func TestUint64(t *testing.T) {
21652165
}
21662166
}
21672167
}
2168+
2169+
func TestPreProtocolError(t *testing.T) {
2170+
tests := []struct {
2171+
name string
2172+
msg string
2173+
wantErr string
2174+
}{
2175+
{
2176+
name: "could not fork",
2177+
msg: "could not fork new process for connection: Resource temporarily unavailable\n",
2178+
wantErr: "server error: could not fork new process for connection: Resource temporarily unavailable",
2179+
},
2180+
{
2181+
name: "too many connections",
2182+
msg: "sorry, too many clients already\n",
2183+
wantErr: "server error: sorry, too many clients already",
2184+
},
2185+
{
2186+
name: "out of memory",
2187+
msg: "out of memory\n",
2188+
wantErr: "server error: out of memory",
2189+
},
2190+
}
2191+
2192+
for _, tt := range tests {
2193+
tt := tt
2194+
t.Run(tt.name, func(t *testing.T) {
2195+
t.Parallel()
2196+
f := pqtest.NewFake(t, func(f pqtest.Fake, cn net.Conn) {
2197+
f.ReadStartup(cn)
2198+
// Send pre-protocol error: 'E' followed by plain text
2199+
// This simulates what PostgreSQL sends when it can't fork
2200+
cn.Write(append([]byte{'E'}, tt.msg...))
2201+
cn.Close()
2202+
})
2203+
defer f.Close()
2204+
2205+
db := pqtest.MustDB(t, f.DSN())
2206+
err := db.Ping()
2207+
if err == nil {
2208+
t.Fatal("expected error")
2209+
}
2210+
if !strings.Contains(err.Error(), tt.wantErr) {
2211+
t.Errorf("wrong error:\nhave: %s\nwant: %s", err, tt.wantErr)
2212+
}
2213+
})
2214+
}
2215+
}

internal/proto/proto.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ const (
1616
NegotiateGSSCode = (1234 << 16) | 5680
1717
)
1818

19+
// Constants from fe-connect.c
20+
const (
21+
MaxErrlen = 30_000 // https://github.com/postgres/postgres/blob/c6a10a89f/src/interfaces/libpq/fe-connect.c#L4067
22+
)
23+
1924
// RequestCode is a request codes sent by the frontend.
2025
type RequestCode byte
2126

0 commit comments

Comments
 (0)