Skip to content

Commit 6d77ced

Browse files
henhousearp242
authored and committed
Treat io.ErrUnexpectedEOF as driver.ErrBadConn in handleError
When recvMessage does an io.ReadFull on a partially-received message body and the connection drops mid-read, the result is io.ErrUnexpectedEOF. handleError classifies io.EOF as driver.ErrBadConn but not io.ErrUnexpectedEOF, so cn.err is never set, IsValid() returns true, and database/sql keeps recycling the broken connection. The inProgress flag stays stuck at true (ReadyForQuery never arrived), and the CAS guard rejects every subsequent query with "there is already a query being processed on this connection" — permanently poisoning the pool. So treat io.ErrUnexpectedEOF the same as io.EOF in handleError: both indicate a dead connection. Fixes #1298
1 parent 71daecb commit 6d77ced

3 files changed

Lines changed: 69 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
unreleased
2+
----------
3+
4+
- Treat io.ErrUnexpectedEOF as driver.ErrBadConn so database/sql discards the
5+
connection. Since v1.12.0 a connection dropped mid-message could result in permanently broken connections,
6+
especially with CockroachDB which frequently sends partial messages ([#1299]).
7+
8+
[#1299]: https://github.com/lib/pq/pull/1299
9+
110
v1.12.1 (2026-03-30)
211
--------------------
312

conn_test.go

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"runtime"
1717
"strconv"
1818
"strings"
19+
"sync/atomic"
1920
"testing"
2021
"time"
2122

@@ -507,7 +508,7 @@ func TestErrorDuringStartupClosesConn(t *testing.T) {
507508

508509
func TestBadConn(t *testing.T) {
509510
t.Parallel()
510-
for _, tt := range []error{io.EOF, &Error{Severity: pqerror.SeverityFatal}} {
511+
for _, tt := range []error{io.EOF, &Error{Severity: pqerror.SeverityFatal}, io.ErrUnexpectedEOF} {
511512
t.Run(fmt.Sprintf("%s", tt), func(t *testing.T) {
512513
var cn conn
513514
err := cn.handleError(tt)
@@ -521,6 +522,63 @@ func TestBadConn(t *testing.T) {
521522
}
522523
}
523524

525+
func TestUnexpectedEOF(t *testing.T) {
526+
t.Parallel()
527+
528+
// On the first "select truncate" it sends a correct RowDescription followed
529+
// by a truncated DataRow (header declares 96 body bytes, only 5 are sent)
530+
// and then closes the connection. database/sql should discard the connection
531+
// and retry, and subsequent queries succeed.
532+
var failed atomic.Bool
533+
f := pqtest.NewFake(t, func(f pqtest.Fake, cn net.Conn) {
534+
f.Startup(cn, nil)
535+
for {
536+
code, q, ok := f.ReadMsg(cn)
537+
if !ok {
538+
return
539+
}
540+
switch code {
541+
case proto.Terminate:
542+
cn.Close()
543+
return
544+
case proto.Query:
545+
switch q := string(q[:bytes.IndexByte(q, 0)]); {
546+
case q == ";": // Ping()
547+
f.WriteMsg(cn, proto.EmptyQueryResponse, "")
548+
f.WriteMsg(cn, proto.ReadyForQuery, "I")
549+
case q == "select truncate" && !failed.Swap(true):
550+
f.WriteMsg(cn, proto.RowDescription, "\x00\x01truncate\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\xff\xff\xff\xff\xff\xff\x00\x00")
551+
cn.Write([]byte("D\x00\x00\x00\x64short"))
552+
cn.Close()
553+
return
554+
case q == "select truncate":
555+
f.SimpleQuery(cn, "SELECT", "truncate", "1")
556+
f.WriteMsg(cn, proto.ReadyForQuery, "I")
557+
case q == "select okay":
558+
f.SimpleQuery(cn, "SELECT", "okay", "1")
559+
f.WriteMsg(cn, proto.ReadyForQuery, "I")
560+
default:
561+
panic(fmt.Sprintf("unexpected query: %q", q))
562+
}
563+
}
564+
}
565+
})
566+
defer f.Close()
567+
568+
db := pqtest.MustDB(t, f.DSN())
569+
db.SetMaxOpenConns(1)
570+
db.SetMaxIdleConns(1)
571+
572+
// This should work as database/sql retries for us.
573+
pqtest.QueryRow[int](t, db, `select truncate`)
574+
if !failed.Load() {
575+
t.Fatal("select truncate never failed")
576+
}
577+
578+
// Make sure it doesn't break the connection.
579+
pqtest.QueryRow[int](t, db, `select okay`)
580+
}
581+
524582
func TestConnClose(t *testing.T) {
525583
// Ensure the underlying connection can be closed with Close after an error.
526584
t.Run("CloseBadConn", func(t *testing.T) {

error.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ func (cn *conn) handleError(reported error, query ...string) error {
307307
reported = driver.ErrBadConn
308308
}
309309
case error:
310-
if err == io.EOF || err.Error() == "remote error: handshake failure" {
310+
if err == io.EOF || err == io.ErrUnexpectedEOF || err.Error() == "remote error: handshake failure" {
311311
reported = driver.ErrBadConn
312312
}
313313
default:

0 commit comments

Comments
 (0)