2
0
Files
pgx/internal/nbconn/nbconn.go
T
Jack Christensen 811d855a35 Add non-blocking IO
This eliminates an edge case that can cause a deadlock and is a
prerequisite to cheaply testing connection liveness and to recovering a
connection after a timeout.

https://github.com/jackc/pgconn/issues/27

Squashed commit of the following:

commit 0d7b0dddea1575e9fd72592665badb8cbdd581cc
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 25 13:15:05 2022 -0500

    Add test for non-blocking IO preventing deadlock

commit 79d68d23d38bb03ddb8bf13cb45792430eaf959a
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 18 18:23:24 2022 -0500

    Release CopyFrom buf when done

commit 95a43139c7b0b7557898c4480e5b3e42417ee3c0
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 18 18:22:32 2022 -0500

    Avoid allocations with non-blocking write

commit 6b63ceee076794bc4380495a55dd414dbbd08a43
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 18 17:46:49 2022 -0500

    Simplify iobufpool usage

commit 60ecdda02e5a24c894df4f58d31c485b90de5d5b
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 18 11:51:59 2022 -0500

    Add true non-blocking IO

commit 7dd26a34a182d4aacaed3bf8c09f9cc48a7b6156
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 20:28:23 2022 -0500

    Fix block when reading more than buffered

commit afa702213f1b6d24c976406448301b2be53b7f70
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 20:10:23 2022 -0500

    More TLS support

commit 51655bf8f40321d5f89bc3c02dd55fba0ac6aa49
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 17:46:00 2022 -0500

    Steps toward TLS

commit 2b80beb1ed75f0f58db8188b87753dbc26b62098
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 13:06:29 2022 -0500

    Little more TLS support

commit 765b2c6e7b034ff6ffab3974579fd6ee7add593b
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 12:29:30 2022 -0500

    Add testing of TLS

commit 5b64432afbed9224f9512cc46624c88e7ebec625
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 09:48:19 2022 -0500

    Introduce testVariants in prep for TLS

commit ecebd7b103d4a9125c61e83f3651b950658b0b84
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 09:32:14 2022 -0500

    Handle and test read of previously buffered data

commit 09c64d8cf3ca5be1a31bef46bf78fa5cb9fae831
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 09:04:48 2022 -0500

    Rename nbbconn to nbconn

commit 73398bc67a7b7bd1aa044fb9b0546f4198ef92d2
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 08:59:53 2022 -0500

    Remove backup files

commit f1df39a29d23ae4e5175b92c69697f2bf9b4e112
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 08:58:05 2022 -0500

    Initial passing tests

commit ea3cdab234343fc9761d9b7966c5346179cd1b01
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat Jun 4 08:38:57 2022 -0500

    Fix connect timeout

commit ca22396789d120ff556f9704f4470268fbc8c0d8
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Thu Jun 2 19:32:55 2022 -0500

    wip

commit 2e7b46d5d7454daf0859dd48f8a8e190995164c5
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Mon May 30 08:32:43 2022 -0500

    Update comments

commit 7d04dc5caa80cb147929b6f65bab60a27baaff89
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat May 28 19:43:23 2022 -0500

    Fix broken test

commit bf1edc77d70465b4097a59c08c581033d2033ac6
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat May 28 19:40:33 2022 -0500

    fixed putting wrong size bufs

commit 1f7a855b2e4d1e14f85ac5f5683e2b93db0a4bd9
Author: Jack Christensen <jack@jackchristensen.com>
Date:   Sat May 28 18:13:47 2022 -0500

    initial not quite working non-blocking conn
2022-06-25 13:15:31 -05:00

514 lines
12 KiB
Go

// Package nbconn implements a non-blocking net.Conn wrapper.
//
// It is designed to solve three problems.
//
// The first is resolving the deadlock that can occur when both sides of a connection are blocked writing because all
// buffers between are full. See https://github.com/jackc/pgconn/issues/27 for discussion.
//
// The second is the inability to use a write deadline with a tls.Conn without killing the connection.
//
// The third is to efficiently check if a connection has been closed via a non-blocking read.
package nbconn
import (
	"crypto/tls"
	"errors"
	"io"
	"net"
	"os"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/jackc/pgx/v5/internal/iobufpool"
)
// errClosed is returned by NetConn methods that are called after the connection has been closed.
var errClosed = errors.New("closed")
// ErrWouldBlock is the error the non-blocking read and write paths report when the operation could not complete
// immediately.
var ErrWouldBlock = new(wouldBlockError)
// fakeNonblockingWaitDuration is how far in the future the fake non-blocking read and write paths set their temporary
// deadline; a deadline-exceeded error under that temporary deadline is reported as ErrWouldBlock.
const fakeNonblockingWaitDuration = 100 * time.Millisecond
// NonBlockingDeadline is a magic value that when passed to Set[Read]Deadline places the connection in non-blocking read
// mode. In that mode the read deadline of the underlying connection is cleared.
var NonBlockingDeadline = time.Date(1900, 1, 1, 0, 0, 0, 608536336, time.UTC)
// disableSetDeadlineDeadline is a magic value that when passed to Set[Read|Write]Deadline causes those methods to
// ignore all future calls. TLSConn.Close uses it to freeze the deadlines before closing.
var disableSetDeadlineDeadline = time.Date(1900, 1, 1, 0, 0, 0, 968549727, time.UTC)
// wouldBlockError is the concrete type behind ErrWouldBlock. It provides the Error, Timeout and Temporary methods of
// net.Error so that tls.Conn will recognize ErrWouldBlock as a temporary error.
type wouldBlockError struct{}

// Error returns the fixed description of a would-block condition.
func (e *wouldBlockError) Error() string { return "would block" }

// Timeout always reports true.
func (e *wouldBlockError) Timeout() bool {
	return true
}

// Temporary always reports true.
func (e *wouldBlockError) Temporary() bool {
	return true
}
// Conn is a net.Conn where Write never blocks and always succeeds. Flush must be called to actually write to the
// underlying connection.
type Conn interface {
	net.Conn
	// Flush writes the data buffered by earlier Write calls to the underlying connection.
	Flush() error
}
// NetConn is a non-blocking net.Conn wrapper. It implements net.Conn.
type NetConn struct {
	// conn is the wrapped connection that all IO is ultimately performed on.
	conn net.Conn
	// rawConn is populated by NewNetConn only when real non-blocking IO is available. When it is nil the fake,
	// deadline-based non-blocking read and write paths are used instead.
	rawConn syscall.RawConn
	// readQueue holds data that was read from conn in the background while flush was blocked on a write. Read drains
	// it before reading from conn.
	readQueue bufferQueue
	// writeQueue holds copies of the data passed to Write until flush sends them.
	writeQueue bufferQueue
	// readFlushLock is held by Read, Flush and Close while they work. It must be held when calling flush.
	readFlushLock sync.Mutex
	// non-blocking writes with syscall.RawConn are done with a callback function. By using these fields instead of the
	// callback functions closure to pass the buf argument and receive the n and err results we avoid some allocations.
	nonblockWriteBuf []byte
	nonblockWriteErr error
	nonblockWriteN int
	// readDeadlineLock guards readDeadline and readNonblocking.
	readDeadlineLock sync.Mutex
	// readDeadline is the last read deadline recorded by SetReadDeadline.
	readDeadline time.Time
	// readNonblocking is true after SetReadDeadline(NonBlockingDeadline); Read then uses the non-blocking read path.
	readNonblocking bool
	// writeDeadlineLock guards writeDeadline.
	writeDeadlineLock sync.Mutex
	// writeDeadline is the last write deadline recorded by SetWriteDeadline.
	writeDeadline time.Time
	// Only access with atomics
	closed int64 // 0 = not closed, 1 = closed
}
// NewNetConn wraps conn in a *NetConn.
//
// When fakeNonBlockingIO is false and conn implements syscall.Conn, the raw connection obtained from SyscallConn is
// kept and used for real non-blocking reads and writes. In every other case (fakeNonBlockingIO is true, conn does not
// implement syscall.Conn, or SyscallConn fails) the returned NetConn falls back to the fake, deadline-based
// non-blocking path.
func NewNetConn(conn net.Conn, fakeNonBlockingIO bool) *NetConn {
	wrapped := &NetConn{conn: conn}
	if fakeNonBlockingIO {
		return wrapped
	}

	sysConn, ok := conn.(syscall.Conn)
	if !ok {
		return wrapped
	}

	rawConn, err := sysConn.SyscallConn()
	if err != nil {
		return wrapped
	}

	wrapped.rawConn = rawConn
	return wrapped
}
// Read implements io.Reader.
//
// It first flushes any pending writes, then serves data that was already buffered in the read queue. Only when no
// buffered data is available does it read from the underlying connection, using the non-blocking path if the
// connection was placed in non-blocking read mode. It returns errClosed if the connection has been closed.
func (c *NetConn) Read(b []byte) (n int, err error) {
	if c.isClosed() {
		return 0, errClosed
	}

	c.readFlushLock.Lock()
	defer c.readFlushLock.Unlock()

	if err = c.flush(); err != nil {
		return 0, err
	}

	// Serve previously buffered data first.
	for n < len(b) {
		queued := c.readQueue.popFront()
		if queued == nil {
			break
		}

		copied := copy(b[n:], queued)
		n += copied
		if copied == len(queued) {
			// Fully consumed; the buffer can be recycled.
			iobufpool.Put(queued)
			continue
		}

		// b is full. Keep the unread tail at the head of the queue for the next Read.
		c.readQueue.pushFront(queued[copied:])
	}

	// Buffered bytes are returned on their own, without touching the connection. If the caller asked for up to len(b)
	// bytes but everything available was already buffered, a Read on the underlying connection would block.
	if n > 0 {
		return n, nil
	}

	c.readDeadlineLock.Lock()
	nonblocking := c.readNonblocking
	c.readDeadlineLock.Unlock()

	// Nothing was buffered, so n is zero and the result of the underlying read is the result of this call.
	if nonblocking {
		return c.nonblockingRead(b)
	}
	return c.conn.Read(b)
}
// Write implements io.Writer. The data is only queued, so Write never blocks; nothing reaches the underlying
// connection until Flush (or another method that flushes) runs. The only error it returns is errClosed, when the Conn
// has already been closed.
func (c *NetConn) Write(b []byte) (n int, err error) {
	if c.isClosed() {
		return 0, errClosed
	}

	// Queue a private copy of b rather than b itself.
	pending := iobufpool.Get(len(b))
	copy(pending, b)
	c.writeQueue.pushBack(pending)

	return len(b), nil
}
// Close marks the connection as closed, attempts to flush any queued writes, and then closes the underlying
// connection. A second call returns errClosed. If the flush fails its error is returned; otherwise the result of
// closing the underlying connection is returned.
func (c *NetConn) Close() (err error) {
	if !atomic.CompareAndSwapInt64(&c.closed, 0, 1) {
		return errClosed
	}

	// Registered first so that it runs last: the underlying connection is closed only after the flush below has
	// finished and readFlushLock has been released. A flush error takes precedence over the close error.
	defer func() {
		if closeErr := c.conn.Close(); err == nil {
			err = closeErr
		}
	}()

	c.readFlushLock.Lock()
	defer c.readFlushLock.Unlock()

	return c.flush()
}
// LocalAddr returns the local network address reported by the underlying connection.
func (c *NetConn) LocalAddr() net.Addr {
	return c.conn.LocalAddr()
}
// RemoteAddr returns the remote network address reported by the underlying connection.
func (c *NetConn) RemoteAddr() net.Addr {
	return c.conn.RemoteAddr()
}
// SetDeadline is the equivalent of calling SetReadDeadline(t) followed by SetWriteDeadline(t). If setting the read
// deadline fails, that error is returned and the write deadline is not touched.
func (c *NetConn) SetDeadline(t time.Time) error {
	if err := c.SetReadDeadline(t); err != nil {
		return err
	}
	return c.SetWriteDeadline(t)
}
// SetReadDeadline sets the read deadline as t. If t == NonBlockingDeadline then future reads will be non-blocking and
// the read deadline of the underlying connection is cleared.
//
// Passing disableSetDeadlineDeadline freezes the read deadline: that call and every later call return nil without
// changing anything on the underlying connection. It returns errClosed if the connection has been closed.
func (c *NetConn) SetReadDeadline(t time.Time) error {
	if c.isClosed() {
		return errClosed
	}

	c.readDeadlineLock.Lock()
	defer c.readDeadlineLock.Unlock()

	switch {
	case c.readDeadline == disableSetDeadlineDeadline:
		// Deadlines were frozen by an earlier call.
		return nil
	case t == disableSetDeadlineDeadline:
		// Freeze from now on; the underlying connection keeps its current deadline.
		c.readDeadline = t
		return nil
	}

	c.readNonblocking = t == NonBlockingDeadline
	if c.readNonblocking {
		// Non-blocking mode does not use a deadline on the underlying connection.
		t = time.Time{}
	}

	c.readDeadline = t
	return c.conn.SetReadDeadline(t)
}
// SetWriteDeadline records t as the write deadline and applies it to the underlying connection.
//
// Passing disableSetDeadlineDeadline freezes the write deadline: that call and every later call return nil without
// changing anything on the underlying connection. It returns errClosed if the connection has been closed.
func (c *NetConn) SetWriteDeadline(t time.Time) error {
	if c.isClosed() {
		return errClosed
	}

	c.writeDeadlineLock.Lock()
	defer c.writeDeadlineLock.Unlock()

	switch {
	case c.writeDeadline == disableSetDeadlineDeadline:
		// Deadlines were frozen by an earlier call.
		return nil
	case t == disableSetDeadlineDeadline:
		// Freeze from now on; the underlying connection keeps its current deadline.
		c.writeDeadline = t
		return nil
	}

	c.writeDeadline = t
	return c.conn.SetWriteDeadline(t)
}
// Flush writes all data queued by Write to the underlying connection. It returns errClosed if the connection has been
// closed, otherwise the result of the internal flush.
func (c *NetConn) Flush() error {
	if c.isClosed() {
		return errClosed
	}
	// flush requires readFlushLock to be held.
	c.readFlushLock.Lock()
	defer c.readFlushLock.Unlock()
	return c.flush()
}
// flush does the actual work of flushing the writeQueue. readFlushLock must already be held.
//
// Buffers are popped from the write queue one at a time and written with non-blocking writes. When a write would
// block, a background goroutine is started (once per call) that reads from the connection into the read queue, and
// the write is retried in a loop. flush returns the first error that is not ErrWouldBlock, whether it comes from a
// write or from the background reader.
func (c *NetConn) flush() error {
	// stopChan and errChan are only non-nil once the background reader has been started.
	var stopChan chan struct{}
	var errChan chan error
	defer func() {
		// If a background reader is still running, wait until it either accepts the stop signal or reports an error.
		if stopChan != nil {
			select {
			case stopChan <- struct{}{}:
			case <-errChan:
			}
		}
	}()
	for buf := c.writeQueue.popFront(); buf != nil; buf = c.writeQueue.popFront() {
		remainingBuf := buf
		for len(remainingBuf) > 0 {
			n, err := c.nonblockingWrite(remainingBuf)
			remainingBuf = remainingBuf[n:]
			if err != nil {
				if !errors.Is(err, ErrWouldBlock) {
					// Hard write error: move the unwritten tail to the start of buf and put it back at the head of
					// the queue before reporting the error.
					buf = buf[:len(remainingBuf)]
					copy(buf, remainingBuf)
					c.writeQueue.pushFront(buf)
					return err
				}
				// Writing was blocked. Reading might unblock it.
				if stopChan == nil {
					stopChan, errChan = c.bufferNonblockingRead()
				}
				// Non-blocking check for a failure of the background reader. The reader goroutine returns right after
				// sending its error, so stopChan is cleared to keep the deferred function from waiting on it.
				// NOTE(review): on this path the unwritten remainder of buf is neither re-queued nor returned to the
				// pool — confirm that is intended.
				select {
				case err := <-errChan:
					stopChan = nil
					return err
				default:
				}
			}
		}
		// buf was written completely; recycle it.
		iobufpool.Put(buf)
	}
	return nil
}
// bufferNonblockingRead starts a goroutine that repeatedly performs non-blocking reads on the connection and appends
// whatever it receives to the read queue. flush uses it while a write is blocked, since reading might unblock the
// write.
//
// The goroutine ends when a receive on stopChan succeeds (it polls stopChan between reads) or when a read fails with
// an error other than ErrWouldBlock. In the latter case the error is sent on errChan, which has a buffer of one so
// the send never blocks, and the goroutine returns without waiting for a stop signal.
func (c *NetConn) bufferNonblockingRead() (stopChan chan struct{}, errChan chan error) {
	stopChan = make(chan struct{})
	errChan = make(chan error, 1)

	go func() {
		for {
			buf := iobufpool.Get(8 * 1024)
			n, err := c.nonblockingRead(buf)
			if n > 0 {
				// Ownership of buf passes to the read queue.
				c.readQueue.pushBack(buf[:n])
			} else {
				// Nothing was read, so nobody else holds buf. Return it to the pool; otherwise every iteration that
				// hits ErrWouldBlock would take a fresh 8 KiB buffer and drop it.
				iobufpool.Put(buf)
			}

			if err != nil && !errors.Is(err, ErrWouldBlock) {
				errChan <- err
				return
			}

			select {
			case <-stopChan:
				return
			default:
			}
		}
	}()

	return stopChan, errChan
}
// isClosed reports whether Close has been called. The closed flag is read atomically.
func (c *NetConn) isClosed() bool {
	return atomic.LoadInt64(&c.closed) == 1
}
// nonblockingWrite writes b without blocking indefinitely. It uses the real non-blocking path when a raw connection
// is available and the fake, deadline-based path otherwise.
func (c *NetConn) nonblockingWrite(b []byte) (n int, err error) {
	if c.rawConn != nil {
		return c.realNonblockingWrite(b)
	}
	return c.fakeNonblockingWrite(b)
}
// fakeNonblockingWrite approximates a non-blocking write on a connection without raw access. It temporarily sets a
// write deadline fakeNonblockingWaitDuration in the future, writes, restores the recorded write deadline, and reports
// a deadline-exceeded error as ErrWouldBlock.
//
// If the recorded write deadline is set and is not later than the temporary one, the deadline is left alone and the
// write result is returned unchanged.
func (c *NetConn) fakeNonblockingWrite(b []byte) (n int, err error) {
	c.writeDeadlineLock.Lock()
	defer c.writeDeadlineLock.Unlock()

	shortDeadline := time.Now().Add(fakeNonblockingWaitDuration)
	keepRecordedDeadline := !c.writeDeadline.IsZero() && !shortDeadline.Before(c.writeDeadline)
	if keepRecordedDeadline {
		return c.conn.Write(b)
	}

	if err = c.conn.SetWriteDeadline(shortDeadline); err != nil {
		return 0, err
	}
	defer func() {
		// Ignoring error resetting deadline as there is nothing that can reasonably be done if it fails.
		c.conn.SetWriteDeadline(c.writeDeadline)

		// Only the temporary deadline can have expired here, so a timeout means "would block".
		if errors.Is(err, os.ErrDeadlineExceeded) {
			err = ErrWouldBlock
		}
	}()

	return c.conn.Write(b)
}
// realNonblockingWrite does a non-blocking write. readFlushLock must already be held.
//
// The write is a single syscall.Write issued through the raw connection. The buffer and the syscall results are
// passed through fields on c. A syscall result of EWOULDBLOCK is reported as ErrWouldBlock. The returned n is never
// negative.
func (c *NetConn) realNonblockingWrite(b []byte) (n int, err error) {
	c.nonblockWriteBuf, c.nonblockWriteN, c.nonblockWriteErr = b, 0, nil

	err = c.rawConn.Write(func(fd uintptr) (done bool) {
		c.nonblockWriteN, c.nonblockWriteErr = syscall.Write(int(fd), c.nonblockWriteBuf)
		// Always done: one attempt only, never wait for the descriptor to become writable.
		return true
	})
	n = c.nonblockWriteN

	// An error from rawConn.Write itself takes precedence over the syscall error.
	if writeErr := c.nonblockWriteErr; err == nil && writeErr != nil {
		err = writeErr
		if errors.Is(writeErr, syscall.EWOULDBLOCK) {
			err = ErrWouldBlock
		}
	}

	// n may be -1 when an error occurs.
	if err != nil && n < 0 {
		n = 0
	}
	return n, err
}
// nonblockingRead reads into b without blocking indefinitely. It uses the real non-blocking path when a raw
// connection is available and the fake, deadline-based path otherwise.
func (c *NetConn) nonblockingRead(b []byte) (n int, err error) {
	if c.rawConn != nil {
		return c.realNonblockingRead(b)
	}
	return c.fakeNonblockingRead(b)
}
// fakeNonblockingRead approximates a non-blocking read on a connection without raw access. It temporarily sets a read
// deadline fakeNonblockingWaitDuration in the future, reads, restores the recorded read deadline, and reports a
// deadline-exceeded error as ErrWouldBlock.
//
// If the recorded read deadline is set and is not later than the temporary one, the deadline is left alone and the
// read result is returned unchanged.
func (c *NetConn) fakeNonblockingRead(b []byte) (n int, err error) {
	c.readDeadlineLock.Lock()
	defer c.readDeadlineLock.Unlock()

	shortDeadline := time.Now().Add(fakeNonblockingWaitDuration)
	keepRecordedDeadline := !c.readDeadline.IsZero() && !shortDeadline.Before(c.readDeadline)
	if keepRecordedDeadline {
		return c.conn.Read(b)
	}

	if err = c.conn.SetReadDeadline(shortDeadline); err != nil {
		return 0, err
	}
	defer func() {
		// Ignoring error resetting deadline as there is nothing that can reasonably be done if it fails.
		c.conn.SetReadDeadline(c.readDeadline)

		// Only the temporary deadline can have expired here, so a timeout means "would block".
		if errors.Is(err, os.ErrDeadlineExceeded) {
			err = ErrWouldBlock
		}
	}()

	return c.conn.Read(b)
}
// realNonblockingRead does a non-blocking read into b using a single syscall.Read issued through the raw connection.
//
// It returns ErrWouldBlock when no data is currently available (EWOULDBLOCK) and io.EOF when the read succeeds with
// zero bytes for a non-empty b, which is how the end of the stream is signalled. Any other error from the raw
// connection or from the syscall is returned as is. The returned n is never negative.
func (c *NetConn) realNonblockingRead(b []byte) (n int, err error) {
	var funcErr error
	err = c.rawConn.Read(func(fd uintptr) (done bool) {
		n, funcErr = syscall.Read(int(fd), b)
		// Always done: one attempt only, never wait for the descriptor to become readable.
		return true
	})

	// An error from rawConn.Read itself takes precedence over the syscall error.
	if err == nil && funcErr != nil {
		if errors.Is(funcErr, syscall.EWOULDBLOCK) {
			err = ErrWouldBlock
		} else {
			err = funcErr
		}
	}

	if err != nil {
		// n may be -1 when an error occurs.
		if n < 0 {
			n = 0
		}
		return n, err
	}

	// A successful read of zero bytes means the peer closed the connection. Without this, callers would see (0, nil)
	// forever and a read loop would spin on a dead connection. A zero-length b legitimately reads zero bytes, so it
	// is excluded.
	if n == 0 && len(b) > 0 {
		return 0, io.EOF
	}

	return n, nil
}
// Note: syscall.Conn is an interface.
// TLSClient establishes a TLS connection as a client over conn using config.
//
// The TLS handshake is performed explicitly before the *TLSConn is returned. Otherwise the first Read on the returned
// *TLSConn would also trigger a Write for the handshake, which could make read and write deadlines behave
// unexpectedly. An error from the handshake or from the final flush is returned with a nil *TLSConn.
func TLSClient(conn *NetConn, config *tls.Config) (*TLSConn, error) {
	tlsConn := tls.Client(conn, config)
	if err := tlsConn.Handshake(); err != nil {
		return nil, err
	}

	// Ensure last written part of Handshake is actually sent.
	if err := conn.Flush(); err != nil {
		return nil, err
	}

	wrapped := &TLSConn{
		tlsConn: tlsConn,
		nbConn:  conn,
	}
	return wrapped, nil
}
// TLSConn is a TLS wrapper around a *NetConn. It works around a temporary write error (such as a timeout) being fatal
// to a tls.Conn.
type TLSConn struct {
	// tlsConn is the TLS client connection layered on top of nbConn.
	tlsConn *tls.Conn
	// nbConn is the underlying non-blocking connection; Flush is forwarded to it.
	nbConn *NetConn
}
// Read delegates to the wrapped *tls.Conn.
func (tc *TLSConn) Read(b []byte) (n int, err error) { return tc.tlsConn.Read(b) }
// Write delegates to the wrapped *tls.Conn.
func (tc *TLSConn) Write(b []byte) (n int, err error) { return tc.tlsConn.Write(b) }
// Flush flushes the underlying *NetConn.
func (tc *TLSConn) Flush() error { return tc.nbConn.Flush() }
// LocalAddr returns the local address reported by the wrapped *tls.Conn.
func (tc *TLSConn) LocalAddr() net.Addr { return tc.tlsConn.LocalAddr() }
// RemoteAddr returns the remote address reported by the wrapped *tls.Conn.
func (tc *TLSConn) RemoteAddr() net.Addr { return tc.tlsConn.RemoteAddr() }
// Close closes the TLS connection and returns the result of closing the wrapped *tls.Conn.
//
// Before closing it sets a deadline five seconds from now and then passes the magic value that turns every later
// set-deadline call into a no-op. Errors from these two deadline calls are ignored.
func (tc *TLSConn) Close() error {
	// tls.Conn.closeNotify() sets a 5 second deadline to avoid blocking, sends a TLS alert close notification, and then
	// sets the deadline to now. This causes NetConn's Close not to be able to flush the write buffer. Instead we set our
	// own 5 second deadline then make all set deadlines no-op.
	closeDeadline := time.Now().Add(5 * time.Second)
	tc.tlsConn.SetDeadline(closeDeadline)
	tc.tlsConn.SetDeadline(disableSetDeadlineDeadline)

	return tc.tlsConn.Close()
}
// SetDeadline delegates to the wrapped *tls.Conn.
func (tc *TLSConn) SetDeadline(t time.Time) error { return tc.tlsConn.SetDeadline(t) }
// SetReadDeadline delegates to the wrapped *tls.Conn.
func (tc *TLSConn) SetReadDeadline(t time.Time) error { return tc.tlsConn.SetReadDeadline(t) }
// SetWriteDeadline delegates to the wrapped *tls.Conn.
func (tc *TLSConn) SetWriteDeadline(t time.Time) error { return tc.tlsConn.SetWriteDeadline(t) }