|
| 1 | +// Copyright (c) 2022 RethinkDNS and its authors. |
| 2 | +// |
| 3 | +// This Source Code Form is subject to the terms of the Mozilla Public |
| 4 | +// License, v. 2.0. If a copy of the MPL was not distributed with this |
| 5 | +// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 6 | +// |
| 7 | +// This file incorporates work covered by the following copyright and |
| 8 | +// permission notice: |
| 9 | +// |
| 10 | +// Copyright 2018 The gVisor Authors. |
| 11 | +// |
| 12 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 13 | +// you may not use this file except in compliance with the License. |
| 14 | +// You may obtain a copy of the License at |
| 15 | +// |
| 16 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 17 | +// |
| 18 | +// Unless required by applicable law or agreed to in writing, software |
| 19 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 20 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 21 | +// See the License for the specific language governing permissions and |
| 22 | +// limitations under the License. |
| 23 | + |
| 24 | +// Adopted from: github.com/google/gvisor/blob/f33d034/pkg/tcpip/link/fdbased/packet_dispatchers.go |
| 25 | +package netstack |
| 26 | + |
| 27 | +import ( |
| 28 | + "fmt" |
| 29 | + |
| 30 | + "golang.org/x/sys/unix" |
| 31 | + "gvisor.dev/gvisor/pkg/tcpip" |
| 32 | + "gvisor.dev/gvisor/pkg/tcpip/buffer" |
| 33 | + "gvisor.dev/gvisor/pkg/tcpip/header" |
| 34 | + "gvisor.dev/gvisor/pkg/tcpip/link/rawfile" |
| 35 | + "gvisor.dev/gvisor/pkg/tcpip/stack" |
| 36 | +) |
| 37 | + |
// BufConfig lists, in order, the byte sizes of the views that make up the
// vectorised buffer used to read packets from the NIC.
var BufConfig = []int{
	128,
	256,
	256,
	512,
	1024,
	2048,
	4096,
	8192,
	16384,
	32768,
}
| 40 | + |
| 41 | +type iovecBuffer struct { |
| 42 | + // views are the actual buffers that hold the packet contents. |
| 43 | + views []buffer.View |
| 44 | + |
| 45 | + // iovecs are initialized with base pointers/len of the corresponding |
| 46 | + // entries in the views defined above, except when GSO is enabled |
| 47 | + // (skipsVnetHdr) then the first iovec points to a buffer for the vnet header |
| 48 | + // which is stripped before the views are passed up the stack for further |
| 49 | + // processing. |
| 50 | + iovecs []unix.Iovec |
| 51 | + |
| 52 | + // sizes is an array of buffer sizes for the underlying views. sizes is |
| 53 | + // immutable. |
| 54 | + sizes []int |
| 55 | + |
| 56 | + // skipsVnetHdr is true if virtioNetHdr is to skipped. |
| 57 | + //skipsVnetHdr bool |
| 58 | +} |
| 59 | + |
| 60 | +func newIovecBuffer(sizes []int) *iovecBuffer { |
| 61 | + b := &iovecBuffer{ |
| 62 | + views: make([]buffer.View, len(sizes)), |
| 63 | + sizes: sizes, |
| 64 | + // skipsVnetHdr: skipsVnetHdr, |
| 65 | + } |
| 66 | + /*niov := len(b.views) |
| 67 | + if b.skipsVnetHdr { |
| 68 | + niov++ |
| 69 | + } |
| 70 | + b.iovecs = make([]unix.Iovec, niov)*/ |
| 71 | + b.iovecs = make([]unix.Iovec, len(b.views)) |
| 72 | + return b |
| 73 | +} |
| 74 | + |
| 75 | +func (b *iovecBuffer) nextIovecs() []unix.Iovec { |
| 76 | + vnetHdrOff := 0 |
| 77 | + /* if b.skipsVnetHdr { |
| 78 | + var vnetHdr [virtioNetHdrSize]byte |
| 79 | + // The kernel adds virtioNetHdr before each packet, but |
| 80 | + // we don't use it, so so we allocate a buffer for it, |
| 81 | + // add it in iovecs but don't add it in a view. |
| 82 | + b.iovecs[0] = unix.Iovec{Base: &vnetHdr[0]} |
| 83 | + b.iovecs[0].SetLen(virtioNetHdrSize) |
| 84 | + vnetHdrOff++ |
| 85 | + } |
| 86 | + */for i := range b.views { |
| 87 | + if b.views[i] != nil { |
| 88 | + break |
| 89 | + } |
| 90 | + v := buffer.NewView(b.sizes[i]) |
| 91 | + b.views[i] = v |
| 92 | + b.iovecs[i+vnetHdrOff] = unix.Iovec{Base: &v[0]} |
| 93 | + b.iovecs[i+vnetHdrOff].SetLen(len(v)) |
| 94 | + } |
| 95 | + return b.iovecs |
| 96 | +} |
| 97 | + |
| 98 | +func (b *iovecBuffer) pullViews(n int) buffer.VectorisedView { |
| 99 | + var views []buffer.View |
| 100 | + c := 0 |
| 101 | + /* if b.skipsVnetHdr { |
| 102 | + c += virtioNetHdrSize |
| 103 | + if c >= n { |
| 104 | + // Nothing in the packet. |
| 105 | + return buffer.NewVectorisedView(0, nil) |
| 106 | + } |
| 107 | + }*/ |
| 108 | + for i, v := range b.views { |
| 109 | + c += len(v) |
| 110 | + if c >= n { |
| 111 | + b.views[i].CapLength(len(v) - (c - n)) |
| 112 | + views = append([]buffer.View(nil), b.views[:i+1]...) |
| 113 | + break |
| 114 | + } |
| 115 | + } |
| 116 | + // Remove the first len(views) used views from the state. |
| 117 | + for i := range views { |
| 118 | + b.views[i] = nil |
| 119 | + } |
| 120 | + /* if b.skipsVnetHdr { |
| 121 | + // Exclude the size of the vnet header. |
| 122 | + n -= virtioNetHdrSize |
| 123 | + }*/ |
| 124 | + return buffer.NewVectorisedView(n, views) |
| 125 | +} |
| 126 | + |
// stopFd wraps the eventfd through which a dispatcher is told to stop.
type stopFd struct {
	// efd is the eventfd file descriptor.
	efd int
}
| 131 | + |
| 132 | +func newStopFd() (stopFd, error) { |
| 133 | + efd, err := unix.Eventfd(0, unix.EFD_NONBLOCK) |
| 134 | + if err != nil { |
| 135 | + return stopFd{efd: -1}, fmt.Errorf("failed to create eventfd: %w", err) |
| 136 | + } |
| 137 | + return stopFd{efd: efd}, nil |
| 138 | +} |
| 139 | + |
| 140 | +// stop writes to the eventfd and notifies the dispatcher to stop. It does not |
| 141 | +// block. |
| 142 | +func (s *stopFd) stop() { |
| 143 | + increment := []byte{1, 0, 0, 0, 0, 0, 0, 0} |
| 144 | + if n, err := unix.Write(s.efd, increment); n != len(increment) || err != nil { |
| 145 | + // There are two possible errors documented in eventfd(2) for writing: |
| 146 | + // 1. We are writing 8 bytes and not 0xffffffffffffff, thus no EINVAL. |
| 147 | + // 2. stop is only supposed to be called once, it can't reach the limit, |
| 148 | + // thus no EAGAIN. |
| 149 | + panic(fmt.Sprintf("write(efd) = (%d, %s), want (%d, nil)", n, err, len(increment))) |
| 150 | + } |
| 151 | +} |
| 152 | + |
| 153 | +// readVDispatcher uses readv() system call to read inbound packets and |
| 154 | +// dispatches them. |
| 155 | +type readVDispatcher struct { |
| 156 | + stopFd |
| 157 | + // fd is the file descriptor used to send and receive packets. |
| 158 | + fd int |
| 159 | + |
| 160 | + // e is the endpoint this dispatcher is attached to. |
| 161 | + e *endpoint |
| 162 | + |
| 163 | + // buf is the iovec buffer that contains the packet contents. |
| 164 | + buf *iovecBuffer |
| 165 | +} |
| 166 | + |
| 167 | +func newReadVDispatcher(fd int, e *endpoint) (linkDispatcher, error) { |
| 168 | + stopFd, err := newStopFd() |
| 169 | + if err != nil { |
| 170 | + return nil, err |
| 171 | + } |
| 172 | + d := &readVDispatcher{ |
| 173 | + stopFd: stopFd, |
| 174 | + fd: fd, |
| 175 | + e: e, |
| 176 | + } |
| 177 | + |
| 178 | + // skipsVnetHdr := d.e.gsoKind == stack.HWGSOSupported |
| 179 | + d.buf = newIovecBuffer(BufConfig) |
| 180 | + return d, nil |
| 181 | +} |
| 182 | + |
| 183 | +// dispatch reads one packet from the file descriptor and dispatches it. |
| 184 | +func (d *readVDispatcher) dispatch() (bool, tcpip.Error) { |
| 185 | + n, err := rawfile.BlockingReadvUntilStopped(d.efd, d.fd, d.buf.nextIovecs()) |
| 186 | + if n <= 0 || err != nil { |
| 187 | + return false, err |
| 188 | + } |
| 189 | + |
| 190 | + pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ |
| 191 | + Data: d.buf.pullViews(n), |
| 192 | + }) |
| 193 | + defer pkt.DecRef() |
| 194 | + |
| 195 | + var p tcpip.NetworkProtocolNumber |
| 196 | + // hdrSize always zero; unused |
| 197 | + if d.e.hdrSize > 0 { |
| 198 | + hdr, ok := pkt.LinkHeader().Consume(d.e.hdrSize) |
| 199 | + if !ok { |
| 200 | + return false, nil |
| 201 | + } |
| 202 | + p = header.Ethernet(hdr).Type() |
| 203 | + } else { |
| 204 | + // We don't get any indication of what the packet is, so try to guess |
| 205 | + // if it's an IPv4 or IPv6 packet. |
| 206 | + // IP version information is at the first octet, so pulling up 1 byte. |
| 207 | + h, ok := pkt.Data().PullUp(1) |
| 208 | + if !ok { |
| 209 | + return true, nil |
| 210 | + } |
| 211 | + switch header.IPVersion(h) { |
| 212 | + case header.IPv4Version: |
| 213 | + p = header.IPv4ProtocolNumber |
| 214 | + case header.IPv6Version: |
| 215 | + p = header.IPv6ProtocolNumber |
| 216 | + default: |
| 217 | + return true, nil |
| 218 | + } |
| 219 | + } |
| 220 | + |
| 221 | + d.e.dispatcher.DeliverNetworkPacket(p, pkt) |
| 222 | + |
| 223 | + return true, nil |
| 224 | +} |
0 commit comments