• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

google / alioth / 17114607742

21 Aug 2025 01:31AM UTC coverage: 10.411%. Remained the same
17114607742

Pull #273

github

web-flow
Merge 897d3fdbf into 7925c9625
Pull Request #273: feat: virtio packed queue

30 of 57 new or added lines in 9 files covered. (52.63%)

50 existing lines in 4 files now uncovered.

714 of 6858 relevant lines covered (10.41%)

16.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/alioth/src/virtio/dev/net/net.rs
1
// Copyright 2024 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     https://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
pub mod tap;
16

17
use std::fmt::Debug;
18
use std::fs::{File, OpenOptions};
19
use std::io::{ErrorKind, IoSlice};
20
use std::mem::MaybeUninit;
21
use std::num::NonZeroU16;
22
use std::os::fd::{AsFd, AsRawFd};
23
use std::os::unix::prelude::OpenOptionsExt;
24
use std::path::{Path, PathBuf};
25
use std::sync::Arc;
26
use std::sync::mpsc::Receiver;
27
use std::thread::JoinHandle;
28

29
use bitflags::bitflags;
30
use io_uring::cqueue::Entry as Cqe;
31
use io_uring::opcode;
32
use io_uring::types::Fd;
33
use libc::{IFF_MULTI_QUEUE, IFF_NO_PI, IFF_TAP, IFF_VNET_HDR, O_NONBLOCK};
34
use mio::event::Event;
35
use mio::unix::SourceFd;
36
use mio::{Interest, Registry, Token};
37
use serde::Deserialize;
38
use serde_aco::Help;
39
use zerocopy::{FromBytes, Immutable, IntoBytes};
40

41
use crate::hv::IoeventFd;
42
use crate::mem::mapped::RamBus;
43
use crate::net::MacAddr;
44
use crate::virtio::dev::{DevParam, DeviceId, Result, Virtio, WakeEvent};
45
use crate::virtio::queue::{Descriptor, Queue, VirtQueue, copy_from_reader, copy_to_writer};
46
use crate::virtio::worker::io_uring::{ActiveIoUring, BufferAction, IoUring, VirtioIoUring};
47
use crate::virtio::worker::mio::{ActiveMio, Mio, VirtioMio};
48
use crate::virtio::worker::{Waker, WorkerApi};
49
use crate::virtio::{FEATURE_BUILT_IN, IrqSender, error};
50
use crate::{c_enum, impl_mmio_for_zerocopy};
51

52
use self::tap::{TunFeature, tun_set_iff, tun_set_offload, tun_set_vnet_hdr_sz};
53

54
#[repr(C, align(8))]
55
#[derive(Debug, Default, FromBytes, Immutable, IntoBytes)]
56
pub struct NetConfig {
57
    mac: MacAddr,
58
    status: u16,
59
    max_queue_pairs: u16,
60
    mtu: u16,
61
    speed: u32,
62
    duplex: u8,
63
    rss_max_key_size: u8,
64
    rss_max_indirection_table_length: u16,
65
    supported_hash_types: u32,
66
}
67

68
impl_mmio_for_zerocopy!(NetConfig);
69

70
// Status byte written back to the driver after a control-queue command:
// `OK` when the command was applied, `ERR` otherwise.
c_enum! {
    #[derive(Default, FromBytes, Immutable, IntoBytes)]
    struct CtrlAck(u8);
    {
        OK = 0;
        ERR = 1;
    }
}
78

79
// Command class carried in the first byte of a control-queue header.
// Only the multiqueue class is recognised by this file.
c_enum! {
    #[derive(Default, FromBytes, Immutable, IntoBytes)]
    struct CtrlClass(u8);
    {
        MQ = 4;
    }
}
86

87
// Commands of the multiqueue control class.
// NOTE(review): `PARIS` looks like a typo for `PAIRS`; renaming it requires
// touching every user of the constant at once.
c_enum! {
    #[derive(Default, FromBytes, Immutable, IntoBytes)]
    struct CtrlMq(u8);
    {
        VQ_PARIS_SET = 0;
    }
}
94

95
#[repr(C)]
96
#[derive(Debug, Default, FromBytes, Immutable, IntoBytes)]
97
struct CtrlMqParisSet {
98
    virtq_pairs: u16,
99
}
100

101
#[repr(C)]
102
#[derive(Debug, Default, FromBytes, Immutable, IntoBytes)]
103
struct CtrlHdr {
104
    class: CtrlClass,
105
    command: u8,
106
}
107

108
bitflags! {
    /// Feature bits negotiated between the virtio-net device and its driver.
    ///
    /// Bit positions mirror the `VIRTIO_NET_F_*` constants of the virtio
    /// specification. Declaration order is kept as-is because it may be
    /// observable, e.g. in `Debug` output.
    #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct NetFeature: u128 {
        const CSUM = 1 << 0;
        const GUEST_CSUM = 1 << 1;
        const CTRL_GUEST_OFFLOADS = 1 << 2;
        const MTU = 1 << 3;
        const MAC = 1 << 5;
        const GUEST_TSO4 = 1 << 7;
        const GUEST_TSO6 = 1 << 8;
        const GUEST_ECN = 1 << 9;
        const GUEST_UFO = 1 << 10;
        const HOST_TSO4 = 1 << 11;
        const HOST_TSO6 = 1 << 12;
        const HOST_ECN = 1 << 13;
        const HOST_UFO = 1 << 14;
        const MRG_RXBUF = 1 << 15;
        const STATUS = 1 << 16;
        const CTRL_VQ = 1 << 17;
        const CTRL_RX = 1 << 18;
        const CTRL_VLAN = 1 << 19;
        const GUEST_ANNOUNCE = 1 << 21;
        const MQ = 1 << 22;
        const CTRL_MAC_ADDR = 1 << 23;
        const GUEST_USO4 = 1 << 54;
        const GUEST_USO6 = 1 << 55;
        const HOST_USO = 1 << 56;
        const HASH_REPORT = 1 << 57;
        const GUEST_HDRLEN = 1 << 59;
        const RSS = 1 << 60;
        const RSC_EXT = 1 << 61;
        const STANDBY = 1 << 62;
        const SPEED_DUPLEX = 1 << 63;
        // Bit 28 is the transport-level indirect-descriptor feature rather
        // than a net-specific one, hence its position out of numeric order.
        const INDIRECT_DESC = 1 << 28;
    }
}
144

145
#[derive(Debug)]
146
pub struct Net {
147
    name: Arc<str>,
148
    config: Arc<NetConfig>,
149
    tap_sockets: Vec<File>,
150
    feature: NetFeature,
151
    driver_feature: NetFeature,
152
    dev_tap: Option<PathBuf>,
153
    if_name: Option<String>,
154
    api: WorkerApi,
155
}
156

157
#[derive(Debug, Deserialize, Clone, Help)]
158
pub struct NetTapParam {
159
    /// MAC address of the virtual NIC, e.g. 06:3a:76:53:da:3d.
160
    pub mac: MacAddr,
161
    /// Maximum transmission unit.
162
    pub mtu: u16,
163
    /// Number of pairs of transmit/receive queues. [default: 1]
164
    #[serde(alias = "qp")]
165
    pub queue_pairs: Option<NonZeroU16>,
166
    /// Path to the character device file of a tap interface.
167
    ///
168
    /// Required for MacVTap and IPVTap, e.g. /dev/tapX.
169
    /// Optional for TUN/TAP. [default: /dev/net/tun]
170
    pub tap: Option<PathBuf>,
171
    /// Name of a tap interface, e.g. tapX.
172
    ///
173
    /// Required for TUN/TAP. Optional for MacVTap and IPVTap.
174
    #[serde(alias = "if")]
175
    pub if_name: Option<String>,
176
    /// System API for asynchronous IO.
177
    #[serde(default)]
178
    pub api: WorkerApi,
179
}
180

181
impl DevParam for NetTapParam {
182
    type Device = Net;
183

184
    fn build(self, name: impl Into<Arc<str>>) -> Result<Net> {
×
185
        Net::new(self, name)
×
186
    }
187
}
188

189
/// Opens a tap character device for reading and writing.
///
/// * `dev_tap` - path of the device to open; `/dev/net/tun` when `None`.
/// * `blocking` - when `false` the file is opened with `O_NONBLOCK`.
///
/// Returns the opened file, or the error reported by `open`.
fn new_socket(dev_tap: Option<&Path>, blocking: bool) -> Result<File> {
    let path = match dev_tap {
        Some(custom) => custom,
        None => Path::new("/dev/net/tun"),
    };
    let mut options = OpenOptions::new();
    options.read(true).write(true);
    if !blocking {
        options.custom_flags(O_NONBLOCK);
    }
    Ok(options.open(path)?)
}
199

200
impl Net {
201
    pub fn new(param: NetTapParam, name: impl Into<Arc<str>>) -> Result<Self> {
×
202
        let mut socket = new_socket(
203
            param.tap.as_deref(),
×
204
            matches!(param.api, WorkerApi::IoUring),
×
205
        )?;
206
        let max_queue_pairs = param.queue_pairs.map(From::from).unwrap_or(1);
×
207
        setup_socket(&mut socket, param.if_name.as_deref(), max_queue_pairs > 1)?;
×
208
        let mut dev_feat = NetFeature::MAC
×
209
            | NetFeature::MTU
×
210
            | NetFeature::CSUM
×
211
            | NetFeature::HOST_TSO4
×
212
            | NetFeature::HOST_TSO6
×
213
            | NetFeature::HOST_ECN
×
214
            | NetFeature::HOST_UFO
×
215
            | NetFeature::HOST_USO
×
216
            | NetFeature::CTRL_VQ
×
217
            | detect_tap_offload(&socket);
×
218
        if max_queue_pairs > 1 {
×
219
            dev_feat |= NetFeature::MQ;
×
220
        }
221
        let net = Net {
222
            name: name.into(),
×
223
            config: Arc::new(NetConfig {
×
224
                mac: param.mac,
225
                max_queue_pairs,
226
                mtu: param.mtu,
227
                ..Default::default()
228
            }),
229
            tap_sockets: vec![socket],
×
230
            feature: dev_feat,
231
            driver_feature: NetFeature::empty(),
×
232
            dev_tap: param.tap,
×
233
            if_name: param.if_name,
×
234
            api: param.api,
×
235
        };
236
        Ok(net)
×
237
    }
238

239
    fn handle_ctrl_queue(
240
        &mut self,
241
        desc: &mut Descriptor,
242
        registry: Option<&Registry>,
243
    ) -> Result<usize> {
244
        let Some(header) = desc
245
            .readable
246
            .first()
247
            .and_then(|b| CtrlHdr::read_from_bytes(b).ok())
248
        else {
249
            return error::InvalidBuffer.fail();
250
        };
251
        let Some(ack_byte) = desc.writable.first_mut().and_then(|v| v.first_mut()) else {
252
            return error::InvalidBuffer.fail();
253
        };
254
        let ack = match header.class {
255
            CtrlClass::MQ => match CtrlMq(header.command) {
256
                CtrlMq::VQ_PARIS_SET => {
257
                    let to_set = |b: &IoSlice| CtrlMqParisSet::read_from_bytes(b).ok();
258
                    let Some(data) = desc.readable.get(1).and_then(to_set) else {
259
                        return error::InvalidBuffer.fail();
260
                    };
261
                    let pairs = data.virtq_pairs as usize;
262
                    self.tap_sockets.truncate(pairs);
263
                    for index in self.tap_sockets.len()..pairs {
264
                        let mut socket = new_socket(
265
                            self.dev_tap.as_deref(),
266
                            matches!(self.api, WorkerApi::IoUring),
267
                        )?;
268
                        setup_socket(&mut socket, self.if_name.as_deref(), true)?;
269
                        enable_tap_offload(&mut socket, self.driver_feature)?;
270
                        if let Some(r) = registry {
271
                            r.register(
272
                                &mut SourceFd(&socket.as_raw_fd()),
273
                                Token(index),
274
                                Interest::READABLE | Interest::WRITABLE,
275
                            )?;
276
                        }
277
                        self.tap_sockets.push(socket);
278
                    }
279
                    log::info!("{}: using {pairs} pairs of queues", self.name);
280
                    CtrlAck::OK
281
                }
282
                _ => CtrlAck::ERR,
283
            },
284
            _ => CtrlAck::ERR,
285
        };
286
        *ack_byte = ack.raw();
287
        Ok(1)
288
    }
289
}
290

291
impl Virtio for Net {
292
    type Config = NetConfig;
293
    type Feature = NetFeature;
294

295
    fn id(&self) -> DeviceId {
296
        DeviceId::Net
297
    }
298

299
    fn name(&self) -> &str {
300
        &self.name
301
    }
302

303
    fn num_queues(&self) -> u16 {
304
        let data_queues = self.config.max_queue_pairs << 1;
305
        if self.feature.contains(NetFeature::CTRL_VQ) {
306
            data_queues + 1
307
        } else {
308
            data_queues
309
        }
310
    }
311

312
    fn config(&self) -> Arc<NetConfig> {
313
        self.config.clone()
314
    }
315

316
    fn feature(&self) -> u128 {
317
        self.feature.bits() | FEATURE_BUILT_IN
318
    }
319

320
    fn spawn_worker<S, E>(
321
        self,
322
        event_rx: Receiver<WakeEvent<S, E>>,
323
        memory: Arc<RamBus>,
324
        queue_regs: Arc<[Queue]>,
325
    ) -> Result<(JoinHandle<()>, Arc<Waker>)>
326
    where
327
        S: IrqSender,
328
        E: IoeventFd,
329
    {
330
        match self.api {
×
331
            WorkerApi::Mio => Mio::spawn_worker(self, event_rx, memory, queue_regs),
×
332
            WorkerApi::IoUring => IoUring::spawn_worker(self, event_rx, memory, queue_regs),
×
333
        }
334
    }
335
}
336

337
impl VirtioMio for Net {
338
    fn reset(&mut self, registry: &Registry) {
339
        self.tap_sockets.truncate(1);
340
        let _ = registry.deregister(&mut SourceFd(&self.tap_sockets[0].as_raw_fd()));
341
    }
342

343
    fn activate<'a, 'm, Q, S, E>(
344
        &mut self,
345
        feature: u128,
346
        active_mio: &mut ActiveMio<'a, 'm, Q, S, E>,
347
    ) -> Result<()>
348
    where
349
        Q: VirtQueue<'m>,
350
        S: IrqSender,
351
        E: IoeventFd,
352
    {
353
        self.driver_feature = NetFeature::from_bits_retain(feature);
×
354
        let socket = &mut self.tap_sockets[0];
×
355
        enable_tap_offload(socket, self.driver_feature)?;
×
356
        active_mio.poll.registry().register(
×
357
            &mut SourceFd(&socket.as_raw_fd()),
×
358
            Token(0),
×
359
            Interest::READABLE | Interest::WRITABLE,
×
360
        )?;
361
        Ok(())
×
362
    }
363

364
    fn handle_event<'a, 'm, Q, S, E>(
365
        &mut self,
366
        event: &Event,
367
        active_mio: &mut ActiveMio<'a, 'm, Q, S, E>,
368
    ) -> Result<()>
369
    where
370
        Q: VirtQueue<'m>,
371
        S: IrqSender,
372
        E: IoeventFd,
373
    {
374
        let token = event.token().0;
×
375
        let irq_sender = active_mio.irq_sender;
×
376
        if event.is_readable() {
×
377
            let rx_queue_index = token << 1;
×
378
            let Some(Some(queue)) = active_mio.queues.get_mut(rx_queue_index) else {
×
379
                log::error!("{}: cannot find rx queue {rx_queue_index}", self.name);
×
380
                return Ok(());
×
381
            };
382
            let Some(socket) = self.tap_sockets.get(token) else {
×
383
                log::error!("{}: cannot find tap queue {token}", self.name);
×
384
                return Ok(());
×
385
            };
NEW
386
            queue.handle_desc(rx_queue_index as u16, irq_sender, copy_from_reader(socket))?;
×
387
        }
388
        if event.is_writable() {
×
389
            let tx_queue_index = (token << 1) + 1;
×
390
            let Some(Some(queue)) = active_mio.queues.get_mut(tx_queue_index) else {
×
391
                log::error!("{}: cannot find tx queue {tx_queue_index}", self.name);
×
392
                return Ok(());
×
393
            };
394
            let Some(socket) = self.tap_sockets.get(token) else {
×
395
                log::error!("{}: cannot find tap queue {token}", self.name);
×
396
                return Ok(());
×
397
            };
NEW
398
            queue.handle_desc(tx_queue_index as u16, irq_sender, copy_to_writer(socket))?;
×
399
        }
400
        Ok(())
×
401
    }
402

403
    fn handle_queue<'a, 'm, Q, S, E>(
404
        &mut self,
405
        index: u16,
406
        active_mio: &mut ActiveMio<'a, 'm, Q, S, E>,
407
    ) -> Result<()>
408
    where
409
        Q: VirtQueue<'m>,
410
        S: IrqSender,
411
        E: IoeventFd,
412
    {
413
        let Some(Some(queue)) = active_mio.queues.get_mut(index as usize) else {
×
414
            log::error!("{}: invalid queue index {index}", self.name);
×
415
            return Ok(());
×
416
        };
417
        let irq_sender = active_mio.irq_sender;
×
418
        let registry = active_mio.poll.registry();
×
419
        if index == self.config.max_queue_pairs * 2 {
×
NEW
420
            return queue.handle_desc(index, irq_sender, |desc| {
×
NEW
421
                let len = self.handle_ctrl_queue(desc, Some(registry))?;
×
UNCOV
422
                Ok(Some(len))
×
423
            });
424
        }
425
        let Some(socket) = self.tap_sockets.get(index as usize >> 1) else {
×
426
            log::error!("{}: invalid tap queue {}", self.name, index >> 1);
×
427
            return Ok(());
×
428
        };
429
        if index & 1 == 0 {
×
NEW
430
            queue.handle_desc(index, irq_sender, copy_from_reader(socket))
×
431
        } else {
NEW
432
            queue.handle_desc(index, irq_sender, copy_to_writer(socket))
×
433
        }
434
    }
435
}
436

437
impl VirtioIoUring for Net {
438
    fn activate<'a, 'm, Q, S, E>(
439
        &mut self,
440
        feature: u128,
441
        _ring: &mut ActiveIoUring<'a, 'm, Q, S, E>,
442
    ) -> Result<()>
443
    where
444
        S: IrqSender,
445
        Q: VirtQueue<'m>,
446
        E: IoeventFd,
447
    {
448
        self.driver_feature = NetFeature::from_bits_retain(feature);
×
449
        let socket = &mut self.tap_sockets[0];
×
450
        enable_tap_offload(socket, self.driver_feature)?;
×
451
        Ok(())
×
452
    }
453

454
    fn handle_buffer(
455
        &mut self,
456
        q_index: u16,
457
        buffer: &mut Descriptor,
458
        _irq_sender: &impl IrqSender,
459
    ) -> Result<BufferAction> {
460
        if q_index == self.config.max_queue_pairs * 2 {
461
            let len = self.handle_ctrl_queue(buffer, None)?;
462
            return Ok(BufferAction::Written(len));
463
        }
464
        let Some(socket) = self.tap_sockets.get(q_index as usize >> 1) else {
465
            log::error!("{}: invalid tap queue {}", self.name, q_index >> 1);
466
            return Ok(BufferAction::Written(0));
467
        };
468
        let entry = if q_index & 1 == 0 {
469
            let writable = &buffer.writable;
470
            opcode::Readv::new(
471
                Fd(socket.as_raw_fd()),
472
                writable.as_ptr() as *const _,
473
                writable.len() as _,
474
            )
475
            .build()
476
        } else {
477
            let readable = &buffer.readable;
478
            opcode::Writev::new(
479
                Fd(socket.as_raw_fd()),
480
                readable.as_ptr() as *const _,
481
                readable.len() as _,
482
            )
483
            .build()
484
        };
485
        Ok(BufferAction::Sqe(entry))
486
    }
487

488
    fn complete_buffer(
489
        &mut self,
490
        q_index: u16,
491
        _buffer: &mut Descriptor,
492
        cqe: &Cqe,
493
    ) -> Result<usize> {
494
        let ret = cqe.result();
495
        if ret < 0 {
496
            let err = std::io::Error::from_raw_os_error(-ret);
497
            log::error!("{}: failed to send/receive packet: {err}", self.name,);
498
            return Ok(0);
499
        }
500
        if q_index & 1 == 0 {
501
            Ok(ret as usize)
502
        } else {
503
            Ok(0)
504
        }
505
    }
506
}
507

508
const VNET_HEADER_SIZE: i32 = 12;
509

510
fn setup_socket(file: &mut File, if_name: Option<&str>, mq: bool) -> Result<()> {
511
    let mut tap_ifconfig = unsafe { MaybeUninit::<libc::ifreq>::zeroed().assume_init() };
512

513
    if let Some(name) = if_name {
514
        let name_len = std::cmp::min(tap_ifconfig.ifr_name.len() - 1, name.len());
515
        tap_ifconfig.ifr_name.as_mut_bytes()[0..name_len]
516
            .copy_from_slice(&name.as_bytes()[0..name_len]);
517
    }
518

519
    let mut flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
520
    if mq {
521
        flags |= IFF_MULTI_QUEUE;
522
    }
523
    tap_ifconfig.ifr_ifru.ifru_flags = flags as i16;
524

525
    unsafe { tun_set_iff(file, &tap_ifconfig) }.or_else(|e| {
526
        if e.kind() == ErrorKind::InvalidInput && !mq {
527
            flags |= IFF_MULTI_QUEUE;
528
            tap_ifconfig.ifr_ifru.ifru_flags = flags as i16;
529
            unsafe { tun_set_iff(file, &tap_ifconfig) }
530
        } else {
531
            Err(e)
532
        }
533
    })?;
534

535
    unsafe { tun_set_vnet_hdr_sz(file, &VNET_HEADER_SIZE) }?;
536
    Ok(())
537
}
538

539
fn detect_tap_offload(tap: &impl AsFd) -> NetFeature {
540
    let mut tap_feature = TunFeature::all();
541
    let mut dev_feat = NetFeature::GUEST_CSUM
542
        | NetFeature::GUEST_TSO4
543
        | NetFeature::GUEST_TSO6
544
        | NetFeature::GUEST_ECN
545
        | NetFeature::GUEST_UFO
546
        | NetFeature::GUEST_USO4
547
        | NetFeature::GUEST_USO6;
548
    if unsafe { tun_set_offload(tap, tap_feature.bits()) }.is_ok() {
549
        return dev_feat;
550
    }
551
    tap_feature &= !(TunFeature::USO4 | TunFeature::USO6);
552
    dev_feat &= !(NetFeature::GUEST_USO4 | NetFeature::GUEST_USO6);
553
    if unsafe { tun_set_offload(tap, tap_feature.bits()) }.is_ok() {
554
        return dev_feat;
555
    }
556
    tap_feature &= !(TunFeature::UFO);
557
    dev_feat &= !NetFeature::GUEST_UFO;
558
    if unsafe { tun_set_offload(tap, tap_feature.bits()) }.is_ok() {
559
        return dev_feat;
560
    }
561
    NetFeature::empty()
562
}
563

564
fn enable_tap_offload(tap: &mut File, feature: NetFeature) -> Result<()> {
565
    let mut tap_feature = TunFeature::empty();
566
    if feature.contains(NetFeature::GUEST_CSUM) {
567
        tap_feature |= TunFeature::CSUM;
568
    }
569
    if feature.contains(NetFeature::GUEST_TSO4) {
570
        tap_feature |= TunFeature::TSO4;
571
    }
572
    if feature.contains(NetFeature::GUEST_TSO6) {
573
        tap_feature |= TunFeature::TSO6;
574
    }
575
    if feature.contains(NetFeature::GUEST_ECN) {
576
        tap_feature |= TunFeature::TSO_ECN;
577
    }
578
    if feature.contains(NetFeature::GUEST_UFO) {
579
        tap_feature |= TunFeature::UFO;
580
    }
581
    if feature.contains(NetFeature::GUEST_USO4) {
582
        tap_feature |= TunFeature::USO4;
583
    }
584
    if feature.contains(NetFeature::GUEST_USO6) {
585
        tap_feature |= TunFeature::USO6;
586
    }
587
    unsafe { tun_set_offload(tap, tap_feature.bits()) }?;
588
    Ok(())
589
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc