From 78f631c903bd4f1978cc93244b1823b944b14700 Mon Sep 17 00:00:00 2001 From: hockeyfriend <48014457+hockeyfriend@users.noreply.github.com> Date: Mon, 23 Feb 2026 22:43:57 +0000 Subject: [PATCH] added igc device driver and added support for i210 Nics for igb driver --- README.md | 1 + lib/include/driver/igb/igb_type.h | 1 + lib/include/driver/igc/igc.h | 225 ++++++++ lib/include/driver/igc/igc_type.h | 327 +++++++++++ lib/src/Makefile | 1 + lib/src/device.cc | 21 + lib/src/driver/igc/igc.cc | 891 ++++++++++++++++++++++++++++++ 7 files changed, 1467 insertions(+) create mode 100644 lib/include/driver/igc/igc.h create mode 100644 lib/include/driver/igc/igc_type.h create mode 100644 lib/src/driver/igc/igc.cc diff --git a/README.md b/README.md index 542e0f8..d16b85f 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ See the table below for a list of device types supported by Ixl: |--------|-------------------------|---------------------------------------| | e1000 | e1000-compliant NICs | - | | igb | Intel I350, Intel 82576 | Currently lacks multi-queue support. | +| igc | Intel i225, Intel i226 | Currently lacks multi-queue support. 
| | ixgbe | Intel X520/X540 | - | ## Building diff --git a/lib/include/driver/igb/igb_type.h b/lib/include/driver/igb/igb_type.h index a2d7b5e..6468643 100644 --- a/lib/include/driver/igb/igb_type.h +++ b/lib/include/driver/igb/igb_type.h @@ -17,6 +17,7 @@ */ #define IGB_DEV_ID_82576 0x10C9 #define IGB_DEV_ID_I350 0x1521 +#define IGB_DEV_ID_I210 0x1533 /* * Descriptor structures diff --git a/lib/include/driver/igc/igc.h b/lib/include/driver/igc/igc.h new file mode 100644 index 0000000..1a198af --- /dev/null +++ b/lib/include/driver/igc/igc.h @@ -0,0 +1,225 @@ +/** + * @file igc.h + * @author Paul Richter (paul.richter@spreewalddreieck.de) + * @brief header of igc driver containing class Igc_device + * @version 0.1 + * @date 2025-11-07 + * + * @copyright Copyright (c) 2025 + * + * The igc device driver was developed as part of the bachelor thesis: "Generic + * aspects of porting a Linux Ethernet driver to the L4Re microkernel" It is + * nearly an identical copy of the ixl igb driver with changed register + * definitions. More information about how it works can be found there. (link + * will be inserted to a later date) + */ + + /* + Header file containing the structure of the ixl igc device driver heavly + based on the ixl igb device driver + */ + +#pragma once + +#include +#include + +#include + +#include "igc_type.h" +#include "../../pci.h" + +namespace Ixl { + +/** + * Device driver specialized for the igc NIC family (e.g. I225-IT). + * + * Note: For now, this software is only a slight adaptation of the E1000 driver, + * leaving out many features such as RSS (multi-queue receive) etc. Maybe we + * will add those later, which will make the igc driver much more similar to + * ixgbe than to the original E1000 one (even though many register definitions + * stay the same). 
+ */ +class Igc_device : public Ixl_device { +public: + // set number of queue entries to linux default value + static const int NUM_RX_QUEUE_ENTRIES = 256; + static const int NUM_TX_QUEUE_ENTRIES = 256; + + static const int MAX_RX_QUEUE_ENTRIES = 256; + static const int MAX_TX_QUEUE_ENTRIES = 256; + + static const int PKT_BUF_ENTRY_SIZE = 2048; + + // Reserve at least twice the RX queue depth of packets for mempools. This + // is what the driver needs to remain operational as every received packet + // is immediately replaced with a fresh one in the RX path. + static const int MIN_MEMPOOL_ENTRIES = 2 * NUM_RX_QUEUE_ENTRIES; + + static const int TX_CLEAN_BATCH = 32; + + static const uint64_t INTERRUPT_INITIAL_INTERVAL = 1000 * 1000 * 1000; + + Igc_device(L4vbus::Pci_dev &&dev, struct Dev_cfg &cfg, uint32_t itr_rate); + + std::string get_driver_name(void) { + return("ixl-igc"); + } + + inline uint32_t get_max_frame_size(void) { + return 0; + } + + uint32_t rx_batch(uint16_t queue_id, struct pkt_buf* bufs[], + uint32_t num_bufs); + + uint32_t tx_batch(uint16_t queue_id, struct pkt_buf* bufs[], + uint32_t num_bufs); + + void read_stats(struct device_stats *stats); + + void set_promisc(bool enabled); + + uint32_t get_link_speed(void); + uint32_t get_link_duplex(void); + + struct mac_address get_mac_addr(void); + + void set_mac_addr(struct mac_address mac); + + bool check_recv_irq(uint16_t qid) override; + + void ack_recv_irq(uint16_t qid) override; + + // Get the number of descriptors per RX queue + uint32_t get_rx_queue_depth(void) { + return NUM_RX_QUEUE_ENTRIES; + } + + // Get the number of descriptors per TX queue + uint32_t get_tx_queue_depth(void) { + return NUM_TX_QUEUE_ENTRIES; + } + + // Extend an RX queue's mempool + bool extend_rxq_mempool(uint16_t qid, uint32_t count) { + if (qid >= num_rx_queues) + return false; + + struct igc_rx_queue *queue = + ((struct igc_rx_queue *) rx_queues) + qid; + + return queue->mempool->reserve(count); + } + + // Shrink an RX 
queue's mempool + void shrink_rxq_mempool(uint16_t qid, uint32_t count) { + if (qid >= num_rx_queues) + return; + + struct igc_rx_queue *queue = + ((struct igc_rx_queue *) rx_queues) + qid; + + queue->mempool->cancel_reservation(count); + } + + /* + * Initializes and returns the Igc device. + * + * \param pci_dev PCI device handle received from this task's vbus + * \param cfg An Ixl device configuration. See Dev_cfg for details. + * + * \return The initialized Igc device. + */ + static Igc_device* igc_init(L4vbus::Pci_dev&& pci_dev, struct Dev_cfg &cfg); + +private: + // allocated for each rx queue, keeps state for the receive function + struct igc_rx_queue { + // DMA'able memory from that the individual descriptors are allocated + struct dma_memory descr_mem; + + // Array of descriptors backed by descr_mem + volatile struct igc_rx_desc* descriptors; + + // DMA'able memory for storing incoming packets + Mempool* mempool; + + // No. of descriptors in the queue + uint16_t num_entries; + // position we are reading from + uint16_t rx_index; + // True if this RX queue contains descriptors not yet processed by the + // driver. We use this flag to skip IRQ receive operations if necessary. + bool rx_pending = false; + // virtual addresses to map descriptors back to their mbuf for freeing + void* virtual_addresses[]; + }; + + // allocated for each tx queue, keeps state for the transmit function + struct igc_tx_queue { + // DMA'able memory from that the individual descriptors are allocated + struct dma_memory descr_mem; + + // Array of descriptors backed by descr_mem + volatile struct igc_tx_desc* descriptors; + + // No. 
of descriptors in the queue + uint16_t num_entries; + // position to clean up descriptors that were sent out by the nic + uint16_t clean_index; + // position to insert packets for transmission + uint16_t tx_index; + // virtual addresses to map descriptors back to their mbuf for freeing + void* virtual_addresses[]; + }; + + + + /*** Functions ***/ + + /** + * Enables an MSI for receive events. We will configure the NIC in a way + * that an MSI is generated for each packet received, while adhering to the + * ITR limit that the user can specify upon initializing the driver. + * + * \param qid Index of the queue for which the corresponding MSI-X shall be + * enabled. + */ + void enable_rx_interrupt(uint16_t qid); + + /** + * Disables an MSI for receive events. + * + * \param qid Index of the RX queue for which the corresponding MSI-X shall + * be disabled. + */ + void disable_rx_interrupt(uint16_t qid); + + void disable_interrupts(void); + + void init_link(void); + + void start_rx_queue(int queue_id); + + void start_tx_queue(int queue_id); + + void init_rx(void); + + void init_tx(void); + + void wait_for_link(void); + + void reset_and_init(void); + + void init_msix(void); + + /*** Member variables ***/ + + // MAC address of this device + struct mac_address mac_addr; + // Does mac_addr contain a valid value? 
+ bool mac_init = false; +}; + +} // namespace Ixl \ No newline at end of file diff --git a/lib/include/driver/igc/igc_type.h b/lib/include/driver/igc/igc_type.h new file mode 100644 index 0000000..a1dd505 --- /dev/null +++ b/lib/include/driver/igc/igc_type.h @@ -0,0 +1,327 @@ +/** + * @file igc_type.h + * @author Paul Richter (paul.richter@spreewalddreieck.de) + * @brief header file for igc driver + * @version 1.0 + * @date 2025-11-07 + * + * @copyright Copyright (c) 2025 + * + * The igc device driver was developed as part of the bachelor thesis: "Generic + * aspects of porting a Linux Ethernet driver to the L4Re microkernel" It is + * nearly an identical copy of the ixl igb driver with changed register + * definitions. More information about how it works can be found there. (link + * will be inserted to a later date) + */ + + /***************************************************************************** + * * + * igc_type.h - Various definitions for driving igc-style NICs * + * * + * Many of the contents are adapted from similar header files of the Linux * + * kernel, ixl igb driver and the i225 datasheet. 
* + * * + ****************************************************************************/ + +#ifndef _IGC_TYPE_H_ +#define _IGC_TYPE_H_ + +// Receive descriptor +struct igc_rx_desc { + uint64_t buf_addr; /* Address of data buffer */ + uint16_t length; /* Amount of data transferred into buffer via DMA */ + uint16_t csum; /* Packet checksum */ + uint8_t status; /* Descriptor status */ + uint8_t errors; /* Descriptor errors */ + uint16_t vlan_tag; +}; + +// Transmit descriptor (copied from igb driver) +struct igc_tx_desc { + uint64_t buffer_addr; /* Address of the descriptor's data buffer */ + union { + uint32_t data; + struct { + uint16_t length; /* Data buffer length */ + uint8_t cso; /* Checksum offset */ + uint8_t cmd; /* Descriptor control */ + } flags; + } lower; + union { + uint32_t data; + struct { + uint8_t status; /* Descriptor status */ + uint8_t css; /* Checksum start */ + uint16_t special; + } fields; + } upper; +}; + +#define IGC_DEV_ID_I225_LM 0x15F2 +#define IGC_DEV_ID_I225_V 0x15F3 +#define IGC_DEV_ID_I225_I 0x15F8 +#define IGC_DEV_ID_I220_V 0x15F7 +#define IGC_DEV_ID_I225_K 0x3100 +#define IGC_DEV_ID_I225_K2 0x3101 +#define IGC_DEV_ID_I226_K 0x3102 +#define IGC_DEV_ID_I225_LMVP 0x5502 +#define IGC_DEV_ID_I226_LMVP 0x5503 +#define IGC_DEV_ID_I225_IT 0x0D9F +#define IGC_DEV_ID_I226_LM 0x125B +#define IGC_DEV_ID_I226_V 0x125C +#define IGC_DEV_ID_I226_IT 0x125D +#define IGC_DEV_ID_I221_V 0x125E +#define IGC_DEV_ID_I226_BLANK_NVM 0x125F +#define IGC_DEV_ID_I225_BLANK_NVM 0x15FD + +// General registers (taken from Linux IGC driver /igc_regs.h line 7) +#define IGC_CTRL 0x00000 /* Device Control - RW */ +#define IGC_STATUS 0x00008 /* Device Status - RO */ +#define IGC_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define IGC_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define IGC_MDIC 0x00020 /* MDI Control - RW */ +#define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ +#define IGC_VET 0x00038 /* VLAN Ether Type - RW */ +#define IGC_LEDCTL 
0x00E00 /* LED Control - RW */ +#define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */ +#define IGC_GPHY_VERSION 0x0001E /* I225 gPHY Firmware Version */ + +// igc_regs.h line 44 +/* Interrupt Register Description */ +#define IGC_EICR 0x01580 /* Ext. Interrupt Cause read - W0 */ +#define IGC_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ +#define IGC_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ +#define IGC_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ +#define IGC_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ +#define IGC_EIAM 0x01530 /* Ext. Interrupt Auto Mask - RW */ +#define IGC_ICR 0x01500 /* Intr Cause Read - RC/W1C */ +#define IGC_ICS 0x01504 /* Intr Cause Set - WO */ +#define IGC_IMS 0x01508 /* Intr Mask Set/Read - RW */ +#define IGC_IMC 0x0150C /* Intr Mask Clear - WO */ +#define IGC_IAM 0x01510 /* Intr Ack Auto Mask- RW */ +/* Intr Throttle - RW */ +#define IGC_EITR 0x01680 +/* Interrupt Vector Allocation - RW */ +#define IGC_IVAR0 0x01700 +#define IGC_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ +#define IGC_GPIE 0x01514 /* General Purpose Intr Enable - RW */ + +// igc_defines.h 304 +/* Receive Descriptor bit definitions */ +#define IGC_RXD_STAT_DD 0x01 /* Descriptor Done */ +// igc_defines.h 370 +#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */ +// +#define IGC_RDH 0x0C010 +#define IGC_RDT 0x0C018 + +// (igc_regs.h line 64) +/* RSS registers */ +#define IGC_MRQC 0x05818 /* Multiple Receive Control - RW */ + +// (igc_defines.h line 386) +#define IGC_MRQC_ENABLE_RSS_MQ 0x00000002 + +/* transmit descriptor defines */ +// (igc_regs.h line 114) +#define IGC_TCTL 0x00400 /* Tx Control - RW */ +#define IGC_TDBAL 0x0E000 +#define IGC_TDBAH 0x0E004 +#define IGC_TDLEN 0x0E008 +#define IGC_TDH 0x0E010 +#define IGC_TDT 0x0E018 +#define IGC_TXDCTL0 0x0E028 + +// adapted from igb driver +#define IGC_TXDCTL_EN 0x02000000 + +// adapated from igb RX checksum control register +#define IGC_RXCSUM_TUOFL 0x00000200 /* TCP & UDP csum 
offloading */ +#define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ + +// (igc_defines.h 313) +/* Transmit descriptor defines */ +#define IGC_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define IGC_TXD_STAT_DD 0x00000001 /* Descriptor Done */ + +// copied from igb driver +#define IGC_TXD_CMD_IFCS 0x02000000 +#define IGC_TXD_CMD_RS 0x08000000 + +// (igc_defines.h 331) +/* Transmit Control */ +#define IGC_TCTL_EN 0x00000002 /* enable Tx */ +#define IGC_TCTL_PSP 0x00000008 /* pad short packets */ +#define IGC_TCTL_CT 0x00000ff0 /* collision threshold */ + +// (igc_defines.h 217) +/* Collision related configuration parameters */ +#define IGC_COLLISION_THRESHOLD 15 +#define IGC_CT_SHIFT 4 + +// (igc_regs.h line 223). + +// Device Status +#define IGC_STATUS_FD 0x00000001 /* Full duplex.0=half,1=full */ +#define IGC_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define IGC_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define IGC_STATUS_FUNC_SHIFT 2 +#define IGC_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define IGC_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define IGC_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define IGC_STATUS_SPEED_2500 0x00400000 /* Speed 2.5Gb/s */ + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + +typedef enum +{ + LINK_SPEED_UNDEFINED = 0, + LINK_SPEED_10 = 10 /* 10 Mbit/s */, + LINK_SPEED_100 = 100 /* 100 Mbit/s */, + LINK_SPEED_1000 = 1000 /* 1000 Mbit/s */, + LINK_SPEED_2500 = 2500 /* 2500 Mbit/s */, +} EnumLinkSpeed; + +typedef enum{ + LINK_DUPLEX_UNDEFINED = 0, + LINK_DUPLEX_HALF = 1, + LINK_DUPLEX_FULL = 2 +} EnumLinkDuplex; + +// copied from igb +#define IGC_CTRL_DEV_RST 0x20000000 + +// copied from igb +/* general purpose interrupt enable */ +#define IGC_GPIE_NSICR 0x00000001 +#define IGC_GPIE_MMSIX 0x00000010 +#define IGC_GPIE_EIAME 0x40000000 +#define IGC_GPIE_PBA 0x80000000 + +// taken from 
igc_defines.h 131 +/* Device Control */ +#define IGC_CTRL_RST 0x04000000 /* Global reset */ + +#define IGC_CTRL_PHY_RST 0x80000000 /* PHY Reset */ +#define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define IGC_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define IGC_CTRL_FRCDPX 0x00001000 /* Force Duplex */ + +// Receive registers (taken from igc_defines.h line 347 and igc_regs.h line 98). +#define IGC_RCTL 0x00100 /* Rx Control - RW */ +#define IGC_RCTL_RST 0x00000001 /* Software reset */ +#define IGC_RCTL_EN 0x00000002 /* enable */ +#define IGC_RCTL_SBP 0x00000004 /* store bad packet */ +#define IGC_RCTL_UPE 0x00000008 /* unicast promisc enable */ +#define IGC_RCTL_MPE 0x00000010 /* multicast promisc enable */ +#define IGC_RCTL_LPE 0x00000020 /* long packet enable */ +#define IGC_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define IGC_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ +#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */ + +// from igb linux header confirmed with i225 datasheet that bit mask is the same +#define IGC_RCTL_VFE 0x00040000 /* VLAN Filter */ + +#define IGC_SRRCTL0 0x0C00C // adapted from igc_regs.h line 94 +#define IGC_PSRTYPE 0x05480 +#define IGC_RDBAL 0x0C000 +#define IGC_RDBAH 0x0C004 +#define IGC_RDLEN 0x0C008 +#define IGC_RXDCTL0 0x0C028 +#define IGC_RQDPC 0x0C030 +#define IGC_RXCSUM 0x05000 /* Rx Checksum Control - RW */ +#define IGC_RLPML 0x05004 /* Rx Long Packet Max Length */ +#define IGC_RFCTL 0x05008 /* Receive Filter Control*/ +#define IGC_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define IGC_RA 0x05400 /* Receive Address - RW Array */ +#define IGC_UTA 0x0A000 /* Unicast Table Array - RW */ +#define IGC_RAL(_n) (0x05400 + ((_n) * 0x08)) +#define IGC_RAL_MAC_ADDR_LEN 4 +#define IGC_RAH(_n) (0x05404 + ((_n) * 0x08)) +#define IGC_RAH_MAC_ADDR_LEN 2 +#define IGC_VLANPQF 0x055B0 /* VLAN Priority Queue Filter - RW */ + +#define 
IGC_RXDCTL_EN 0x02000000 // adapted from igc.h line 491 + +// copied from igb, cross-referenced with the datasheet to ensure that it's the same for +// igc +#define IGC_SRRCTL_DREN 0x80000000 /* Drop enabled if no descr. avail. */ + +#define ETH_ALEN 6 /* length of Ethernet/MAC address */ + +// Statistics registers. +#define IGC_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define IGC_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define IGC_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define IGC_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define IGC_SCC 0x04014 /* Single Collision Count - R/clr */ +#define IGC_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define IGC_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define IGC_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define IGC_COLC 0x04028 /* Collision Count - R/clr */ +#define IGC_RERC 0x0402C /* Receive Error Count - R/clr */ +#define IGC_DC 0x04030 /* Defer Count - R/clr */ +#define IGC_TNCRS 0x04034 /* Tx-No CRS - R/clr */ +#define IGC_HTDPMC 0x0403C /* Host Transmit Discarded by MAC - R/clr */ +#define IGC_RLEC 0x04040 /* Receive Length Error Count - R/clr */ +#define IGC_XONRXC 0x04048 /* XON Rx Count - R/clr */ +#define IGC_XONTXC 0x0404C /* XON Tx Count - R/clr */ +#define IGC_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ +#define IGC_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ +#define IGC_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ +#define IGC_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ +#define IGC_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ +#define IGC_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ +#define IGC_PRC511 0x04068 /* Packets Rx (256-511 bytes) - R/clr */ +#define IGC_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ +#define IGC_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ +#define IGC_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ +#define IGC_BPRC 0x04078 /* Broadcast Packets Rx Count - 
R/clr */ +#define IGC_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ +#define IGC_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ +#define IGC_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ +#define IGC_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ +#define IGC_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ +#define IGC_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ +#define IGC_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ +#define IGC_RUC 0x040A4 /* Rx Undersize Count - R/clr */ +#define IGC_RFC 0x040A8 /* Rx Fragment Count - R/clr */ +#define IGC_ROC 0x040AC /* Rx Oversize Count - R/clr */ +#define IGC_RJC 0x040B0 /* Rx Jabber Count - R/clr */ +#define IGC_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ +#define IGC_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define IGC_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ +#define IGC_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ +#define IGC_TORH 0x040C4 /* Total Octets Rx High - R/clr */ +#define IGC_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ +#define IGC_TOTH 0x040CC /* Total Octets Tx High - R/clr */ +#define IGC_TPR 0x040D0 /* Total Packets Rx - R/clr */ +#define IGC_TPT 0x040D4 /* Total Packets Tx - R/clr */ +#define IGC_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ +#define IGC_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ +#define IGC_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ +#define IGC_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ +#define IGC_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ +#define IGC_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ +#define IGC_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ +#define IGC_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ +#define IGC_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ +#define IGC_IAC 0x04100 /* Interrupt Assertion Count */ +#define IGC_RPTHC 0x04104 /* Rx Packets To Host */ +#define 
IGC_TLPIC 0x04148 /* EEE Tx LPI Count */ +#define IGC_RLPIC 0x0414C /* EEE Rx LPI Count */ +#define IGC_HGPTC 0x04118 /* Host Good Packets Tx Count */ +#define IGC_RXDMTC 0x04120 /* Rx Descriptor Minimum Threshold Count */ +#define IGC_HGORCL 0x04128 /* Host Good Octets Received Count Low */ +#define IGC_HGORCH 0x0412C /* Host Good Octets Received Count High */ +#define IGC_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ +#define IGC_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ +#define IGC_LENERRS 0x04138 /* Length Errors Count */ + +#endif /* _IGC_TYPE_H_ */ \ No newline at end of file diff --git a/lib/src/Makefile b/lib/src/Makefile index 013704e..d810d44 100644 --- a/lib/src/Makefile +++ b/lib/src/Makefile @@ -23,6 +23,7 @@ SRC_CC = log.cc \ device.cc \ driver/e1000/e1000.cc \ driver/igb/igb.cc \ + driver/igc/igc.cc \ driver/ixgbe/ixgbe.cc # Include package-private header files diff --git a/lib/src/device.cc b/lib/src/device.cc index 30825cb..9dcafe6 100644 --- a/lib/src/device.cc +++ b/lib/src/device.cc @@ -25,6 +25,7 @@ #include "driver/e1000/e1000.h" #include "driver/igb/igb.h" #include "driver/ixgbe/ixgbe.h" +#include "driver/igc/igc.h" #include "pci.h" /**************************************************************************** @@ -174,6 +175,7 @@ Ixl_device* Ixl_device::ixl_init(L4::Cap vbus, return E1000_device::e1000_init(std::move(dev), cfg); // Hereinafter all igb-driver devices case IGB_DEV_ID_82576: + case IGB_DEV_ID_I210: case IGB_DEV_ID_I350: ixl_warn("The Igb driver provides only a limited feature " "set. 
You have been warned!"); @@ -183,6 +185,25 @@ Ixl_device* Ixl_device::ixl_init(L4::Cap vbus, case IXGBE_DEV_ID_82598: ixl_info("Trying ixgbe..."); return Ixgbe_device::ixgbe_init(std::move(dev), cfg); + + // Hereinafter all igc-driven devices + // i225 NICs + case IGC_DEV_ID_I225_LM: + case IGC_DEV_ID_I225_V: + case IGC_DEV_ID_I225_I: + case IGC_DEV_ID_I225_K: + case IGC_DEV_ID_I225_K2: + case IGC_DEV_ID_I225_LMVP: + case IGC_DEV_ID_I225_IT: + // i226 NICs + case IGC_DEV_ID_I226_K: + case IGC_DEV_ID_I226_LMVP: + case IGC_DEV_ID_I226_LM: + case IGC_DEV_ID_I226_V: + case IGC_DEV_ID_I226_IT: + ixl_info("Trying IGC..."); + return Igc_device::igc_init(std::move(dev), cfg); + default: ixl_error("Unsupported device %x of vendor %x. " "No suitable driver found.", device_id, diff --git a/lib/src/driver/igc/igc.cc b/lib/src/driver/igc/igc.cc new file mode 100644 index 0000000..385e528 --- /dev/null +++ b/lib/src/driver/igc/igc.cc @@ -0,0 +1,891 @@ +/** + * @file igc.cc + * @author Paul Richter (paul.richter@spreewalddreieck.de) + * @brief implementation of the igc driver (class Igc_device) + * @version 1.0 + * @date 2025-11-07 + * + * @copyright Copyright (c) 2025 + * + * The igc device driver was developed as part of the bachelor thesis: "Generic + * aspects of porting a Linux Ethernet driver to the L4Re microkernel" It is + * nearly an identical copy of the ixl igb driver with changed register + * definitions. More information about how it works can be found there. 
(link + * will be inserted to a later date) + */ + + +#include +#include + +#include "driver/igc/igc.h" +#include + +using namespace Ixl; + +/**************************************************************************** + * * + * function implementation * + * * + ****************************************************************************/ + + /*** Constructor ***/ +Igc_device::Igc_device(L4vbus::Pci_dev&& dev, struct Dev_cfg &cfg, uint32_t itr_rate) { + l4_timeout_s l4tos; // L4 timeout object with us granularity + + if (cfg.num_rx_queues != 1) { + ixl_error("Currently, an Igc device supports exactly one receive queue."); + } + if (cfg.num_tx_queues != 1) { + ixl_error("Currently, an Igc device supports exactly one transmit queue."); + } + + // (see igb for reference) + num_rx_queues = cfg.num_rx_queues; + num_tx_queues = cfg.num_tx_queues; + + // Set up IRQ-related data + if (cfg.irq_timeout_ms < 0) { + l4tos = l4_timeout_from_us(L4_TIMEOUT_US_NEVER); + } else { + l4tos = l4_timeout_from_us(cfg.irq_timeout_ms * 1000); + } + + if (cfg.irq_timeout_ms == 0) { + // seems to only be used by ixl driver test packages + interrupts.mode = interrupt_mode::Disable; + } else if (cfg.irq_timeout_ms == -1) { + // default option set by the Virtual Network Switch + interrupts.mode = interrupt_mode::Notify; + } else { + // does not seem to be used anywhere + interrupts.mode = interrupt_mode::Wait; + } + + + interrupts.itr_rate = itr_rate; + interrupts.timeout = l4_timeout(l4tos, l4tos); + + /* map io memory based on memory address provided by pci register*/ + pci_dev = dev; + + // Map BAR0 region + ixl_debug("Mapping BAR0 I/O memory..."); + baddr[0] = pci_map_bar(pci_dev, 0); + + /* dma todo*/ + // Create a DMA space for this device + create_dma_space(); + + if(interrupts.mode != interrupt_mode::Disable) { + // setup icu capability as before managing interrupts (see igb for + // reference) + setup_icu_cap(); + init_msix(); + } + + rx_queues = calloc(num_rx_queues, sizeof(struct 
igc_rx_queue) + sizeof(void*) * MAX_RX_QUEUE_ENTRIES); + tx_queues = calloc(num_tx_queues, sizeof(struct igc_tx_queue) + sizeof(void*) * MAX_TX_QUEUE_ENTRIES); +} + + + +/* Init functions*/ + +Igc_device* Igc_device::igc_init(L4vbus::Pci_dev&& pci_dev, + struct Dev_cfg &cfg) { + + ixl_info("Entering..."); + + // Create a new IGC device. itr_rate set to 0x028 yields max 97600 INT/s. + // TODO create define for 0x028.. TODO: Check whether these IRQ settings are + // meaningful for Igc. + Igc_device *dev = new Igc_device(std::move(pci_dev), cfg, 0x028); + + // (Re-) initialize the device, making it ready for operations + dev->reset_and_init(); + + return dev; +} + +// Reset the device and bring up the link again in a fresh state +void Igc_device::reset_and_init(void) { + ixl_info("Resetting Igc device"); + + // disable interrupts + disable_interrupts(); + + // Stop receive and transmit units, wait for pending transactions to + // complete + clear_flags32(baddr[0], IGC_RCTL, IGC_RCTL_EN); + clear_flags32(baddr[0], IGC_TCTL, IGC_TCTL_EN); + usleep(10000); + + // reset device Issue the reset command (done by setting the reset bit in + // the ctrl reg) + uint32_t ctrl_reg = get_reg32(baddr[0], IGC_CTRL); + + set_reg32(baddr[0], IGC_CTRL, ctrl_reg | IGC_CTRL_DEV_RST | IGC_CTRL_PHY_RST); + // Wait for NIC to read default settings from EEPROM + usleep(10000); + + // disable interrupts again after reset + disable_interrupts(); + + ixl_info("Reset completed, starting init phase."); + /* TODO: perform configuration */ + + // init link connection + ixl_debug("Init link connection"); + init_link(); + + // Read out MAC address and return. 
+ struct mac_address macAddrStruct = get_mac_addr(); + + uint32_t linkSpeed = get_link_speed(); + uint32_t linkDuplex = get_link_duplex(); + + ixl_info("Exiting (MAC address from EEPROM: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x, link speed: %d, link duplex: %d) ...", + macAddrStruct.addr[0], macAddrStruct.addr[1], macAddrStruct.addr[2], + macAddrStruct.addr[3], macAddrStruct.addr[4], macAddrStruct.addr[5], + linkSpeed, linkDuplex); + + /* TODO: initialize stats */ + /* TODO: initialize Receive & Transmit */ + init_rx(); + ixl_debug("Receive should be enabled: %d", get_reg32(baddr[0], IGC_RCTL) & IGC_RCTL_EN); + + init_tx(); + + // Start RX and TX queues + start_rx_queue(0); + start_tx_queue(0); + ixl_debug("Transmit should be enabled: %d", get_reg32(baddr[0], IGC_TCTL) & IGC_TCTL_EN); + usleep(1000); + + /* TODO: enable interrupts*/ + + // Enable IRQ for receiving packets + if (interrupts.mode != interrupt_mode::Disable) { + // Configure the NIC to use multiple MSI-X mode (one IRQ per queue), + // also set other flags recommended in section 7.3.3.11 + set_flags32(baddr[0], IGC_GPIE, IGC_GPIE_MMSIX | + IGC_GPIE_NSICR | + IGC_GPIE_EIAME | + IGC_GPIE_PBA); + + // For now, only enable the IRQ of the first receive queue + enable_rx_interrupt(0); + } + + /* enable promiscuous to receive all network packets (without promiscuous + the NIC drops all packets that are not addressed to it. 
Destination MAC != NIC + MAC)*/ + set_promisc(true); + + // if no link is found it waits for 10 secs for link to come up + wait_for_link(); + +} + +void Igc_device::init_link(void) { + uint32_t ctrl; + + ixl_info("Setting link up"); + + ctrl = get_reg32(baddr[0], IGC_CTRL); + + ctrl |= IGC_CTRL_SLU; + ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX); // force auto-neg and duplex + + set_reg32(baddr[0], IGC_CTRL, ctrl); +} + +void Igc_device::init_msix(void) { + // based on igb setup_interrupt() + + interrupts.queues = (struct interrupt_queue*) malloc(num_rx_queues * sizeof(struct interrupt_queue)); + + // FIXME: We should rely on MSI-X / MSI only for Igb devices, also these NIC + // have more than one interrupt available + // + // Determine type of interrupt available at the device. We will only go with + // MSI-X in non-SR-IOV mode. + if (pcidev_supports_msix(pci_dev)) { + uint32_t bir; // BAR location of MSI-X table + uint32_t table_offs; // Offset of MSI-X table in BAR + uint32_t table_size; // Size of MSI-X table + + ixl_info("Using MSI-X interrupts..."); + interrupts.interrupt_type = IXL_IRQ_MSIX; + setup_msix(pci_dev); + + pcidev_get_msix_info(pci_dev, &bir, &table_offs, &table_size); + // Check whether the requested BAR is already mapped + if (baddr[bir] == NULL) { + ixl_debug("Mapping in BAR%u for accessing MSI-X table.", bir); + baddr[bir] = pci_map_bar(pci_dev, bir); + } + + // For now we will do a 1:1 mapping between RX queue number and the + // MSI-X vector allocated for the respective queue + for (unsigned int rq = 0; rq < num_rx_queues; rq++) { + // L4 representation of the MSI vector. We need to add flags to use + // L4's API correctly... + uint32_t msi_vec_l4; + l4_icu_msi_info_t msi_info; + + // This is what took me three hours to realize: In the L4Re API, the + // same interface is used to handle legacy IRQs and MSIs. 
+ // Apparently, this is why it is mandatory to add a special MSI flag + // to the actual MSI vector ID when calling L4Re functions that + // should do something w.r.t. to said MSI... + msi_vec_l4 = rq | L4::Icu::F_msi; + create_and_bind_irq(msi_vec_l4, &interrupts.queues[rq].irq, + interrupts.vicu); + + // Get the MSI info + uint64_t source = pci_dev.dev_handle() | L4vbus::Icu::Src_dev_handle; + L4Re::chksys(interrupts.vicu->msi_info(msi_vec_l4, source, + &msi_info), + "Failed to retrieve MSI info."); + ixl_debug("MSI info: vector = 0x%x addr = %llx, data = %x", + rq, msi_info.msi_addr, msi_info.msi_data); + + // PCI-enable of MSI-X + pcidev_enable_msix(rq, msi_info, baddr[bir], table_offs, + table_size); + + L4Re::chksys(l4_ipc_error(interrupts.vicu->unmask(msi_vec_l4), + l4_utcb()), + "Failed to unmask interrupt"); + + ixl_debug("MSI-X vector allocated for RX queue %u is %u", + rq, rq); + + interrupts.queues[rq].moving_avg.length = 0; + interrupts.queues[rq].moving_avg.index = 0; + interrupts.queues[rq].msi_vec = rq; + interrupts.queues[rq].interval = INTERRUPT_INITIAL_INTERVAL; + + ixl_debug("Attached to MSI-X %u", rq); + } + } + else { + // Disable IRQs completely if MSI-X is not present + interrupts.interrupt_type = IXL_IRQ_LEGACY; + + ixl_warn("Device does not support MSIs. Disabling interrupts..."); + interrupts.mode = interrupt_mode::Disable; + return; + } +} + +void Igc_device::init_rx(void) { + // function is based on ixl igb driver + + // For now we assume that this function is only called immediately after a + // reset operation with RX and TX disabled, so we do not need to disable + // them again here. 
+ + // Disable VLAN filtering as we do not support it anyways + clear_flags32(baddr[0], IGC_RCTL, IGC_RCTL_VFE); + + for (uint16_t i = 0; i < num_rx_queues; i++) { + ixl_debug("initializing rx queue %d", i); + + + // Instruct NIC to drop packets if no RX descriptors are available + // FIXME: When using multiple RX queues, choose the rights SRRCTL reg + set_flags32(baddr[0], IGC_SRRCTL0, IGC_SRRCTL_DREN); + + struct igc_rx_queue* queue = ((struct igc_rx_queue*) rx_queues) + i; + + uint32_t ring_size_bytes = NUM_RX_QUEUE_ENTRIES * sizeof(struct igc_rx_desc); + struct dma_memory mem = memory_allocate_dma(*this, ring_size_bytes); + + // neat trick from Snabb: initialize to 0xFF to prevent rogue memory + // accesses on premature DMA activation + memset(mem.virt, -1, ring_size_bytes); + + // Keep a reference to mem in the queue, otherwise the object will go + // out of scope, leading to the revocation of the backing capability + queue->descr_mem = mem; + + // private data for the driver, 0-initialized + queue->num_entries = NUM_RX_QUEUE_ENTRIES; + queue->rx_index = 0; + queue->descriptors = (struct igc_rx_desc*) mem.virt; + + // Tell the device where it can write to (its iova, so DMA addrs) + set_reg32(baddr[0], IGC_RDBAL, (uint32_t) (mem.phy & 0xFFFFFFFFull)); + set_reg32(baddr[0], IGC_RDBAH, (uint32_t) (mem.phy >> 32)); + set_reg32(baddr[0], IGC_RDLEN, ring_size_bytes); + // Set ring to empty at start + set_reg32(baddr[0], IGC_RDH, 0); + set_reg32(baddr[0], IGC_RDT, 0); + ixl_debug("rx ring %d phy addr: 0x%012llX", i, mem.phy); + ixl_debug("rx ring %d virt addr: 0x%012lX", i, (uintptr_t) mem.virt); + } + + // Enable checksum offloading for CRC and received UCP/TCP packets + set_flags32(baddr[0], IGC_RXCSUM, IGC_RXCSUM_TUOFL | IGC_RXCSUM_CRCOFL); + + // Merely kicks of the RX part by setting the RX enabled bit in RCTL Also + // enables reception of broadcast frames (BAM) + set_flags32(baddr[0], IGC_RCTL, IGC_RCTL_EN | IGC_RCTL_BAM); +} + +void Igc_device::init_tx(void) 
{ + // FIXME: Enable mq support for this type of NIC + for (uint16_t i = 0; i < num_tx_queues; i++) { + struct igc_tx_queue* queue = ((struct igc_tx_queue*) tx_queues) + i; + ixl_debug("initializing tx queue %d", i); + + // setup descriptor ring, see section 7.1.9 + uint32_t ring_size_bytes = NUM_TX_QUEUE_ENTRIES * sizeof(struct igc_tx_desc); + struct dma_memory mem = memory_allocate_dma(*this, ring_size_bytes); + memset(mem.virt, -1, ring_size_bytes); + + // tell the device where it can write to (its iova, so DMA addrs) + set_reg32(baddr[0], IGC_TDBAL, (uint32_t) (mem.phy & 0xFFFFFFFFull)); + set_reg32(baddr[0], IGC_TDBAH, (uint32_t) (mem.phy >> 32)); + set_reg32(baddr[0], IGC_TDLEN, ring_size_bytes); + ixl_debug("tx ring %d phy addr: 0x%012llX", i, mem.phy); + ixl_debug("tx ring %d virt addr: 0x%012lX", i, (uintptr_t) mem.virt); + + // Init TX queue to empty + set_reg32(baddr[0], IGC_TDH, 0); + set_reg32(baddr[0], IGC_TDT, 0); + + // Keep a reference to mem in the queue, otherwise the object will go + // out of scope, leading to the revocation of the backing capability + queue->descr_mem = mem; + + // private data for the driver, 0-initialized + queue->num_entries = NUM_TX_QUEUE_ENTRIES; + queue->descriptors = (struct igc_tx_desc*) mem.virt; + } +} + +/* Getter */ + +// Get the link speed in Mbps, or 0 if link is down +uint32_t Igc_device::get_link_speed(void) { + + uint32_t status = get_reg32(baddr[0], IGC_STATUS); + + if(!(status & IGC_STATUS_LU)) + { + ixl_info("Can not get link speed, device not up!"); + return 0; + } + + if(status & IGC_STATUS_SPEED_2500) + return 2500; + if(status & IGC_STATUS_SPEED_1000) + return 1000; + if(status & IGC_STATUS_SPEED_100) + return 100; + + return 0; +} + +uint32_t Igc_device::get_link_duplex(void) { + + // Locals. 
+ uint32_t status; + uint16_t duplex; + + // Read status register + status = get_reg32(baddr[0], IGC_STATUS); + if (status & IGC_STATUS_FD) { + duplex = FULL_DUPLEX; + //ixl_debug("Full Duplex\n"); + } else { + duplex = HALF_DUPLEX; + //ixl_debug("Half Duplex\n"); + } + + return (uint32_t) duplex; //TODO ideally rename type in device.h to return the enum, need to put enums into shared device.h for example tho +} + +struct mac_address Igc_device::get_mac_addr(void) { + + ixl_info("Entering..."); + + // Locals. + uint32_t rar_high; + uint32_t rar_low; + uint16_t i; + + // Read high and low registers. + rar_high = get_reg32(baddr[0], IGC_RAH(0)); + rar_low = get_reg32(baddr[0], IGC_RAL(0)); + + // Parse bytes into MAC address. + struct mac_address macAddrStruct = {0x0}; + + for (i = 0; i < IGC_RAL_MAC_ADDR_LEN; i++) { + macAddrStruct.addr[i] = (uint8_t)(rar_low >> (i * 8)); + } + for (i = 0; i < IGC_RAH_MAC_ADDR_LEN; i++) { + macAddrStruct.addr[IGC_RAL_MAC_ADDR_LEN + i] = (uint8_t)(rar_high >> (i * 8)); + } + // Return. + return macAddrStruct; +} + + + +/* Setter */ + +void Igc_device::set_promisc(bool enabled) { + // Set / clear settings for both unicast and multicast packets + if (enabled) { + ixl_info("enabling promisc mode"); + set_flags32(baddr[0], IGC_RCTL, IGC_RCTL_MPE | IGC_RCTL_UPE); + } else { + ixl_info("disabling promisc mode"); + clear_flags32(baddr[0], IGC_RCTL, IGC_RCTL_MPE | IGC_RCTL_UPE); + } +} + +void Igc_device::set_mac_addr(struct mac_address mac) { + (void) mac; + return; +} + + +/* Enabler/Disabler */ + +// disable all interrupts +void Igc_device::disable_interrupts(void) { + ixl_debug("Masking off all IRQs for Igc device"); + set_reg32(baddr[0], IGC_IMC, 0xffffffff); + set_reg32(baddr[0], IGC_EIMC, 0xffffffff); + + // clear pending interrupts + get_reg32(baddr[0], IGC_EICR); +} + + + +// Enables a receive interrupt of the NIC. 
+// */ +void Igc_device::enable_rx_interrupt(uint16_t qid) { + // Get MSI-X vector allocated for the respective RX queue + uint32_t msi_vec = interrupts.queues[qid].msi_vec; + // Current content of IVAR register + uint32_t ivar; + + // Allocate an IRQ vector via the IVAR reg, see also sections 7.3.2 and + // 8.8.15 of the I350 programmers manual + ivar = get_reg32(baddr[0], IGC_IVAR0 + 4 * (qid / 2)); + + if ((qid % 2) == 0) { + // Restrict queue ID to 5 bit in length and set valid bit for the new + // entry in the IVAR reg + ivar = ivar | (msi_vec & 0x0000001f) | 0x00000080; + + // Set new IVAR value + set_reg32(baddr[0], IGC_IVAR0 + 4 * (qid / 2), ivar); + } + else { + // Restrict queue ID to 5 bit in length and set valid bit for the new + // entry in the IVAR reg + uint32_t vec = (msi_vec & 0x0000001f) << 16; + ivar = ivar | vec | 0x00800000; + + // Set new IVAR value + set_reg32(baddr[0], IGC_IVAR0 + 4 * (qid / 2), ivar); + } + + // Limit the ITR to prevent IRQ storms + set_reg32(baddr[0], IGC_EITR + 4 * msi_vec, + (interrupts.itr_rate & 0x00001fff) << 2); + + // No auto clear, following an interrupt, software might read the EICR + // register to check for the interrupt causes. + set_flags32(baddr[0], IGC_EIAC, 1 << msi_vec); + + // Set the auto mask in the EIAM register according to the preferred mode of + // operation. + if (interrupts.mode == interrupt_mode::Notify) + // In Notify mode we prefer auto-masking the interrupts. + set_flags32(baddr[0], IGC_EIAM, 1 << msi_vec); + else if (interrupts.mode == interrupt_mode::Wait) + // In Wait mode we prefer not auto-masking the interrupts. + clear_flags32(baddr[0], IGC_EIAM, 1 << msi_vec); + + // Enable the receive interrupt cause + set_reg32(baddr[0], IGC_EIMS, 1 << msi_vec); +} + +// Disables a receive interrupt of the NIC. +void Igc_device::disable_rx_interrupt(uint16_t qid) { + uint32_t msi_vec = interrupts.queues[qid].msi_vec; + + // Disable the receive interrupt cause. 
+ set_reg32(baddr[0], IGC_EIMC, 1 << msi_vec); +} + +void Igc_device::start_rx_queue(int queue_id) { + ixl_debug("starting rx queue %d", queue_id); + struct igc_rx_queue* queue = ((struct igc_rx_queue*) rx_queues) + queue_id; + + // Allocate packet buffers and set backing memory for descriptors 2048 as + // pktbuf size is strictly speaking incorrect: we need a few headers (1 + // cacheline), so there's only 1984 bytes left for the device but the 82599 + // can only handle sizes in increments of 1 kb; but this is fine since our + // max packet size is the default MTU of 1518 this has to be fixed if jumbo + // frames are to be supported mempool should be >= the number of rx and tx + // descriptors for a forwarding application + int mempool_size = MIN_MEMPOOL_ENTRIES << 4; + + // Create the RX memory pool and reserve the minimum number of packets for + // use by the driver. + queue->mempool = new Mempool(*this, mempool_size, PKT_BUF_ENTRY_SIZE, + MEMPOOL_LIMIT); + queue->mempool->reserve(MIN_MEMPOOL_ENTRIES); + if (queue->num_entries & (queue->num_entries - 1)) { + ixl_debug("Numb of queue entries: %u", queue->num_entries); + ixl_error("number of queue entries must be a power of 2"); + } + for (int j = 0; j < queue->num_entries; j++) { + volatile struct igc_rx_desc* rxd = queue->descriptors + j; + struct pkt_buf* buf = queue->mempool->pkt_buf_alloc(); + if (!buf) { + ixl_error("failed to allocate rx buffer"); + } + + // Set buffer address and clear RXD flags + rxd->buf_addr = buf->buf_addr_phy + offsetof(struct pkt_buf, data); + rxd->errors = 0; + rxd->status = 0; + // we need to return the virtual address in the rx function which the + // descriptor doesn't know by default + queue->virtual_addresses[j] = buf; + } + + ixl_debug("SRRCTL0 = %x", get_reg32(baddr[0], IGC_SRRCTL0)); + + // Wait for the enable bit to show up FIXME: Implement this for other queues + // as well + set_flags32(baddr[0], IGC_RXDCTL0, IGC_RXDCTL_EN); + wait_set_reg32(baddr[0], IGC_RXDCTL0, 
IGC_RXDCTL_EN); + + // Only now set the final head and tail pointers (were initialized to 0) + set_reg32(baddr[0], IGC_RDH, 0); + set_reg32(baddr[0], IGC_RDT, queue->num_entries - 1); +} + +/* Kicks of the TX part by configuring the TCTL register accordingly */ +void Igc_device::start_tx_queue(int queue_id) { + (void) queue_id; + + // Enable queue 0 FIXME: As an optimization, we could also think about + // enabling bursted write back of finished send descriptors here. + set_flags32(baddr[0], IGC_TXDCTL0, IGC_TXDCTL_EN); + + uint32_t tctl = get_reg32(baddr[0], IGC_TCTL); + + // Clear collision threshold bitmask + tctl &= ~IGC_TCTL_CT; + + // Set collision threshold default values (16, as demanded by IEEE). + tctl |= IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT; + + // Enable TX queue, enable padding of short packets + set_reg32(baddr[0], IGC_TCTL, tctl | IGC_TCTL_EN | IGC_TCTL_PSP); +} + + + +/* device functions */ + +void Igc_device::wait_for_link(void) { + + // TODO is the content below relevant for IGC? + + ixl_info("Waiting for link..."); + int32_t max_wait = 10; // 10 seconds in us + uint32_t status; + + while(max_wait > 0) + { + status = get_reg32(baddr[0], IGC_STATUS); + if(status & IGC_STATUS_LU) + { + ixl_info("Link is up!"); + return; + } + + sleep(1); + max_wait--; + } + + ixl_info("Link detection timeout..."); +} + + + +uint32_t Igc_device::rx_batch(uint16_t queue_id, struct pkt_buf* bufs[], + uint32_t num_bufs) { + struct interrupt_queue* interrupt = NULL; + bool interrupt_wait = interrupts.mode == interrupt_mode::Wait; + + // For non-debug builds insert an additional bounds check for the queue_id + l4_assert(queue_id == 0); + struct igc_rx_queue* queue = ((struct igc_rx_queue*) rx_queues) + queue_id; + + if (interrupt_wait) { + interrupt = &interrupts.queues[queue_id]; + } + + if (interrupt_wait && interrupt->interrupt_enabled) { + if (! 
queue->rx_pending) { + // We only listen on IRQs caused by this RQ, so nothing to do + // afterwards as auto clearing is enabled + interrupt->irq->receive(interrupts.timeout); + } + } + + // rx index we checked in the last run of this function + uint16_t rx_index = queue->rx_index; + // index of the descriptor we checked in the last iteration of the loop + uint16_t last_rx_index = rx_index; + uint32_t buf_index; + + for (buf_index = 0; buf_index < num_bufs; buf_index++) { + // rx descriptors are explained in section 3.2.3 + volatile struct igc_rx_desc* desc_ptr = queue->descriptors + rx_index; + uint8_t status = desc_ptr->status; + + // ixl_debug("Queue Discriptor status: 0x%x", status); + + if (status & IGC_RXD_STAT_DD) { + if (!(status & IGC_RXD_STAT_EOP)) { + ixl_error("multi-segment packets are not supported - " + "increase buffer size or decrease MTU"); + } + + // got a packet, read and copy the whole descriptor + struct igc_rx_desc desc; + memcpy(&desc, (const void *) desc_ptr, sizeof(desc)); + struct pkt_buf* buf = (struct pkt_buf*) queue->virtual_addresses[rx_index]; + buf->size = desc.length; + // this would be the place to implement RX offloading by translating + // the device-specific flags to an independent representation in the + // buf (similiar to how DPDK works) need a new mbuf for the + // descriptor + struct pkt_buf* new_buf = queue->mempool->pkt_buf_alloc(); + if (! new_buf) { + // At this point, we just have to trust in this problem not to + // be caused by a real packet buffer leak or bug (as immediately + // terminating the driver might not be a good idea w.r.t. upper + // layers). Hence, we hope for the best and periodically check + // whether new buffers arrived (On rare occasions, there might + // be false negatives in the current mempool implementation). 
+ ixl_warn("failed to allocate new mbuf for rx, you are either " + "leaking memory or your mempool is too small"); + usleep(1000000); + new_buf = queue->mempool->pkt_buf_alloc(); + } + + // reset the descriptor (new buffer address and zero out flags) + // TODO: Is the zeroing of flags actually necessary for an E1000? + desc_ptr->buf_addr = new_buf->buf_addr_phy + offsetof(struct pkt_buf, data); + desc_ptr->errors = 0; + desc_ptr->status = 0; + + queue->virtual_addresses[rx_index] = new_buf; + bufs[buf_index] = buf; + + // want to read the next one in the next iteration, but we still + // need the last/current to update RDT later + last_rx_index = rx_index; + rx_index = wrap_ring(rx_index, queue->num_entries); + } else { + break; + } + } + if (rx_index != last_rx_index) { + // tell hardware that we are done this is intentionally off by one, + // otherwise we'd set RDT=RDH if we are receiving faster than packets + // are coming in RDT=RDH means queue is full + set_reg32(baddr[0], IGC_RDT, last_rx_index); + queue->rx_index = rx_index; + + // Check whether there are unprocessed descriptors left + uint32_t head = get_reg32(baddr[0], IGC_RDH); + if (head == ((last_rx_index + 1) % (uint32_t) queue->num_entries)) + queue->rx_pending = false; + else + queue->rx_pending = true; + } + + // Perform IRQ bookkeeping + if (interrupt_wait) { + interrupt->rx_pkts += buf_index; + + if ((interrupt->instr_counter++ & 0xFFF) == 0) { + bool int_en = interrupt->interrupt_enabled; + uint64_t diff = device_stats::monotonic_time() - interrupt->last_time_checked; + if (diff > interrupt->interval) { + // every second + check_interrupt(interrupt, diff, buf_index, num_bufs); + } + + if (int_en != interrupt->interrupt_enabled) { + if (interrupt->interrupt_enabled) { + enable_rx_interrupt(queue_id); + } else { + disable_rx_interrupt(queue_id); + } + } + } + } + + // If the driver runs in irq_wait mode, it should take care full + // responsibility of IRQ handling (i.e., IRQ masking and unmasking 
are not + // done manually by the application). Thus, we have to unmask the receive + // IRQ here to guarantee a swift notification about newly incoming packets. + if (interrupt_wait && interrupt->interrupt_enabled) + Igc_device::ack_recv_irq(queue_id); + + return buf_index; +} + +uint32_t Igc_device::tx_batch(uint16_t queue_id, struct pkt_buf* bufs[], + uint32_t num_bufs) { + struct igc_tx_queue* queue = ((struct igc_tx_queue*) tx_queues) + queue_id; + // the descriptor is explained in section 3.3.2 we just use a struct copy & + // pasted from intel, but it basically has two formats (hence a union): + // 1. the write-back format which is written by the NIC once sending it is + // finished this is used in step 1 + // 2. the read format which is read by the NIC and written by us, this is + // used in step 2 + + uint16_t clean_index = queue->clean_index; // next descriptor to clean up + + // step 1: clean up descriptors that were sent out by the hardware and + // return them to the mempool start by reading step 2 which is done first + // for each packet cleaning up must be done in batches for performance + // reasons, so this is unfortunately somewhat complicated + while (true) { + // figure out how many descriptors can be cleaned up + + // tx_index is always ahead of clean (invariant of our queue) + int32_t cleanable = queue->tx_index - clean_index; + if (cleanable < 0) { + // handle wrap-around + cleanable = queue->num_entries + cleanable; + } + if (cleanable < TX_CLEAN_BATCH) { + break; + } + // calculcate the index of the last transcriptor in the clean batch we + // can't check all descriptors for performance reasons + int32_t cleanup_to = clean_index + TX_CLEAN_BATCH - 1; + if (cleanup_to >= queue->num_entries) { + cleanup_to -= queue->num_entries; + } + volatile struct igc_tx_desc* txd = queue->descriptors + cleanup_to; + // hardware sets this flag as soon as it's sent out, we can give back + // all bufs in the batch back to the mempool comment from linux 
driver + // abouth this register: /* if DD is not set pending work has not been + // completed */ + if (txd->upper.data & IGC_TXD_STAT_DD) { + int32_t i = clean_index; + while (true) { + struct pkt_buf* buf = (struct pkt_buf *) queue->virtual_addresses[i]; + pkt_buf_free(buf); + if (i == cleanup_to) { + break; + } + i = wrap_ring(i, queue->num_entries); + } + // next descriptor to be cleaned up is one after the one we just + // cleaned + clean_index = wrap_ring(cleanup_to, queue->num_entries); + } else { + // clean the whole batch or nothing; yes, this leaves some packets + // in the queue forever if you stop transmitting, but that's not a + // real concern + break; + } + } + queue->clean_index = clean_index; + + // step 2: send out as many of our packets as possible + uint32_t sent; + for (sent = 0; sent < num_bufs; sent++) { + uint32_t next_index = wrap_ring(queue->tx_index, queue->num_entries); + // we are full if the next index is the one we are trying to reclaim + if (clean_index == next_index) { + break; + } + struct pkt_buf* buf = bufs[sent]; + // remember virtual address to clean it up later + queue->virtual_addresses[queue->tx_index] = (void*) buf; + volatile struct igc_tx_desc* txd = queue->descriptors + queue->tx_index; + queue->tx_index = next_index; + // NIC reads from here + txd->buffer_addr = buf->buf_addr_phy + offsetof(struct pkt_buf, data); + + // Reset descriptor command before sending (otherwise NIC won't emit any + // data!) 
+ txd->lower.data = 0; + txd->lower.flags.length = buf->size; + txd->upper.fields.status = 0; + + + // always the same flags: one buffer (EOP), CRC offload, report status + txd->lower.data |= IGC_TXD_CMD_EOP | IGC_TXD_CMD_IFCS | IGC_TXD_CMD_RS; + + // no fancy offloading stuff - only the total payload length implement + // offloading flags here: * ip checksum offloading is trivial: just set + // the offset * tcp/udp checksum offloading is more annoying, you have + // to precalculate the pseudo-header checksum TODO: Implement TCP / UDP + // offloading here... + } + + // send out by advancing tail, i.e., pass control of the bufs to the nic + // this seems like a textbook case for a release memory order, but Intel's + // driver doesn't even use a compiler barrier here + set_reg32(baddr[0], IGC_TDT, queue->tx_index); + return sent; +} + +void Igc_device::read_stats(struct device_stats *stats) { + // Keep in mind that reading the counters will reset them + uint32_t rx_pkts = get_reg32(baddr[0], IGC_GPRC); + uint32_t tx_pkts = get_reg32(baddr[0], IGC_GPTC); + // Lower reg. resets when higher reg is read + uint64_t rx_bytes = get_reg32(baddr[0], IGC_GORCL) + + (((uint64_t) get_reg32(baddr[0], IGC_GORCH)) << 32); + uint64_t tx_bytes = get_reg32(baddr[0], IGC_GOTCL) + + (((uint64_t) get_reg32(baddr[0], IGC_GOTCH)) << 32); + + // Sum up the counters if a stat object was given + if (stats != NULL) { + stats->rx_pkts += rx_pkts; + stats->tx_pkts += tx_pkts; + stats->rx_bytes += rx_bytes; + stats->tx_bytes += tx_bytes; + } +} + +/* Check, clear and mask the IRQ for the given RX queue. */ +bool Igc_device::check_recv_irq(uint16_t qid) { + (void) qid; + // Nothing to do, we use a 1:1 mapping of RX queue to MSI-X vector. + return interrupts.interrupt_type == IXL_IRQ_MSIX; +} + +/* Re-enable (unmask) the IRQ for the given RX queue. */ +void Igc_device::ack_recv_irq(uint16_t qid) { + // Was auto-cleared via EIAM. On ack set EIMS, to re-enable the IRQ. 
+ set_reg32(baddr[0], IGC_EIMS, 1 << interrupts.queues[qid].msi_vec); +} \ No newline at end of file