//------------------------------------------------------------------- // nicraw.c (Modification of our 'nicf.c' device-driver) // // This is a modification of our 'nicf.c' device-driver which // retains support for filtering out frames whose destination // address does not match this station's MAC-address, but now // the driver's method-functions are revised in a manner that // supports the transfer of 'raw' packets by normal programs. // The 'write()' method no longer prepends an ethernet header // and the 'read()' method no discards such a header, and any // data in a received packet exceeding the user's buffer size // is now discarded instead of being retained for delivery to // the user on a subsequent 'read()' operation. Finally, the // 'ioctl()' method is revised to allow an application to get // the hardware MAC-address of this network interface device. // // NOTE: Written and tested for Linux kernel version 2.6.26.6. // // programmer: ALLAN CRUSE // written on: 13 MAY 2009 //------------------------------------------------------------------- #include // for init_module() #include // for create_proc_read_entry() #include // for pci_get_device() #include // for request_irq() #include // for copy_from_user() #define VENDOR_ID 0x8086 // Intel Corporation #define DEVICE_ID 0x109A // 82573L controller #define N_RX_DESC 8 // Number of Rx-descriptors #define N_TX_DESC 8 // Number of TX-descriptors #define RX_BUFLEN 2048 // Size of each packet-buffer #define TX_BUFLEN 2048 // Size of each packet-buffer #define RX_MEMSIZ (N_RX_DESC * (16 + RX_BUFLEN)) #define TX_MEMSIZ (N_TX_DESC * (16 + TX_BUFLEN)) #define KMEM_SIZE RX_MEMSIZ + TX_MEMSIZ #define INTR_MASK (1<<0)|(1<<1)|(1<<2)|(1<<15)|(1<<4)|(1<<6)|(1<<7) #define MTU 1500 typedef struct { unsigned long long base_address; unsigned short packet_length; unsigned char cksum_offset; unsigned char desc_command; unsigned char desc_status; unsigned char cksum_origin; unsigned short special_info; } TX_DESCRIPTOR; typedef struct { unsigned long long base_address; unsigned short packet_length; unsigned short packet_chksum; unsigned char desc_status; unsigned char desc_errors; unsigned short vlan_tag; } RX_DESCRIPTOR; enum { E1000_CTRL = 0x0000, // Device Control E1000_STATUS = 0x0008, // Device Status E1000_ICR = 0x00C0, // Interrupt Cause Read E1000_ICS = 0x00C8, // Interrupt Cause Set E1000_IMS = 0x00D0, // Interrupt Mask Set E1000_IMC = 0x00D8, // Interrupt Mask Clear E1000_RCTL = 0x0100, // Receive Control E1000_TCTL = 0x0400, // Transmit Control E1000_RDBAL = 0x2800, // Rx-Descriptor Base-Address Low E1000_RDBAH = 0x2804, // Rx-Descriptor Base-Address High E1000_RDLEN = 0x2808, // Rx-Descriptor queue Length E1000_RDH = 0x2810, // Rx-Descriptor Head E1000_RDT = 0x2818, // Rx-Descriptor Tail E1000_RXDCTL = 0x2828, // Rx-Descriptor Control E1000_TDBAL = 0x3800, // Tx-Descriptor Base-Address Low E1000_TDBAH = 0x3804, // Tx-Descriptor Base-Address High E1000_TDLEN = 0x3808, // Tx-Descriptor queue Length E1000_TDH = 0x3810, // Tx-Descriptor Head E1000_TDT = 0x3818, // Tx-Descriptor Tail E1000_TXDCTL = 0x3828, // Tx-Descriptor Control E1000_RA = 0x5400, // Receive-address Array }; char modname[] = "nicf"; char devname[] = "nic"; char info_rx[] = "nicrx"; char info_tx[] = "nictx"; unsigned char mac[6]; int my_major = 97; struct pci_dev *devp; unsigned long mmio_base; unsigned long mmio_size; void *io; void *kmem; unsigned long kmem_phys; RX_DESCRIPTOR *rxring; TX_DESCRIPTOR *txring; wait_queue_head_t wq_recv; wait_queue_head_t wq_xmit; irqreturn_t my_isr( int irq, void *dev_id ) { static int reps = 0; int intr_cause = ioread32( io + E1000_ICR ); if ( ( intr_cause & (1<<31) ) == 0 ) return IRQ_NONE; // NOTE: Our system administrator has asked us not to fill // up the kernel's log-file with these diagnostic messages // (in case a student might forget to remove this module), // so we limit the number interrupts that generate output. if ( reps < 20 ) { printk( " NIC %d: cause=%08X ", ++reps, intr_cause ); if ( intr_cause & (1<< 0) ) printk( "TXDW " ); if ( intr_cause & (1<< 1) ) printk( "TXQE " ); if ( intr_cause & (1<< 2) ) printk( "LC " ); if ( intr_cause & (1<< 4) ) printk( "RXDMT0 " ); if ( intr_cause & (1<< 6) ) printk( "RXO " ); if ( intr_cause & (1<< 7) ) printk( "RXT0 " ); if ( intr_cause & (1<<15) ) printk( "TXDLOW " ); printk( "\n" ); } // If the number of Rx-descriptors "owned" by the nic is // running low, we give the nic another four descriptors if ( intr_cause & (1<<4) ) // Rx-Descriptors Low { int i, rxtail = ioread32( io + E1000_RDT ); for (i = 0; i < 4; i++) { rxring[ rxtail ].desc_status = 0; rxtail = (1 + rxtail) % N_RX_DESC; } iowrite32( rxtail, io + E1000_RDT ); } // In case any process might be asleep, waiting for a new // Rx-descriptor to be 'written back', we awaken it here if ( intr_cause & (1<<7) ) // Rx-descriptor timer expired wake_up_interruptible( &wq_recv ); // In case any process might be asleep, waiting for a new // Tx-descriptor to be 'written back', we awaken it here if ( intr_cause & (1<<0) ) // Tx-Descriptor written back wake_up_interruptible( &wq_xmit ); // notify the nic hardware that we've handled its interrupts iowrite32( intr_cause, io + E1000_ICR ); // notify the kernel software that the interrupt was handled return IRQ_HANDLED; } int my_ioctl( struct inode *inode, struct file *file, unsigned int request, unsigned long address ) { unsigned char *addr = (unsigned char *)address; switch ( request ) { case 0: // get this station's Hardware MAC-address if ( copy_to_user( addr, mac, 6 ) ) return -EFAULT; return 0; // SUCCESS } return -EINVAL; } ssize_t my_write( struct file *file, const char *buf, size_t len, loff_t *pos ) { int txtail = ioread32( io + E1000_TDT ); TX_DESCRIPTOR *dp = &txring[ txtail ]; unsigned char *statp = &dp->desc_status; unsigned char *cp = phys_to_virt( dp->base_address ); int nbytes = ( len < MTU ) ? len : MTU; // sleep if the nic is not yet done with the next descriptor if ( ( *statp & 1 ) == 0 ) { if ( wait_event_interruptible( wq_xmit, *statp & 1 ) ) return -EINTR; } // initialize the packet-data if ( copy_from_user( cp, buf, nbytes ) ) return -EFAULT; // initialize the descriptor's fields dp->packet_length = nbytes; dp->desc_command = (1<<0)|(1<<1)|(1<<3); // EOP, IFCS, RS dp->desc_status = 0; // give the nic "ownership" of this descriptor txtail = (1 + txtail) % N_TX_DESC; iowrite32( txtail, io + E1000_TDT ); // let the kernel know how many bytes were transferred return nbytes; } ssize_t my_read( struct file *file, char *buf, size_t len, loff_t *pos ) { static int rxhead = 0; RX_DESCRIPTOR *dp = &rxring[ rxhead ]; unsigned char *statp = &dp->desc_status; unsigned char *cp = phys_to_virt( dp->base_address ); int count, nbytes; // sleep if the nic has not yet written back this descriptor while ( *statp == 0 ) { if ( wait_event_interruptible( wq_recv, *statp ) < 0 ) return -EINTR; } // get the number data-bytes in this descriptor's packet-buffer count = dp->packet_length; // we cannot return more bytes than can fit in the user's buffer nbytes = ( count > len ) ? len : count; // copy the packet's data to the user's buffer if ( copy_to_user( buf, cp, nbytes ) ) return -EFAULT; // advance 'rxhead' to the next descriptor's array-index rxhead = (1 + rxhead) % N_RX_DESC; // let the kernel know how many bytes were transferred return nbytes; } struct file_operations my_fops = { owner: THIS_MODULE, ioctl: my_ioctl, // <---- write: my_write, read: my_read, }; int my_proc_rx( char *buf, char **start, off_t off, int count, int *eof, void *data ) { int rxtail, rxhead, i, len = 0; rxhead = ioread32( io + E1000_RDH ); rxtail = ioread32( io + E1000_RDT ); len += sprintf( buf+len, "\n Receive-Descriptor Buffer-Area " ); len += sprintf( buf+len, "(head=%d, tail=%d) \n\n", rxhead, rxtail ); for (i = 0; i < N_RX_DESC; i++) { int status = rxring[ i ].desc_status; int errors = rxring[ i ].desc_errors; unsigned long desc_addr = virt_to_phys( rxring + i ); len += sprintf( buf+len, " #%-2d ", i ); len += sprintf( buf+len, "%08lX: ", desc_addr ); len += sprintf( buf+len, "%016llX ", rxring[i].base_address ); len += sprintf( buf+len, "%04X ", rxring[i].packet_length ); len += sprintf( buf+len, "%04X ", rxring[i].packet_chksum ); len += sprintf( buf+len, "%02X ", rxring[i].desc_status ); len += sprintf( buf+len, "%02X ", rxring[i].desc_errors ); len += sprintf( buf+len, "%04X ", rxring[i].vlan_tag ); if ( status & (1<<0) ) len += sprintf( buf+len, "DD " ); if ( status & (1<<1) ) len += sprintf( buf+len, "EOP " ); if ( status & (1<<2) ) len += sprintf( buf+len, "IXSM " ); if ( status & (1<<3) ) len += sprintf( buf+len, "VP " ); if ( status & (1<<5) ) len += sprintf( buf+len, "TCPCS " ); if ( status & (1<<6) ) len += sprintf( buf+len, "IPCS " ); if ( status & (1<<7) ) len += sprintf( buf+len, "PIF " ); len += sprintf( buf+len, " " ); if ( errors & (1<<0) ) len += sprintf( buf+len, "CE " ); if ( errors & (1<<0) ) len += sprintf( buf+len, "FE " ); if ( errors & (1<<0) ) len += sprintf( buf+len, "TCPE " ); if ( errors & (1<<0) ) len += sprintf( buf+len, "IPE " ); if ( errors & (1<<0) ) len += sprintf( buf+len, "RXE " ); len += sprintf( buf+len, "\n" ); } len += sprintf( buf+len, "\n" ); return len; } int my_proc_tx( char *buf, char **start, off_t off, int count, int *eof, void *data ) { int txtail, txhead, i, len = 0; txhead = ioread32( io + E1000_TDH ); txtail = ioread32( io + E1000_TDT ); len += sprintf( buf+len, "\n Transmit-Descriptor Buffer-Area " ); len += sprintf( buf+len, "(head=%d, tail=%d) \n\n", txhead, txtail ); for (i = 0; i < N_TX_DESC; i++) { int status = txring[ i ].desc_status; int command = txring[ i ].desc_command; unsigned long desc_addr = virt_to_phys( txring + i ); len += sprintf( buf+len, " #%-2d ", i ); len += sprintf( buf+len, "%08lX: ", desc_addr ); len += sprintf( buf+len, "%016llX ", txring[i].base_address ); len += sprintf( buf+len, "%04X ", txring[i].packet_length ); len += sprintf( buf+len, "%02X ", txring[i].cksum_offset ); len += sprintf( buf+len, "%02X ", txring[i].desc_command ); len += sprintf( buf+len, "%02X ", txring[i].desc_status ); len += sprintf( buf+len, "%02X ", txring[i].cksum_origin ); len += sprintf( buf+len, "%04X ", txring[i].special_info ); if ( status & (1<<0) ) len += sprintf( buf+len, "DD " ); if ( status & (1<<1) ) len += sprintf( buf+len, "EC " ); if ( status & (1<<2) ) len += sprintf( buf+len, "LC " ); len += sprintf( buf+len, " " ); if ( command & (1<<0) ) len += sprintf( buf+len, "EOP " ); if ( command & (1<<1) ) len += sprintf( buf+len, "IFCS " ); if ( command & (1<<2) ) len += sprintf( buf+len, "ICP " ); if ( command & (1<<3) ) len += sprintf( buf+len, "RS " ); if ( command & (1<<6) ) len += sprintf( buf+len, "VLE " ); if ( command & (1<<7) ) len += sprintf( buf+len, "IDE " ); len += sprintf( buf+len, "\n" ); } len += sprintf( buf+len, "\n" ); return len; } static int __init nic_init( void ) { u16 pcicmd; int i, dev_control, rx_control, tx_control, irq; unsigned long long rxmem_phys, txmem_phys; unsigned long long rxbuff, txbuff; // write confirmation-message to the kernel's log-file printk( "<1>\nInstalling \'%s\' module ", modname ); printk( "(major=%d) \n", my_major ); // check that the Intel 82573L controller is installed devp = pci_get_device( VENDOR_ID, DEVICE_ID, NULL ); if ( !devp ) return -ENODEV; // setup memory-mapping of the controller's registers mmio_base = pci_resource_start( devp, 0 ); mmio_size = pci_resource_len( devp, 0 ); io = ioremap_nocache( mmio_base, mmio_size ); if ( !io ) return -ENOSPC; // copy the Ethernet controller's MAC-address memcpy( mac, io + E1000_RA, 6 ); printk( " Intel PRO1000 Ethernet Interface - MAC=" ); for (i = 0; i < 6; i++) printk( "%02X%c", mac[i], (i<5) ? ':' : ' ' ); printk( "\n" ); // insure Bus Master capability is enabled pci_read_config_word( devp, 4, &pcicmd ); pcicmd |= (1 << 2); pci_write_config_word( devp, 4, pcicmd ); // initialize our driver's wait-queues init_waitqueue_head( &wq_recv ); init_waitqueue_head( &wq_xmit ); // allocate kernel memory for descriptors and buffers kmem = kzalloc( KMEM_SIZE, GFP_KERNEL | GFP_DMA ); if ( !kmem ) { iounmap( io ); return -ENOMEM; } kmem_phys = virt_to_phys( kmem ); printk( " kernel memory allocated at physical address-range " ); printk( "0x%08lX-0x%08lX \n", kmem_phys, kmem_phys + KMEM_SIZE ); rxring = (RX_DESCRIPTOR *)kmem; rxmem_phys = virt_to_phys( rxring ); rxbuff = rxmem_phys + 16 * N_RX_DESC; for (i = 0; i < N_RX_DESC; i++) { rxring[ i ].base_address = rxbuff + i * RX_BUFLEN; rxring[ i ].desc_status = 0; } txring = (TX_DESCRIPTOR *)(kmem + RX_MEMSIZ); txmem_phys = virt_to_phys( txring ); txbuff = txmem_phys + 16 * N_TX_DESC; for (i = 0; i < N_TX_DESC; i++) { txring[ i ].base_address = txbuff + i * TX_BUFLEN; txring[ i ].desc_status = (1<<0); // DD } // setup the configuration-options for 'Device Control' dev_control = 0; dev_control |= (1<<0); // FD-bit (Full Duplex) dev_control |= (0<<2); // GIOMD-bit (GIO Master Disable) dev_control |= (1<<3); // LRST-bit (Link Reset) dev_control |= (1<<6); // SLU-bit (Set Link Up) dev_control |= (2<<8); // SPEED=2 (1000Mbps) dev_control |= (1<<11); // FRCSPD-bit (Force Speed) dev_control |= (0<<12); // FRCDPLX-bit (Force Duplex) dev_control |= (0<<20); // ADVD3WUC-bit (Advertise D3 Wake Up Cap) dev_control |= (1<<26); // RST-bit (Device Reset) dev_control |= (0<<27); // RFCE-bit (Receive Flow Control Enable) dev_control |= (0<<28); // TFCE-bit (Transmit Flow Control Enable) dev_control |= (0<<30); // VME-bit (VLAN Mode Enable) dev_control |= (0<<31); // PHY_RST-bit (PHY Reset) // perform a 'reset' of the network controller iowrite32( 0xFFFFFFFF, io + E1000_IMC ); iowrite32( 0x00000000, io + E1000_STATUS ); iowrite32( dev_control | (1<<26), io + E1000_CTRL ); iowrite32( dev_control & ~(1<<26), io + E1000_CTRL ); udelay( 10000 ); // setup configuration-options for the 'Receive' engine rx_control = 0; rx_control |= (0<<1); // EN-bit (Enable) rx_control |= (1<<2); // SBP-bit (Store Bad Packets) rx_control |= (0<<3); // UPE-bit (Unicast Promiscuous Enable) <---- rx_control |= (0<<4); // MPE-bit (Multicast Promiscuous Enable) <---- rx_control |= (0<<5); // LPE-bit (Long Packet Enable) rx_control |= (0<<6); // LBM=0 (LoopBack Mode) rx_control |= (2<<8); // RDMTS=2 (Rx-Descriptor Min Thresh Size) rx_control |= (0<<10); // DTYPE=0 (Descriptor Type) rx_control |= (0<<12); // MO=0 (Multicast Offset) rx_control |= (1<<15); // BAM-bit (Broadcast Address Enable) rx_control |= (0<<16); // BSIZE=0 (Buffer Size) rx_control |= (0<<18); // VLE-bit (VLAN-filter Enable) rx_control |= (0<<19); // CFIEN-bit (Canonical Form Indicator Enable) rx_control |= (0<<20); // CI=0 (Canonical Form Indicator bit-value) rx_control |= (1<<22); // DPF-bit (Discard Pause Frames) rx_control |= (0<<23); // PMCF-bit (Pass MAC Control Frames) rx_control |= (0<<25); // BSEC-bit (Buffer Size Extension) rx_control |= (1<<26); // SECRC-bit (Strip Ethernet CRC) rx_control |= (2<<27); // FLEXBUF=2K (Flexible Buffer Size) iowrite32( rx_control, io + E1000_RCTL ); iowrite32( rxmem_phys >> 0, io + E1000_RDBAL ); iowrite32( rxmem_phys >> 32, io + E1000_RDBAH ); iowrite32( N_RX_DESC << 4, io + E1000_RDLEN ); iowrite32( 0x01010000, io + E1000_RXDCTL ); // setup configuration-options for the 'Transmit' engine tx_control = 0; tx_control |= (0<<1); // EN-bit (Enable) tx_control |= (1<<3); // PSP-bit (Pad Short Packets) tx_control |= (15<<4); // CT=15 (Collision Threshold) tx_control |= (63<<12); // COLD=63 (Collision Distance) tx_control |= (0<<22); // SWXOFF-bit (Software XOFF) tx_control |= (1<<24); // RTLC=1 (Re-Transmit on Late Collisions) tx_control |= (0<<25); // UNORTX-bit (Underrun No ReTransmit) tx_control |= (0<<26); // TXCSCMT=0 (TxDesc Min Threshold) tx_control |= (0<<28); // MULR-bit (Multiple Request Support) iowrite32( tx_control, io + E1000_TCTL ); iowrite32( txmem_phys >> 0, io + E1000_TDBAL ); iowrite32( txmem_phys >> 32, io + E1000_TDBAH ); iowrite32( N_TX_DESC << 4, io + E1000_TDLEN ); iowrite32( 0x01010000, io + E1000_TXDCTL ); // install our interrupt-handler and enable interrupts irq = devp->irq; if ( request_irq( irq, my_isr, IRQF_SHARED, modname, &modname ) < 0 ) { kfree( kmem ); iounmap( io ); return -EBUSY; } iowrite32( INTR_MASK, io + E1000_IMS ); // trigger a RXDMT0 interrupt to give the nic some RX-descriptors iowrite32( (1<<4), io + E1000_ICS ); // enable the Receive engine iowrite32( ioread32( io + E1000_RCTL ) | (1<<1), io + E1000_RCTL ); // enable the Transmit engine iowrite32( ioread32( io + E1000_TCTL ) | (1<<1), io + E1000_TCTL ); // install our driver's pseudo-files in the '/proc' directory create_proc_read_entry( info_rx, 0, NULL, my_proc_rx, NULL ); create_proc_read_entry( info_tx, 0, NULL, my_proc_tx, NULL ); // install our device-driver's methods return register_chrdev( my_major, devname, &my_fops ); } static void __exit nic_exit(void ) { // remove our device-driver methods unregister_chrdev( my_major, devname ); // disable the Receive engine iowrite32( ioread32( io + E1000_RCTL ) & ~(1<<1), io + E1000_RCTL ); // disable the Transmit engine iowrite32( ioread32( io + E1000_TCTL ) & ~(1<<1), io + E1000_TCTL ); // delete our pseudo-files from the '/proc' directory remove_proc_entry( info_tx, NULL ); remove_proc_entry( info_rx, NULL ); // disable interrupts and remove our interrupt-handler iowrite32( 0xFFFFFFFF, io + E1000_IMC ); free_irq( devp->irq, modname ); // release our allocation of kernel memory kfree( kmem ); // unmap the nic's device-registers iounmap( io ); // write confirmation-message to the kernel's log-file printk( "<1>Removing \'%s\' module\n", modname ); } module_init( nic_init ); module_exit( nic_exit ); MODULE_LICENSE("GPL");