X-Git-Url: https://bbs.cooldavid.org/git/?p=jme.git;a=blobdiff_plain;f=jme.c;h=100a662208229db9a636e87f9bfd0d7918f4c00a;hp=3051cd15fcd42de6877ac917f0d163a5eea06b02;hb=4330c2f2eeef6b8d690560d2666425fa0362109c;hpb=d7699f87f8d114f0b07b7363214d791884080dcd diff --git a/jme.c b/jme.c index 3051cd1..100a662 100644 --- a/jme.c +++ b/jme.c @@ -20,23 +20,50 @@ */ /* - * TODO before release: - * 1. Use sk_buff for dma buffer with pci_map_single, - * and handle scattered sk_buffs (Reduce memory copy) - * 2. Try setting 64bit DMA with pci_set[_consistent]_dma_mask + * Timeline before release: + * Stage 1: Basic Performance / Capbility fine tune. + * - Implement PCC -- Dynamic adjustment. + * - Use NAPI instead of rx_tasklet? + * PCC Support Both Packet Counter and Timeout Interrupt for + * receive and transmit complete, does NAPI really needed? + * I'll add NAPI support anyway.. + * For CPU busy and heavy network loading system.. + * - Try setting 64bit DMA with pci_set[_consistent]_dma_mask * and set netdev feature flag. - * 3. Implement Power Managemt related functions. - * 4. Implement checksum offloading, VLAN offloading, - * TCP Segement offloading. - * 5. Implement Jumboframe. - * 6. Implement NAPI option for user. - * 7. Implement MSI / MSI-X. - * 8. Implement PCC. - * 9. Implement QoS according to "priority" attribute in sk_buff - * with 8 TX priority queue provided by hardware. - * 10.Cleanup/re-orginize code, performence tuneing(alignment etc...). + * (Need to modity transmit descriptor filling policy as well) + * - Use pci_map_page instead of pci_map_single for HIGHMEM support + * + * Stage 2: Error handling. + * - Wathch dog + * - Transmit timeout + * + * Stage 3: Basic offloading support. + * - Implement scatter-gather offloading. + * A system page per RX (buffer|descriptor)? + * Handle fraged sk_buff to TX descriptors. + * - Implement tx/rx ipv6/ip/tcp/udp checksum offloading + * + * Stage 4: Basic feature support. + * - Implement Power Managemt related functions. + * - Implement Jumboframe. + * - Implement MSI. + * + * Stage 5: Advanced offloading support. + * - Implement VLAN offloading. + * - Implement TCP Segement offloading. + * + * Stage 6: CPU Load balancing. + * - Implement MSI-X. + * Along with multiple RX queue, for CPU load balancing. + * - Use Multiple TX Queue for Multiple CPU Transmit + * Simultaneously Without Lock. + * + * Stage 7: + * - Cleanup/re-orginize code, performence tuneing(alignment etc...). + * - Test and Release 1.0 */ +#include #include #include #include @@ -45,8 +72,17 @@ #include #include #include +#include #include "jme.h" +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) +static struct net_device_stats *jme_get_stats(struct net_device *netdev) +{ + struct jme_adapter *jme = netdev_priv(netdev); + return &jme->stats; +} +#endif + static int jme_mdio_read(struct net_device *netdev, int phy, int reg) { struct jme_adapter *jme = netdev_priv(netdev); @@ -64,7 +100,7 @@ static int jme_mdio_read(struct net_device *netdev, int phy, int reg) } if (i == 0) { - dprintk("phy read timeout : %d\n", reg); + jeprintk(netdev->name, "phy read timeout : %d\n", reg); return (0); } @@ -89,7 +125,7 @@ static void jme_mdio_write(struct net_device *netdev, int phy, int reg, int val) } if (i == 0) - dprintk("phy write timeout : %d\n", reg); + jeprintk(netdev->name, "phy write timeout : %d\n", reg); return; } @@ -104,15 +140,18 @@ static void jme_reset_mac_processor(struct jme_adapter *jme) udelay(2); val &= ~GHC_SWRST; jwrite32(jme, JME_GHC, val); - jwrite32(jme, JME_RXMCHT, 0x00000000); - jwrite32(jme, JME_RXMCHT+4, 0x00000000); + jwrite32(jme, JME_RXMCHT_LO, 0x00000000); + jwrite32(jme, JME_RXMCHT_HI, 0x00000000); jwrite32(jme, JME_WFODP, 0); jwrite32(jme, JME_WFOI, 0); + jwrite32(jme, JME_GPREG0, GPREG0_DEFAULT); + jwrite32(jme, JME_GPREG1, 0); } __always_inline static void jme_clear_pm(struct jme_adapter *jme) { jwrite32(jme, JME_PMCS, 0xFFFF0000); + pci_set_power_state(jme->pdev, PCI_D0); } static int jme_reload_eeprom(struct jme_adapter *jme) @@ -138,7 +177,7 @@ static int jme_reload_eeprom(struct jme_adapter *jme) } if(i == 0) { - dprintk("eeprom reload timeout\n"); + jeprintk(jme->dev->name, "eeprom reload timeout\n"); return -EIO; } } @@ -154,12 +193,12 @@ __always_inline static void jme_load_macaddr(struct net_device *netdev) unsigned char macaddr[6]; __u32 val; - val = jread32(jme, JME_RXUMA); + val = jread32(jme, JME_RXUMA_LO); macaddr[0] = (val >> 0) & 0xFF; macaddr[1] = (val >> 8) & 0xFF; macaddr[2] = (val >> 16) & 0xFF; macaddr[3] = (val >> 24) & 0xFF; - val = jread32(jme, JME_RXUMA+4); + val = jread32(jme, JME_RXUMA_HI); macaddr[4] = (val >> 0) & 0xFF; macaddr[5] = (val >> 8) & 0xFF; memcpy(netdev->dev_addr, macaddr, 6); @@ -181,7 +220,20 @@ __always_inline static void jme_stop_irq(struct jme_adapter *jme) jwrite32(jme, JME_IENC, INTR_ENABLE); } -__always_inline static void jme_check_link(struct net_device *netdev) + +__always_inline static void jme_enable_shadow(struct jme_adapter *jme) +{ + jwrite32(jme, + JME_SHBA_LO, + ((__u32)jme->shadow_dma & ~((__u32)0x1F)) | SHBA_POSTEN); +} + +__always_inline static void jme_disable_shadow(struct jme_adapter *jme) +{ + jwrite32(jme, JME_SHBA_LO, 0x0); +} + +static void jme_check_link(struct net_device *netdev) { struct jme_adapter *jme = netdev_priv(netdev); __u32 phylink, ghc, cnt = JME_AUTONEG_TIMEOUT; @@ -199,7 +251,7 @@ __always_inline static void jme_check_link(struct net_device *netdev) } if(!cnt) - printk(KERN_ERR "Waiting autoneg timeout.\n"); + jeprintk(netdev->name, "Waiting autoneg timeout.\n"); switch(phylink & PHY_LINK_SPEED_MASK) { case PHY_LINK_SPEED_10M: @@ -232,121 +284,127 @@ __always_inline static void jme_check_link(struct net_device *netdev) TXMCS_CARRIERSENSE | TXMCS_COLLISION); - jprintk("Link is up at %s.\n", linkmsg); + jprintk(netdev->name, "Link is up at %s.\n", linkmsg); netif_carrier_on(netdev); } else { - jprintk("Link is down.\n"); + jprintk(netdev->name, "Link is down.\n"); netif_carrier_off(netdev); } } -__always_inline static void jme_set_new_txdesc(struct jme_adapter *jme, - int i, int framesize) +static void jme_link_change_tasklet(unsigned long arg) +{ + struct jme_adapter *jme = (struct jme_adapter*)arg; + jme_check_link(jme->dev); +} + +static void jme_set_new_txdesc(struct jme_adapter *jme, + int i, struct sk_buff *skb) { struct jme_ring *txring = jme->txring; - struct TxDesc* txdesc = txring->desc; + register struct TxDesc* txdesc = txring->desc; + struct jme_buffer_info *txbi = txring->bufinf; + dma_addr_t dmaaddr; + + txdesc += i; + txbi += i; + + dmaaddr = pci_map_single(jme->pdev, + skb->data, + skb->len, + PCI_DMA_TODEVICE); + + pci_dma_sync_single_for_device(jme->pdev, + dmaaddr, + skb->len, + PCI_DMA_TODEVICE); - memset(txdesc + i, 0, TX_DESC_SIZE); - txdesc[i].desc1.bufaddr = cpu_to_le32(ALIGN(txring->buf_dma[i], 8)); - txdesc[i].desc1.datalen = cpu_to_le16(TX_BUF_SIZE); - txdesc[i].desc1.pktsize = cpu_to_le16(framesize); + txdesc->dw[0] = 0; + txdesc->dw[1] = 0; + txdesc->dw[2] = 0; + txdesc->desc1.bufaddr = cpu_to_le32(dmaaddr); + txdesc->desc1.datalen = cpu_to_le16(skb->len); + txdesc->desc1.pktsize = cpu_to_le16(skb->len); /* * Set OWN bit at final. * When kernel transmit faster than NIC last packet sent, - * and NIC tring to send this descriptor before we tell + * and NIC trying to send this descriptor before we tell * it to start sending this TX queue. * Other fields are already filled correctly. */ wmb(); - txdesc[i].desc1.flags = TXFLAG_OWN | TXFLAG_INT; - - dprintk("TX Ring Buf Address(%08x,%08x,%d).\n", - txring->buf_dma[i], - (txdesc[i].all[12] << 0) | - (txdesc[i].all[13] << 8) | - (txdesc[i].all[14] << 16) | - (txdesc[i].all[15] << 24), - (txdesc[i].all[4] << 0) | - (txdesc[i].all[5] << 8)); + txdesc->desc1.flags = TXFLAG_OWN | TXFLAG_INT; + txbi->skb = skb; + txbi->mapping = dmaaddr; + txbi->len = skb->len; + +#ifdef TX_QUEUE_DEBUG + dprintk(jme->dev->name, "TX Ring Buf Address(%08x,%08x,%d).\n", + dmaaddr, + (txdesc->all[12] << 0) | + (txdesc->all[13] << 8) | + (txdesc->all[14] << 16) | + (txdesc->all[15] << 24), + (txdesc->all[4] << 0) | + (txdesc->all[5] << 8)); +#endif } -__always_inline static int jme_setup_tx_resources(struct jme_adapter *jme) +static int jme_setup_tx_resources(struct jme_adapter *jme) { - int i; struct jme_ring *txring = &(jme->txring[0]); txring->alloc = dma_alloc_coherent(&(jme->pdev->dev), TX_RING_ALLOC_SIZE, &(txring->dmaalloc), GFP_KERNEL); - if(!txring->alloc) + if(!txring->alloc) { + txring->desc = NULL; + txring->dmaalloc = 0; + txring->dma = 0; return -ENOMEM; + } /* * 16 Bytes align */ - txring->desc = (void*)ALIGN((unsigned long)(txring->alloc), 16); - txring->dma = ALIGN(txring->dmaalloc, 16); + txring->desc = (void*)ALIGN((unsigned long)(txring->alloc), RING_DESC_ALIGN); + txring->dma = ALIGN(txring->dmaalloc, RING_DESC_ALIGN); txring->next_to_use = 0; txring->next_to_clean = 0; - dprintk("TX Ring Base Address(%08x,%08x).\n", +#ifdef TX_QUEUE_DEBUG + dprintk(jme->dev->name, "TX Ring Base Address(%08x,%08x).\n", (__u32)txring->desc, txring->dma); +#endif /* * Initiallize Transmit Descriptors */ memset(txring->alloc, 0, TX_RING_ALLOC_SIZE); - for(i = 0 ; i < RING_DESC_NR ; ++i) { - txring->buf_virt[i] = dma_alloc_coherent(&(jme->pdev->dev), - TX_BUF_ALLOC_SIZE, - &(txring->buf_dma[i]), - GFP_KERNEL); - if(!txring->buf_virt[i]) - break; - } - - /* - * Cleanup allocated memories when error - */ - if(i != RING_DESC_NR) { - for(--i ; i >= 0 ; --i) { - dma_free_coherent(&(jme->pdev->dev), - TX_BUF_ALLOC_SIZE, - txring->buf_virt[i], - txring->buf_dma[i]); - } - dma_free_coherent(&(jme->pdev->dev), - TX_RING_ALLOC_SIZE, - txring->alloc, - txring->dmaalloc); - txring->alloc = NULL; - txring->desc = NULL; - txring->dmaalloc = 0; - txring->dma = 0; - return -ENOMEM; - } - + memset(txring->bufinf, 0, sizeof(struct jme_buffer_info) * RING_DESC_NR); return 0; } -__always_inline static void jme_free_tx_resources(struct jme_adapter *jme) +static void jme_free_tx_resources(struct jme_adapter *jme) { int i; struct jme_ring *txring = &(jme->txring[0]); + struct jme_buffer_info *txbi = txring->bufinf; if(txring->alloc) { - for(i = 0 ; i < RING_DESC_NR ; ++i) { - if(txring->buf_virt[i]) { - dma_free_coherent(&(jme->pdev->dev), - TX_BUF_ALLOC_SIZE, - txring->buf_virt[i], - txring->buf_dma[i]); + for(i=0;ibufinf + i; + if(txbi->skb) { + dev_kfree_skb(txbi->skb); + txbi->skb = NULL; + txbi->mapping = 0; + txbi->len = 0; } } @@ -366,6 +424,8 @@ __always_inline static void jme_free_tx_resources(struct jme_adapter *jme) __always_inline static void jme_enable_tx_engine(struct jme_adapter *jme) { + __u8 mrrs; + /* * Select Queue 0 */ @@ -382,13 +442,29 @@ __always_inline static void jme_enable_tx_engine(struct jme_adapter *jme) */ jwrite32(jme, JME_TXQDC, RING_DESC_NR); + /* + * Get Max Read Req Size from PCI Config Space + */ + pci_read_config_byte(jme->pdev, PCI_CONF_DCSR_MRRS, &mrrs); + switch(mrrs) { + case MRRS_128B: + jme->reg_txcs = TXCS_DEFAULT | TXCS_DMASIZE_128B; + break; + case MRRS_256B: + jme->reg_txcs = TXCS_DEFAULT | TXCS_DMASIZE_256B; + break; + default: + jme->reg_txcs = TXCS_DEFAULT | TXCS_DMASIZE_512B; + break; + }; + /* * Enable TX Engine */ wmb(); - jwrite32(jme, JME_TXCS, TXCS_DEFAULT | - TXCS_SELECT_QUEUE0 | - TXCS_ENABLE); + jwrite32(jme, JME_TXCS, jme->reg_txcs | + TXCS_SELECT_QUEUE0 | + TXCS_ENABLE); } @@ -400,7 +476,7 @@ __always_inline static void jme_disable_tx_engine(struct jme_adapter *jme) /* * Disable TX Engine */ - jwrite32(jme, JME_TXCS, TXCS_DEFAULT); + jwrite32(jme, JME_TXCS, jme->reg_txcs); val = jread32(jme, JME_TXCS); for(i = JME_TX_DISABLE_TIMEOUT ; (val & TXCS_ENABLE) && i > 0 ; --i) @@ -410,37 +486,92 @@ __always_inline static void jme_disable_tx_engine(struct jme_adapter *jme) } if(!i) - printk(KERN_ERR "Disable TX engine timeout.\n"); + jeprintk(jme->dev->name, "Disable TX engine timeout.\n"); } -__always_inline static void jme_set_clean_rxdesc(struct jme_adapter *jme, - int i) +static void jme_set_clean_rxdesc(struct jme_adapter *jme, int i) { struct jme_ring *rxring = jme->rxring; - struct RxDesc* rxdesc = rxring->desc; - - memset(rxdesc + i, 0, RX_DESC_SIZE); - rxdesc[i].desc1.bufaddrl = cpu_to_le32(ALIGN(rxring->buf_dma[i], 8)); - rxdesc[i].desc1.datalen = cpu_to_le16(RX_BUF_SIZE); + register struct RxDesc* rxdesc = rxring->desc; + struct jme_buffer_info *rxbi = rxring->bufinf; + rxdesc += i; + rxbi += i; + + rxdesc->dw[0] = 0; + rxdesc->dw[1] = 0; + rxdesc->desc1.bufaddrh = cpu_to_le32(((__u64)rxbi->mapping) >> 32); + rxdesc->desc1.bufaddrl = cpu_to_le32(rxbi->mapping); + rxdesc->desc1.datalen = cpu_to_le16(RX_BUF_SIZE); wmb(); - rxdesc[i].desc1.flags = RXFLAG_OWN | RXFLAG_INT; + rxdesc->desc1.flags = RXFLAG_OWN | RXFLAG_INT; #ifdef RX_QUEUE_DEBUG - dprintk("RX Ring Buf Address(%08x,%08x,%d).\n", - rxring->buf_dma[i], - (rxdesc[i].all[12] << 0) | - (rxdesc[i].all[13] << 8) | - (rxdesc[i].all[14] << 16) | - (rxdesc[i].all[15] << 24), - (rxdesc[i].all[4] << 0) | - (rxdesc[i].all[5] << 8)); + dprintk(jme->dev->name, "RX Ring Buf Address(%08x,%08x,%d).\n", + rxbi->mapping, + (rxdesc->all[12] << 0) | + (rxdesc->all[13] << 8) | + (rxdesc->all[14] << 16) | + (rxdesc->all[15] << 24), + (rxdesc->all[4] << 0) | + (rxdesc->all[5] << 8)); #endif } -__always_inline static int jme_setup_rx_resources(struct jme_adapter *jme) +static int jme_make_new_rx_buf(struct jme_adapter *jme, int i) +{ + struct jme_ring *rxring = &(jme->rxring[0]); + struct jme_buffer_info *rxbi = rxring->bufinf; + unsigned long offset; + struct sk_buff* skb; + + skb = netdev_alloc_skb(jme->dev, RX_BUF_ALLOC_SIZE); + if(unlikely(!skb)) + return -ENOMEM; + if(unlikely(skb_shinfo(skb)->nr_frags)) { + dprintk(jme->dev->name, "Allocated skb fragged(%d).\n", skb_shinfo(skb)->nr_frags); + dev_kfree_skb(skb); + return -ENOMEM; + } + + + if(unlikely( + offset = + (unsigned long)(skb->data) + & (unsigned long)(RX_BUF_DMA_ALIGN - 1))) { + skb_reserve(skb, RX_BUF_DMA_ALIGN - offset); + } + + rxbi += i; + rxbi->skb = skb; + rxbi->mapping = pci_map_single(jme->pdev, + skb->data, + RX_BUF_SIZE, + PCI_DMA_FROMDEVICE); + + return 0; +} + +static void jme_free_rx_buf(struct jme_adapter *jme, int i) +{ + struct jme_ring *rxring = &(jme->rxring[0]); + struct jme_buffer_info *rxbi = rxring->bufinf; + rxbi += i; + + if(rxbi->skb) { + pci_unmap_single(jme->pdev, + rxbi->mapping, + RX_BUF_SIZE, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(rxbi->skb); + rxbi->skb = NULL; + rxbi->mapping = 0; + } +} + +static int jme_setup_rx_resources(struct jme_adapter *jme) { int i; struct jme_ring *rxring = &(jme->rxring[0]); @@ -449,19 +580,23 @@ __always_inline static int jme_setup_rx_resources(struct jme_adapter *jme) RX_RING_ALLOC_SIZE, &(rxring->dmaalloc), GFP_KERNEL); - if(!rxring->alloc) + if(!rxring->alloc) { + rxring->desc = NULL; + rxring->dmaalloc = 0; + rxring->dma = 0; return -ENOMEM; + } /* * 16 Bytes align */ - rxring->desc = (void*)ALIGN((unsigned long)(rxring->alloc), 16); - rxring->dma = ALIGN(rxring->dmaalloc, 16); + rxring->desc = (void*)ALIGN((unsigned long)(rxring->alloc), RING_DESC_ALIGN); + rxring->dma = ALIGN(rxring->dmaalloc, RING_DESC_ALIGN); rxring->next_to_use = 0; rxring->next_to_clean = 0; #ifdef RX_QUEUE_DEBUG - dprintk("RX Ring Base Address(%08x,%08x).\n", + dprintk(jme->dev->name, "RX Ring Base Address(%08x,%08x).\n", (__u32)rxring->desc, rxring->dma); #endif @@ -470,11 +605,7 @@ __always_inline static int jme_setup_rx_resources(struct jme_adapter *jme) * Initiallize Receive Descriptors */ for(i = 0 ; i < RING_DESC_NR ; ++i) { - rxring->buf_virt[i] = dma_alloc_coherent(&(jme->pdev->dev), - RX_BUF_ALLOC_SIZE, - &(rxring->buf_dma[i]), - GFP_KERNEL); - if(!rxring->buf_virt[i]) + if(unlikely(jme_make_new_rx_buf(jme, i))) break; jme_set_clean_rxdesc(jme, i); @@ -484,12 +615,9 @@ __always_inline static int jme_setup_rx_resources(struct jme_adapter *jme) * Cleanup allocated memories when error */ if(i != RING_DESC_NR) { - for(--i ; i >= 0 ; --i) { - dma_free_coherent(&(jme->pdev->dev), - RX_BUF_ALLOC_SIZE, - rxring->buf_virt[i], - rxring->buf_dma[i]); - } + for(--i ; i >= 0 ; --i) + jme_free_rx_buf(jme, i); + dma_free_coherent(&(jme->pdev->dev), RX_RING_ALLOC_SIZE, rxring->alloc, @@ -504,20 +632,14 @@ __always_inline static int jme_setup_rx_resources(struct jme_adapter *jme) return 0; } -__always_inline static void jme_free_rx_resources(struct jme_adapter *jme) +static void jme_free_rx_resources(struct jme_adapter *jme) { int i; struct jme_ring *rxring = &(jme->rxring[0]); if(rxring->alloc) { - for(i = 0 ; i < RING_DESC_NR ; ++i) { - if(rxring->buf_virt[i]) { - dma_free_coherent(&(jme->pdev->dev), - RX_BUF_ALLOC_SIZE, - rxring->buf_virt[i], - rxring->buf_dma[i]); - } - } + for(i = 0 ; i < RING_DESC_NR ; ++i) + jme_free_rx_buf(jme, i); dma_free_coherent(&(jme->pdev->dev), RX_RING_ALLOC_SIZE, @@ -534,8 +656,6 @@ __always_inline static void jme_free_rx_resources(struct jme_adapter *jme) __always_inline static void jme_enable_rx_engine(struct jme_adapter *jme) { - __u32 val; - /* * Setup RX DMA Bass Address */ @@ -555,12 +675,26 @@ __always_inline static void jme_enable_rx_engine(struct jme_adapter *jme) /* * Enable RX Engine */ + wmb(); - val = jread32(jme, JME_RXCS); - val |= RXCS_ENABLE | RXCS_QST; - jwrite32(jme, JME_RXCS, val); + jwrite32(jme, JME_RXCS, RXCS_DEFAULT | + RXCS_QUEUESEL_Q0 | + RXCS_ENABLE | + RXCS_QST); } +__always_inline static void jme_restart_rx_engine(struct jme_adapter *jme) +{ + /* + * Enable RX Engine + */ + jwrite32(jme, JME_RXCS, RXCS_DEFAULT | + RXCS_QUEUESEL_Q0 | + RXCS_ENABLE | + RXCS_QST); +} + + __always_inline static void jme_disable_rx_engine(struct jme_adapter *jme) { int i; @@ -581,27 +715,66 @@ __always_inline static void jme_disable_rx_engine(struct jme_adapter *jme) } if(!i) - printk(KERN_ERR "Disable RX engine timeout.\n"); + jeprintk(jme->dev->name, "Disable RX engine timeout.\n"); } -__always_inline static void jme_process_tx_complete(struct net_device *netdev) +static void jme_tx_clean_tasklet(unsigned long arg) { - /* - * Clear sk_buff here in the future - * (Allowing NIC directly DMA with sk_buff kernel requested to send) - */ + struct jme_adapter *jme = (struct jme_adapter*)arg; + struct jme_ring *txring = &(jme->txring[0]); + struct TxDesc *txdesc = txring->desc; + struct jme_buffer_info *txbi = txring->bufinf, *ctxbi; + struct sk_buff *skb; + int i, end; + +#ifdef TX_TASKLET_DEBUG + dprintk(jme->dev->name, "into tasklet\n"); +#endif + + end = txring->next_to_use; + for(i = txring->next_to_clean ; i != end ; ) { + ctxbi = txbi + i; + skb = ctxbi->skb; + if(skb && !(txdesc[i].desc1.flags & TXFLAG_OWN)) { + +#ifdef TX_TASKLET_DEBUG + dprintk(jme->dev->name, "cleaning %d\n", i); +#endif + + pci_unmap_single(jme->pdev, + ctxbi->mapping, + skb->len, + PCI_DMA_TODEVICE); + + dev_kfree_skb(skb); + prefetch(txbi + i + 1); + prefetch(txdesc + i + 1); + ctxbi->skb = NULL; + ctxbi->mapping = 0; + ctxbi->len = skb->len; + } + else { + break; + } + + if(unlikely(++i == RING_DESC_NR)) + i = 0; + } + txring->next_to_clean = i; + } -__always_inline static void jme_process_receive(struct net_device *netdev) +static void jme_process_receive(struct jme_adapter *jme) { - struct jme_adapter *jme = netdev_priv(netdev); + struct net_device *netdev = jme->dev; struct jme_ring *rxring = &(jme->rxring[0]); - struct RxDesc *rxdesc; - __u8 *rxbuf; + struct RxDesc *rxdesc = rxring->desc; + struct jme_buffer_info *rxbi; struct sk_buff *skb; - int i, start, cnt; - int framesize, desccnt; + dma_addr_t buf_dma; + int i, j, start, cnt, ccnt; + unsigned int framesize, desccnt; /* * Assume that one descriptor per frame, @@ -609,23 +782,31 @@ __always_inline static void jme_process_receive(struct net_device *netdev) * (or not? If buffer already large enough to store entire packet.) */ - rxdesc = rxring->desc; - spin_lock(&jme->recv_lock); i = start = rxring->next_to_clean; /* * Decide how many descriptors need to be processed - * We have to process entire queue in worst case + * In the worst cast we'll have to process entire queue */ - for(cnt = 0 ; cnt < RING_DESC_NR ; ++cnt) + for(cnt = 0 ; cnt < RING_DESC_NR ; ) { - if(rxdesc[i].descwb.flags & RXWBFLAG_OWN) { + rxdesc = (struct RxDesc*)(rxring->desc) + i; + if((rxdesc->descwb.flags & RXWBFLAG_OWN) || + !(rxdesc->descwb.desccnt & RXWBDCNT_WBCPL) + ) { rxring->next_to_clean = i; break; } - if(unlikely(++i == RING_DESC_NR)) - i = 0; + desccnt = rxdesc->descwb.desccnt & RXWBDCNT_DCNT; + + if(unlikely((cnt += desccnt) >= RING_DESC_NR)) { + cnt -= desccnt; + break; + } + + if(unlikely((i += desccnt) >= RING_DESC_NR)) + i -= RING_DESC_NR; } spin_unlock(&jme->recv_lock); @@ -634,50 +815,105 @@ __always_inline static void jme_process_receive(struct net_device *netdev) * --- save for multiple cpu handling */ for( i = start ; cnt-- ; ) { - /* - * Pass received packet to kernel - */ - rxbuf = (void*)ALIGN((unsigned long)(rxring->buf_virt[i]), 8); - desccnt = rxdesc[i].descwb.desccnt & RXWBDCNT_DCNT; - framesize = le16_to_cpu(rxdesc[i].descwb.framesize); - skb = dev_alloc_skb(framesize); - if(!skb) { - printk(KERN_ERR PFX "Out of memory.\n"); - ++(netdev->stats.rx_dropped); + rxdesc = (struct RxDesc*)(rxring->desc) + i; + desccnt = rxdesc->descwb.desccnt & RXWBDCNT_DCNT; + rxbi = rxring->bufinf + i; + if(unlikely( + /* + * Drop and record error packet + */ + rxdesc->descwb.errstat & RXWBERR_ALLERR || + desccnt > 1)) { + if(rxdesc->descwb.errstat & RXWBERR_OVERUN) + ++(NET_STAT.rx_fifo_errors); + else if(rxdesc->descwb.errstat & RXWBERR_CRCERR) + ++(NET_STAT.rx_frame_errors); + else { + ++(NET_STAT.rx_errors); +#ifdef RX_ERR_DEBUG + dprintk(netdev->name, "err: %02x\n", rxdesc->descwb.errstat); +#endif + } + + if(desccnt > 1) + cnt -= desccnt-1; + + for(j=i,ccnt=desccnt;ccnt--;) { + jme_set_clean_rxdesc(jme, j); + + if(unlikely(++j == RING_DESC_NR)) + j = 0; + } } else { - skb_put(skb, framesize); - skb_copy_to_linear_data(skb, rxbuf, framesize); - skb->protocol = eth_type_trans(skb, netdev); - netif_rx(skb); - - netdev->last_rx = jiffies; - netdev->stats.rx_bytes += framesize; - ++(netdev->stats.rx_packets); + /* + * Pass received packet to kernel + */ + skb = rxbi->skb; + buf_dma = rxbi->mapping; + pci_dma_sync_single_for_cpu(jme->pdev, + buf_dma, + RX_BUF_SIZE, + PCI_DMA_FROMDEVICE); + + if(unlikely(jme_make_new_rx_buf(jme, i))) { + pci_dma_sync_single_for_device(jme->pdev, + buf_dma, + RX_BUF_SIZE, + PCI_DMA_FROMDEVICE); + ++(NET_STAT.rx_dropped); + } + else { + framesize = le16_to_cpu(rxdesc->descwb.framesize); + + skb_put(skb, framesize); + skb->protocol = eth_type_trans(skb, netdev); + + netif_rx(skb); + + if(le16_to_cpu(rxdesc->descwb.flags) & RXWBFLAG_DEST_MUL) + ++(NET_STAT.multicast); + + netdev->last_rx = jiffies; + NET_STAT.rx_bytes += framesize; + ++(NET_STAT.rx_packets); + } + + jme_set_clean_rxdesc(jme, i); + +#ifdef RX_PKT_DEBUG + dprintk(netdev->name, "DESCCNT: %u, FSIZE: %u, ADDRH: %08x, " + "ADDRL: %08x, FLAGS: %04x, STAT: %02x, " + "DST:%02x:%02x:%02x:%02x:%02x:%02x\n", + desccnt, + framesize, + le32_to_cpu(rxdesc->dw[2]), + le32_to_cpu(rxdesc->dw[3]), + le16_to_cpu(rxdesc->descwb.flags), + rxdesc->descwb.errstat, + rxbuf[0], rxbuf[1], rxbuf[2], + rxbuf[3], rxbuf[4], rxbuf[5]); +#endif + + } - dprintk("DESCCNT: %u, FSIZE: %u, ADDRH: %08x, " - "ADDRL: %08x, FLAGS: %04x, STAT: %02x, " - "DST:%02x:%02x:%02x:%02x:%02x:%02x, " - "DSTCRC: %d\n", - desccnt, - framesize, - le32_to_cpu(rxdesc[i].dw[2]), - le32_to_cpu(rxdesc[i].dw[3]), - le16_to_cpu(rxdesc[i].descwb.flags), - rxdesc[i].descwb.stat, - rxbuf[0], rxbuf[1], rxbuf[2], - rxbuf[3], rxbuf[4], rxbuf[5], - ether_crc(ETH_ALEN, rxbuf) & 0x3F); + if(unlikely((i+=desccnt) >= RING_DESC_NR)) + i -= RING_DESC_NR; - /* - * Cleanup descriptor for next receive - */ - jme_set_clean_rxdesc(jme, i); + } - if(unlikely(++i == RING_DESC_NR)) - i = 0; +} + +static void jme_rx_clean_tasklet(unsigned long arg) +{ + struct jme_adapter *jme = (struct jme_adapter*)arg; + + jme_process_receive(jme); + if(jme->flags & JME_FLAG_RXQ0_EMPTY) { + jme_restart_rx_engine(jme); + jme->flags &= ~JME_FLAG_RXQ0_EMPTY; } } @@ -687,129 +923,119 @@ static irqreturn_t jme_intr(int irq, void *dev_id) struct net_device *netdev = dev_id; struct jme_adapter *jme = netdev_priv(netdev); irqreturn_t rc = IRQ_HANDLED; - __u32 intrstat = jread32(jme, JME_IEVE); -#ifdef RX_QUEUE_DEBUG - __u32 val; + __u32 intrstat; + +#if USE_IEVE_SHADOW + pci_dma_sync_single_for_cpu(jme->pdev, + jme->shadow_dma, + sizeof(__u32) * SHADOW_REG_NR, + PCI_DMA_FROMDEVICE); + intrstat = jme->shadow_regs[SHADOW_IEVE]; + jme->shadow_regs[SHADOW_IEVE] = 0; +#else + intrstat = jread32(jme, JME_IEVE); #endif -#if 0 - /* - * Don't disable interrupt, the driver should be - * working fine with multiple interrupt handling - * at the same time. (When Multi-core CPU) - */ - /* - * Temporary disable all Interrupts From Our NIC - */ - jwrite32(jme, JME_IENC, INTR_ENABLE); - wmb(); +#ifdef INTERRUPT_DEBUG + dprintk(netdev->name, "Interrupt received(%08x) @ %lu.\n", intrstat, jiffies); #endif - dprintk("Interrupt received(%08x).\n", intrstat); - - /* * Check if it's really an interrupt for us * and if the device still exist */ - if((intrstat & INTR_ENABLE) == 0 || intrstat == ~0) { + if((intrstat & INTR_ENABLE) == 0) { + rc = IRQ_NONE; + goto out; + } + if(unlikely(intrstat == ~((typeof(intrstat))0))) { rc = IRQ_NONE; goto out; } + if(intrstat & INTR_LINKCH) { /* * Process Link status change event */ - jme_check_link(netdev); - - /* - * Write 1 clear Link status change Interrupt - */ - jwrite32(jme, JME_IEVE, INTR_LINKCH); + tasklet_schedule(&jme->linkch_task); } - if(intrstat & INTR_RX0) { + if(intrstat & INTR_RX0EMP) { /* * Process event */ - jme_process_receive(netdev); - - /* - * Write 1 clear Interrupt - */ - jwrite32(jme, JME_IEVE, INTR_RX0); - - dprintk("Received From Queue 0.\n"); - -#ifdef RX_QUEUE_DEBUG - //Poll out the Receive Queue Next Descriptor Address/Status - val = jread32(jme, JME_RXCS); - val |= RXCS_QST; - jwrite32(jme, JME_RXCS, val); - wmb(); - val = jread32(jme, JME_RXNDA); - dprintk("NEXT_RX_DESC.(%08x)\n", val); -#endif + jme->flags |= JME_FLAG_RXQ0_EMPTY; + jeprintk(netdev->name, "Ranout of Receive Queue 0.\n"); } - if(intrstat & INTR_RX0EMP) { + if(intrstat & INTR_RX0) { /* - * Write 1 clear Interrupt + * Process event */ - jwrite32(jme, JME_IEVE, INTR_RX0EMP); + tasklet_schedule(&jme->rxclean_task); - dprintk("Received Queue 0 is running-out.\n"); +#ifdef RX_PKT_DEBUG + dprintk(netdev->name, "Received From Queue 0.\n"); +#endif } if(intrstat & INTR_TX0) { /* * Process event */ - jme_process_tx_complete(netdev); - - /* - * Write 1 clear Interrupt - */ - jwrite32(jme, JME_IEVE, INTR_TX0); + tasklet_schedule(&jme->txclean_task); - dprintk("Queue 0 transmit complete.\n"); +#ifdef TX_PKT_DEBUG + dprintk(netdev->name, "Queue 0 transmit complete.\n"); +#endif } -out: + if((intrstat & ~INTR_ENABLE) != 0) { +#ifdef INTERRUPT_DEBUG + dprintk(netdev->name, "Some interrupt event not handled: %08x\n", intrstat & ~INTR_ENABLE); +#endif + } -#if 0 /* - * Re-enable interrupts + * Deassert interrupts */ - wmb(); - jwrite32(jme, JME_IENS, INTR_ENABLE); -#endif + jwrite32(jme, JME_IEVE, intrstat & INTR_ENABLE); + +out: return rc; } static int jme_open(struct net_device *netdev) { struct jme_adapter *jme = netdev_priv(netdev); - int CHECK_VAR; + int rc; - CHECK_AND_GOTO(request_irq(jme->pdev->irq, jme_intr, IRQF_SHARED, - netdev->name, netdev), - err_out, - "Requesting IRQ error.") + rc = request_irq(jme->pdev->irq, jme_intr, + IRQF_SHARED, netdev->name, netdev); + if(rc) { + printk(KERN_ERR PFX "Requesting IRQ error.\n"); + goto err_out; + } - CHECK_AND_GOTO(jme_setup_rx_resources(jme), - err_out_free_irq, - "Error allocating resources for RX.") + rc = jme_setup_rx_resources(jme); + if(rc) { + printk(KERN_ERR PFX "Allocating resources for RX error.\n"); + goto err_out_free_irq; + } - CHECK_AND_GOTO(jme_setup_tx_resources(jme), - err_out_free_rx_resources, - "Error allocating resources for TX.") + + rc = jme_setup_tx_resources(jme); + if(rc) { + printk(KERN_ERR PFX "Allocating resources for TX error.\n"); + goto err_out_free_rx_resources; + } jme_reset_mac_processor(jme); jme_check_link(netdev); + jme_enable_shadow(jme); jme_start_irq(jme); jme_enable_rx_engine(jme); jme_enable_tx_engine(jme); @@ -824,7 +1050,7 @@ err_out_free_irq: err_out: netif_stop_queue(netdev); netif_carrier_off(netdev); - return CHECK_VAR; + return rc; } static int jme_close(struct net_device *netdev) @@ -835,8 +1061,12 @@ static int jme_close(struct net_device *netdev) netif_carrier_off(netdev); jme_stop_irq(jme); + jme_disable_shadow(jme); free_irq(jme->pdev->irq, jme->dev); + tasklet_kill(&jme->linkch_task); + tasklet_kill(&jme->txclean_task); + tasklet_kill(&jme->rxclean_task); jme_disable_rx_engine(jme); jme_disable_tx_engine(jme); jme_free_rx_resources(jme); @@ -852,7 +1082,6 @@ static int jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) struct TxDesc *txdesc = txring->desc; int idx; - /* * Check if transmit queue is already full * and take one descriptor to use @@ -861,39 +1090,37 @@ static int jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) idx = txring->next_to_use; if(unlikely(txdesc[idx].desc1.flags & TXFLAG_OWN)) { spin_unlock(&jme->xmit_lock); +#ifdef TX_BUSY_DEBUG + dprintk(netdev->name, "TX Device busy.\n"); +#endif return NETDEV_TX_BUSY; } if(unlikely(++(txring->next_to_use) == RING_DESC_NR)) txring->next_to_use = 0; spin_unlock(&jme->xmit_lock); - /* * Fill up TX descriptors */ - skb_copy_from_linear_data(skb, - (void*)ALIGN((unsigned long)(txring->buf_virt[idx]), 8), - skb->len); - jme_set_new_txdesc(jme, idx, skb->len); - - /* - * Since still using copy now. we could free it here. - */ - dev_kfree_skb(skb); + jme_set_new_txdesc(jme, idx, skb); /* * Tell MAC HW to send */ - jwrite32(jme, JME_TXCS, TXCS_QUEUE0S | - TXCS_DEFAULT | - TXCS_SELECT_QUEUE0 | - TXCS_ENABLE); + jwrite32(jme, JME_TXCS, jme->reg_txcs | + TXCS_SELECT_QUEUE0 | + TXCS_QUEUE0S | + TXCS_ENABLE); + +#ifdef TX_PKT_DEBUG + dprintk(netdev->name, "Asked to transmit.\n"); +#endif - netdev->stats.tx_bytes += skb->len; - ++(netdev->stats.tx_packets); + NET_STAT.tx_bytes += skb->len; + ++(NET_STAT.tx_packets); netdev->trans_start = jiffies; - return 0; + return NETDEV_TX_OK; } static int jme_set_macaddr(struct net_device *netdev, void *p) @@ -912,10 +1139,10 @@ static int jme_set_macaddr(struct net_device *netdev, void *p) addr->sa_data[2] << 16 | addr->sa_data[1] << 8 | addr->sa_data[0]; - jwrite32(jme, JME_RXUMA, val); + jwrite32(jme, JME_RXUMA_LO, val); val = addr->sa_data[5] << 8 | addr->sa_data[4]; - jwrite32(jme, JME_RXUMA+4, val); + jwrite32(jme, JME_RXUMA_HI, val); spin_unlock(&jme->macaddr_lock); return 0; @@ -946,7 +1173,8 @@ static void jme_set_multi(struct net_device *netdev) ++i, mclist = mclist->next) { bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3F; mc_hash[bit_nr >> 5] |= 1 << (bit_nr & 0x1F); - dprintk("Adding MCAddr: " +#ifdef SET_MULTI_DEBUG + dprintk(netdev->name, "Adding MCAddr: " "%02x:%02x:%02x:%02x:%02x:%02x (%d)\n", mclist->dmi_addr[0], mclist->dmi_addr[1], @@ -955,10 +1183,11 @@ static void jme_set_multi(struct net_device *netdev) mclist->dmi_addr[4], mclist->dmi_addr[5], bit_nr); +#endif } - jwrite32(jme, JME_RXMCHT, mc_hash[0]); - jwrite32(jme, JME_RXMCHT+4, mc_hash[1]); + jwrite32(jme, JME_RXMCHT_LO, mc_hash[0]); + jwrite32(jme, JME_RXMCHT_HI, mc_hash[1]); } @@ -966,7 +1195,9 @@ static void jme_set_multi(struct net_device *netdev) jwrite32(jme, JME_RXMCS, val); spin_unlock(&jme->macaddr_lock); - dprintk("RX Mode changed: %08x\n", val); +#ifdef SET_MULTI_DEBUG + dprintk(netdev->name, "RX Mode changed: %08x\n", val); +#endif } static int jme_change_mtu(struct net_device *dev, int new_mtu) @@ -1024,25 +1255,30 @@ static const struct ethtool_ops jme_ethtool_ops = { static int __devinit jme_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - int CHECK_VAR = 0; + int rc = 0; struct net_device *netdev; struct jme_adapter *jme; - DECLARE_MAC_BUF(mac); /* * set up PCI device basics */ - CHECK_AND_GOTO(pci_enable_device(pdev), - err_out, - "Cannot enable PCI device.") + rc = pci_enable_device(pdev); + if(rc) { + printk(KERN_ERR PFX "Cannot enable PCI device.\n"); + goto err_out; + } - CHECK_AND_GOTO(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM), - err_out_disable_pdev, - "No PCI resource region found.") + if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + printk(KERN_ERR PFX "No PCI resource region found.\n"); + rc = -ENOMEM; + goto err_out_disable_pdev; + } - CHECK_AND_GOTO(pci_request_regions(pdev, DRV_NAME), - err_out_disable_pdev, - "Cannot obtain PCI resource region.") + rc = pci_request_regions(pdev, DRV_NAME); + if(rc) { + printk(KERN_ERR PFX "Cannot obtain PCI resource region.\n"); + goto err_out_disable_pdev; + } pci_set_master(pdev); @@ -1051,8 +1287,8 @@ static int __devinit jme_init_one(struct pci_dev *pdev, */ netdev = alloc_etherdev(sizeof(struct jme_adapter)); if(!netdev) { - CHECK_VAR = -ENOMEM; - goto err_out_disable_pdev; + rc = -ENOMEM; + goto err_out_release_regions; } netdev->open = jme_open; netdev->stop = jme_close; @@ -1074,28 +1310,50 @@ static int __devinit jme_init_one(struct pci_dev *pdev, jme->dev = netdev; jme->regs = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); - if (!jme->regs) { + if (!(jme->regs)) { rc = -ENOMEM; goto err_out_free_netdev; } + jme->shadow_regs = pci_alloc_consistent(pdev, + sizeof(__u32) * SHADOW_REG_NR, + &(jme->shadow_dma)); + if (!(jme->shadow_regs)) { + rc = -ENOMEM; + goto err_out_unmap; + } + spin_lock_init(&jme->xmit_lock); spin_lock_init(&jme->recv_lock); spin_lock_init(&jme->macaddr_lock); spin_lock_init(&jme->phy_lock); + tasklet_init(&jme->linkch_task, + &jme_link_change_tasklet, + (unsigned long) jme); + tasklet_init(&jme->txclean_task, + &jme_tx_clean_tasklet, + (unsigned long) jme); + tasklet_init(&jme->rxclean_task, + &jme_rx_clean_tasklet, + (unsigned long) jme); jme->mii_if.dev = netdev; jme->mii_if.phy_id = 1; jme->mii_if.supports_gmii = 1; jme->mii_if.mdio_read = jme_mdio_read; jme->mii_if.mdio_write = jme_mdio_write; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) + netdev->get_stats = &(jme_get_stats); +#endif /* * Reset MAC processor and reload EEPROM for MAC Address */ jme_clear_pm(jme); jme_reset_mac_processor(jme); - CHECK_AND_GOTO(jme_reload_eeprom(jme), - err_out_unmap, - "Rload eeprom for reading MAC Address error."); + rc = jme_reload_eeprom(jme); + if(rc) { + printk(KERN_ERR PFX "Rload eeprom for reading MAC Address error.\n"); + goto err_out_free_shadow; + } jme_load_macaddr(netdev); @@ -1108,30 +1366,41 @@ static int __devinit jme_init_one(struct pci_dev *pdev, /* * Register netdev */ - CHECK_AND_GOTO(register_netdev(netdev), - err_out_unmap, - "Cannot register net device.") - - printk(KERN_INFO "%s: JMC250 gigabit eth at %llx, %s, IRQ %d\n", - netdev->name, - (unsigned long long) pci_resource_start(pdev, 0), - print_mac(mac, netdev->dev_addr), - pdev->irq); + rc = register_netdev(netdev); + if(rc) { + printk(KERN_ERR PFX "Cannot register net device.\n"); + goto err_out_free_shadow; + } - pci_set_drvdata(pdev, netdev); + jprintk(netdev->name, + "JMC250 gigabit eth at %llx, %02x:%02x:%02x:%02x:%02x:%02x, IRQ %d\n", + (unsigned long long) pci_resource_start(pdev, 0), + netdev->dev_addr[0], + netdev->dev_addr[1], + netdev->dev_addr[2], + netdev->dev_addr[3], + netdev->dev_addr[4], + netdev->dev_addr[5], + pdev->irq); return 0; +err_out_free_shadow: + pci_free_consistent(pdev, + sizeof(__u32) * SHADOW_REG_NR, + jme->shadow_regs, + jme->shadow_dma); err_out_unmap: iounmap(jme->regs); err_out_free_netdev: pci_set_drvdata(pdev, NULL); free_netdev(netdev); +err_out_release_regions: + pci_release_regions(pdev); err_out_disable_pdev: pci_disable_device(pdev); - pci_set_drvdata(pdev, NULL); err_out: - return CHECK_VAR; + return rc; } static void __devexit jme_remove_one(struct pci_dev *pdev) @@ -1140,6 +1409,10 @@ static void __devexit jme_remove_one(struct pci_dev *pdev) struct jme_adapter *jme = netdev_priv(netdev); unregister_netdev(netdev); + pci_free_consistent(pdev, + sizeof(__u32) * SHADOW_REG_NR, + jme->shadow_regs, + jme->shadow_dma); iounmap(jme->regs); pci_set_drvdata(pdev, NULL); free_netdev(netdev); @@ -1168,8 +1441,8 @@ static struct pci_driver jme_driver = { static int __init jme_init_module(void) { - printk(KERN_INFO "jme: JMicron JMC250 gigabit ethernet " - "driver version %s\n", DRV_VERSION); + printk(KERN_INFO PFX "JMicron JMC250 gigabit ethernet " + "driver version %s\n", DRV_VERSION); return pci_register_driver(&jme_driver); }