ndev = alloc_etherdev(sizeof(struct fec_enet_private));
struct fec_enet_private *fep = netdev_priv(ndev);
这种通过结构体内部指针传递私有数据的方式在driver中非常常见。函数开头即为Ethernet Controller的DMA 控制器分配相应的buffer描述符:
/* Allocate memory for buffer descriptors. */
cbd_base = dma_alloc_noncacheable(NULL, BUFDES_SIZE, &fep->bd_dma,
GFP_KERNEL);
if (!cbd_base) {
printk("FEC: allocate descriptor memory failed?\n");
return -ENOMEM;
}
这里分配的缓冲区大小是(tx buffer个数+rx buffer个数)×buffer描述符大小:
/* Size in bytes of the whole descriptor area: one struct bufdesc for every RX ring entry plus one for every TX ring entry. */
#define BUFDES_SIZE ((RX_RING_SIZE + TX_RING_SIZE) * sizeof(struct bufdesc))
由于buffer描述符会被CPU以及DMA控制器访问,因此会存在Cache一致性问题,这里采用了dma_alloc_noncacheable()函数,即DMA一致性映射。这里采用一致性映射是因为CPU或者DMA控制器会以不可预知的方式去访问这段内存区,在Linux Kernel中解决Cache一致性问题有两种方案:DMA流式映射和DMA一致性映射,关于这两者的区别在《Understanding Linux Kernel》以及《LDD3》中均有介绍,我个人也总结了一篇博文初步讲述了这两者的区别:http://blog.163.com/thinki_cao/blog/static/83944875201362142939337。
这里分析一下DMA控制器,i.MX6的DMA控制器采用了环形buffer描述符,这里buffer分为两种,Legacy buffer descriptor是为了保持对前代Freescale器件的兼容性,而Enhanced buffer descriptor则提供了更多的功能,引用i.MX6Q的reference manual中的图:
而Enhanced buffer descriptor一共有32字节(8字节的Legacy部分加上24字节的扩展字段,参见下面的struct bufdesc),也是采用大端存储模式的,个人觉得这个Ethernet IP有点像是从PowerPC那边抠过来的。
可以从fec.h文件中找到对这两个描述符的定义:
/*
 * Buffer descriptor shared between the CPU and the FEC DMA engine.
 *
 * The first three fields form the legacy descriptor. The remaining fields
 * exist only when CONFIG_ENHANCED_BD is defined and make up the enhanced
 * descriptor.
 */
struct bufdesc {
	unsigned short cbd_datlen; /* Data length */
	unsigned short cbd_sc; /* Control and status info */
	unsigned long cbd_bufaddr; /* Buffer address */
#ifdef CONFIG_ENHANCED_BD
	unsigned long cbd_esc; /* Enhanced control/status word (e.g. the interrupt-enable flag) */
	unsigned long cbd_prot; /* NOTE(review): presumably protocol/checksum info - confirm against the reference manual */
	unsigned long cbd_bdu; /* NOTE(review): presumably the "descriptor updated" word - confirm */
	unsigned long ts; /* NOTE(review): presumably the IEEE 1588 timestamp - confirm */
	unsigned short res0[4]; /* Reserved */
#endif
};
如果定义了CONFIG_ENHANCED_BD宏,则开启对Enhanced buffer descriptor的支持。不过纵观整个driver程序,3.0.35的内核并没有用到enhanced buffer descriptor提供的一些功能,比如Enhanced transmit buffer descriptor中offset+8位置的PINS和IINS位:这两位让MAC内置的IP accelerator在硬件中完成协议校验和以及IP头校验和的计算与插入。而在yocto 3.10.17内核上,这些已经支持了!这也是3.0.35上的Ethernet driver性能不如3.10.17的原因之一吧。下面继续分析代码:
spin_lock_init(&fep->hw_lock); /* 初始化自旋锁 */
fep->netdev = ndev; /*把net_device的地址传给netdev*/
/* Get the Ethernet address */ fec_get_mac(ndev);
fec_get_mac会从多个地方获取mac地址:
/*
 * fec_get_mac - pick the MAC address for @ndev and store it in ndev->dev_addr.
 *
 * Candidate sources are tried in this order until one passes
 * is_valid_ether_addr():
 *
 * 1) module parameter via kernel command line in form
 *    fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0
 * 2) flash or fuse (via platform data); the value is copied into macaddr[]
 * 3) FEC mac registers set by bootloader
 *
 * The register value from step 3 is used as-is, without a further validity
 * check.
 */
static void __inline__ fec_get_mac(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
	unsigned char *iap, tmpaddr[ETH_ALEN];
	unsigned int addr_low;
	unsigned short addr_high;

	/* 1) module parameter */
	iap = macaddr;

	/* 2) platform data; iap still points at macaddr[], so this overwrites it */
	if (!is_valid_ether_addr(iap)) {
		if (pdata)
			memcpy(iap, pdata->mac, ETH_ALEN);
	}

	/* 3) FEC mac registers */
	if (!is_valid_ether_addr(iap)) {
		addr_low = be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW));
		addr_high = be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);

		/*
		 * Copy exactly 4 + 2 bytes. Storing through casted
		 * unsigned long / unsigned short pointers could be misaligned
		 * and would overrun the 6-byte array where long is 64 bits.
		 */
		memcpy(&tmpaddr[0], &addr_low, sizeof(addr_low));
		memcpy(&tmpaddr[4], &addr_high, sizeof(addr_high));
		iap = &tmpaddr[0];
	}

	memcpy(ndev->dev_addr, iap, ETH_ALEN);

	/* Adjust MAC if using macaddr: offset the last byte by the device id */
	if (iap == macaddr)
		ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->pdev->id;
}
1)首先是从全局变量macaddr获取mac地址,macaddr定义相关的代码如下:
/* MAC address override; as a static array it stays all-zero unless something sets it. */
static unsigned char macaddr[ETH_ALEN];
/* Register macaddr as a byte-array module parameter. */
module_param_array(macaddr, byte, NULL, 0);
MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
/* Hand the "fec_mac=" boot argument to fec_mac_addr_setup() for parsing. */
__setup("fec_mac=", fec_mac_addr_setup);
这里的__setup是用来从uboot传给内核的启动参数中捕获fec_mac(即mac地址)参数,并将该参数传递给fec_mac_addr_setup(char *mac_addr)函数进行解析的。如果uboot中没有传递mac参数,那么macaddr数组中的成员全是0。
2)检测1)中获取的mac地址是否合法,如果不合法,并且pdata指针不为空,则从设备的平台数据(platform_data)结构struct fec_platform_data中取出mac数组的值,并拷贝到macaddr中。
3)检测2)中获取的mac地址是否合法,如果不合法,则读取Ethernet控制器的mac地址寄存器来获取mac地址。
最后把mac地址传递给内核中net_device结构体中的dev_addr字段。
/* Set receive and transmit descriptor base. */fep->rx_bd_base = cbd_base;fep->tx_bd_base = cbd_base + RX_RING_SIZE;
设置tx_bd_base和rx_bd_base,即tx buffer descriptor base 和rx buffer descriptor base,示意图如下:
接着就是net_device以及fec_enet_private等结构体的设置:
/* The FEC Ethernet specific entries in the device structure */ndev->watchdog_timeo = TX_TIMEOUT; /* watchdong定时器唤醒间隔 */ndev->netdev_ops = &fec_netdev_ops;ndev->ethtool_ops = &fec_enet_ethtool_ops;fep->use_napi = FEC_NAPI_ENABLE; fep->napi_weight = FEC_NAPI_WEIGHT; if (fep->use_napi) { fec_rx_int_is_enabled(ndev, false); netif_napi_add(ndev, &fep->napi, fec_rx_poll, fep->napi_weight); }
/* Initialize the receive buffer descriptors. */bdp = fep->rx_bd_base;for (i = 0; i < RX_RING_SIZE; i++) {/* Initialize the BD for every fragment in the page. */bdp->cbd_sc = 0;bdp->cbd_bufaddr = 0;bdp++;}/* Set the last buffer to wrap */bdp--;bdp->cbd_sc |= BD_SC_WRAP;
fec_restart(ndev, 0);
/* Whack a reset. We should wait for this. */writel(1, fep->hwp + FEC_ECNTRL);udelay(10);/* if uboot don't set MAC address, get MAC address* from command line; if command line don't set MAC* address, get from OCOTP; otherwise, allocate random* address.*/memcpy(&temp_mac, dev->dev_addr, ETH_ALEN);writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW);writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH);/* Clear any outstanding interrupt. */writel(0xffc00000, fep->hwp + FEC_IEVENT);/* Reset all multicast. */writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);/* Set maximum receive buffer size. */writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE);/* Set receive and transmit descriptor base. */writel(fep->bd_dma, fep->hwp + FEC_R_DES_START);writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) * RX_RING_SIZE,fep->hwp + FEC_X_DES_START);
/* Reinit transmit descriptors */fec_enet_txbd_init(dev);fep->dirty_tx = fep->cur_tx = fep->tx_bd_base;fep->cur_rx = fep->rx_bd_base;/* Reset SKB transmit buffers. */fep->skb_cur = fep->skb_dirty = 0;for (i = 0; i <= TX_RING_MOD_MASK; i++) {if (fep->tx_skbuff[i]) {dev_kfree_skb_any(fep->tx_skbuff[i]);fep->tx_skbuff[i] = NULL;}}
接下来设置半双工或者全双工模式,默认情况下是半双工模式(即发送时不接收数据)/* Enable MII mode */if (duplex) {/* MII enable / FD enable */writel(OPT_FRAME_SIZE | 0x04, fep->hwp + FEC_R_CNTRL);writel(0x04, fep->hwp + FEC_X_CNTRL);} else {/* MII enable / No Rcv on Xmit */writel(OPT_FRAME_SIZE | 0x06, fep->hwp + FEC_R_CNTRL);writel(0x0, fep->hwp + FEC_X_CNTRL);}fep->full_duplex = duplex;/* Set MII speed */writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
if (fep->ptimer_present) {/* Set Timer count */ret = fec_ptp_start(fep->ptp_priv);if (ret) {fep->ptimer_present = 0;reg = 0x0;} elsereg = 0x0;} elsereg = 0x0;
static const struct net_device_ops fec_netdev_ops = {.ndo_open = fec_enet_open,.ndo_stop = fec_enet_close,.ndo_start_xmit = fec_enet_start_xmit,.ndo_set_multicast_list = set_multicast_list,.ndo_change_mtu = eth_change_mtu,.ndo_validate_addr = eth_validate_addr,.ndo_tx_timeout = fec_timeout,.ndo_set_mac_address = fec_set_mac_address,.ndo_do_ioctl = fec_enet_ioctl,#ifdef CONFIG_NET_POLL_CONTROLLER.ndo_poll_controller = fec_enet_netpoll,#endif};
if (fep->use_napi)napi_enable(&fep->napi);
clk_enable(fep->clk);
ret = fec_enet_alloc_buffers(ndev);if (ret)return ret;
/* Probe and connect to PHY when open the interface */ret = fec_enet_mii_probe(ndev);if (ret) {fec_enet_free_buffers(ndev);return ret;}phy_start(fep->phy_dev);netif_start_queue(ndev);fep->opened = 1;ret = -EINVAL;if (pdata->init && pdata->init(fep->phy_dev))return ret;return 0
spin_lock_irqsave(&fep->hw_lock, flags);
if (!fep->link) {/* Link is down or autonegotiation is in progress. */netif_stop_queue(ndev);spin_unlock_irqrestore(&fep->hw_lock, flags);return NETDEV_TX_BUSY;}
/* Fill in a Tx ring entry */bdp = fep->cur_tx;status = bdp->cbd_sc;
if (status & BD_ENET_TX_READY) {/* Ooops. All transmit buffers are full. Bail out.* This should not happen, since ndev->tbusy should be set.*/printk("%s: tx queue full!.\n", ndev->name);netif_stop_queue(ndev);spin_unlock_irqrestore(&fep->hw_lock, flags);return NETDEV_TX_BUSY;}
/* Clear all of the status flags */status &= ~BD_ENET_TX_STATS;/* Set buffer length and buffer pointer */bufaddr = skb->data;bdp->cbd_datlen = skb->len;
/** On some FEC implementations data must be aligned on* 4-byte boundaries. Use bounce buffers to copy data* and get it aligned. Ugh.*/if (((unsigned long) bufaddr) & FEC_ALIGNMENT) {unsigned int index;index = bdp - fep->tx_bd_base;bufaddr = PTR_ALIGN(fep->tx_bounce[index], FEC_ALIGNMENT + 1);memcpy(bufaddr, (void *)skb->data, skb->len);}
if (fep->ptimer_present) {if (fec_ptp_do_txstamp(skb)) {estatus = BD_ENET_TX_TS;status |= BD_ENET_TX_PTP;} elseestatus = 0;#ifdef CONFIG_ENHANCED_BDbdp->cbd_esc = (estatus | BD_ENET_TX_INT);bdp->cbd_bdu = 0;#endif}
/** Some design made an incorrect assumption on endian mode of* the system that it's running on. As the result, driver has to* swap every frame going to and coming from the controller.*/if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)swap_buffer(bufaddr, skb->len);
/* Save skb pointer */fep->tx_skbuff[fep->skb_cur] = skb;ndev->stats.tx_bytes += skb->len;fep->skb_cur = (fep->skb_cur+1) & TX_RING_MOD_MASK;
/* Push the data cache so the CPM does not get stale memory* data.*/bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr,FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE);/* Send it on its way. Tell FEC it's ready, interrupt when done,* it's the last BD of the frame, and to put the CRC on the end.*/status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR| BD_ENET_TX_LAST | BD_ENET_TX_TC);bdp->cbd_sc = status;
/* Trigger transmission start */writel(0, fep->hwp + FEC_X_DES_ACTIVE);
bdp_pre = fec_enet_get_pre_txbd(ndev);if ((id_entry->driver_data & FEC_QUIRK_BUG_TKT168103) &&!(bdp_pre->cbd_sc & BD_ENET_TX_READY))schedule_delayed_work(&fep->fixup_trigger_tx,msecs_to_jiffies(1));
/* If this was the last BD in the ring, start at the beginning again. */if (status & BD_ENET_TX_WRAP)bdp = fep->tx_bd_base;elsebdp++;
if (bdp == fep->dirty_tx) {fep->tx_full = 1;netif_stop_queue(ndev);}fep->cur_tx = bdp;
spin_unlock_irqrestore(&fep->hw_lock, flags);return NETDEV_TX_OK;
do {int_events = readl(fep->hwp + FEC_IEVENT);writel(int_events, fep->hwp + FEC_IEVENT);……………………} while (int_events);
if (int_events & FEC_ENET_RXF) {ret = IRQ_HANDLED;spin_lock_irqsave(&fep->hw_lock, flags);if (fep->use_napi) {/* Disable the RX interrupt */if (napi_schedule_prep(&fep->napi)) {fec_rx_int_is_enabled(ndev, false);__napi_schedule(&fep->napi);}} elsefec_enet_rx(ndev);spin_unlock_irqrestore(&fep->hw_lock, flags);}
/* Transmit OK, or non-fatal error. Update the buffer* descriptors. FEC handles all errors, we just discover* them as part of the transmit process.*/if (int_events & FEC_ENET_TXF) {ret = IRQ_HANDLED;fec_enet_tx(ndev);}
bdp = fep->dirty_tx;
while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) {……………………/* Update pointer to next buffer descriptor to be transmitted */if (status & BD_ENET_TX_WRAP)bdp = fep->tx_bd_base;elsebdp++;}
if (bdp == fep->cur_tx && fep->tx_full == 0)break;
if (bdp->cbd_bufaddr)dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE);bdp->cbd_bufaddr = 0;
skb = fep->tx_skbuff[fep->skb_dirty];if (!skb)break;
/* Check for errors. */if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |BD_ENET_TX_RL | BD_ENET_TX_UN |BD_ENET_TX_CSL)) {ndev->stats.tx_errors++;if (status & BD_ENET_TX_HB) /* No heartbeat */ndev->stats.tx_heartbeat_errors++;if (status & BD_ENET_TX_LC) /* Late collision */ndev->stats.tx_window_errors++;if (status & BD_ENET_TX_RL) /* Retrans limit */ndev->stats.tx_aborted_errors++;if (status & BD_ENET_TX_UN) /* Underrun */ndev->stats.tx_fifo_errors++;if (status & BD_ENET_TX_CSL) /* Carrier lost */ndev->stats.tx_carrier_errors++;} else {ndev->stats.tx_packets++;}
if (status & BD_ENET_TX_READY)printk("HEY! Enet xmit interrupt and TX_READY.\n");
/* Deferred means some collisions occurred during transmit,* but we eventually sent the packet OK.*/if (status & BD_ENET_TX_DEF)ndev->stats.collisions++;
#if defined(CONFIG_ENHANCED_BD)if (fep->ptimer_present) {if (bdp->cbd_esc & BD_ENET_TX_TS)fec_ptp_store_txstamp(fpp, skb, bdp);}#elif defined(CONFIG_IN_BAND)if (fep->ptimer_present) {if (status & BD_ENET_TX_PTP)fec_ptp_store_txstamp(fpp, skb, bdp);}#endif
/* Free the sk buffer associated with this last transmit */dev_kfree_skb_any(skb);fep->tx_skbuff[fep->skb_dirty] = NULL;fep->skb_dirty = (fep->skb_dirty + 1) & TX_RING_MOD_MASK;
/* Since we have freed up a buffer, the ring is no longer full*/if (fep->tx_full) {fep->tx_full = 0;if (netif_queue_stopped(ndev))netif_wake_queue(ndev);}
fep->dirty_tx = bdp;
bdp = fep->cur_rx;while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {………………/* Update BD pointer to next entry */if (status & BD_ENET_RX_WRAP)bdp = fep->rx_bd_base;elsebdp++;}fep->cur_rx = bdp;
/* Since we have allocated space to hold a complete frame,* the last indicator should be set.*/if ((status & BD_ENET_RX_LAST) == 0)printk("FEC ENET: rcv is not +last\n");
if (!fep->opened)goto rx_processing_done;
/* Check for errors. */if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |BD_ENET_RX_CR | BD_ENET_RX_OV)) {ndev->stats.rx_errors++;if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) {/* Frame too long or too short. */ndev->stats.rx_length_errors++;}if (status & BD_ENET_RX_NO) /* Frame alignment */ndev->stats.rx_frame_errors++;if (status & BD_ENET_RX_CR) /* CRC Error */ndev->stats.rx_crc_errors++;if (status & BD_ENET_RX_OV) /* FIFO overrun */ndev->stats.rx_fifo_errors++;}/* Report late collisions as a frame error.* On this error, the BD is closed, but we don't know what we* have in the buffer. So, just drop this frame on the floor.*/if (status & BD_ENET_RX_CL) {ndev->stats.rx_errors++;ndev->stats.rx_frame_errors++;goto rx_processing_done;}
/* Process the incoming frame. */ndev->stats.rx_packets++;pkt_len = bdp->cbd_datlen;ndev->stats.rx_bytes += pkt_len;
data = (__u8 *)__va(bdp->cbd_bufaddr);if (bdp->cbd_bufaddr)dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE);if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)swap_buffer(data, pkt_len);
/* This does 16 byte alignment, exactly what we need.* The packet length includes FCS, but we don't want to* include that when passing upstream as it messes up* bridging applications.*/skb = dev_alloc_skb(pkt_len - 4 + NET_IP_ALIGN);if (unlikely(!skb)) { printk("%s: Memory squeeze, dropping packet.\n", ndev->name); ndev->stats.rx_dropped++; } else { skb_reserve(skb, NET_IP_ALIGN); skb_put(skb, pkt_len - 4); /* Make room */ skb_copy_to_linear_data(skb, data, pkt_len - 4); /* 1588 messeage TS handle */ if (fep->ptimer_present) fec_ptp_store_rxstamp(fpp, skb, bdp); skb->protocol = eth_type_trans(skb, ndev); netif_rx(skb); }
bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE);
rx_processing_done:/* Clear the status flags for this buffer */status &= ~BD_ENET_RX_STATS;/* Mark the buffer empty */status |= BD_ENET_RX_EMPTY;bdp->cbd_sc = status;#ifdef CONFIG_ENHANCED_BDbdp->cbd_esc = BD_ENET_RX_INT;bdp->cbd_prot = 0;bdp->cbd_bdu = 0;#endif/* Update BD pointer to next entry */if (status & BD_ENET_RX_WRAP)bdp = fep->rx_bd_base;elsebdp++;
/* Doing this here will keep the FEC running while we process* incoming frames. On a heavily loaded network, we should be* able to keep up at the expense of system resources.*/writel(0, fep->hwp + FEC_R_DES_ACTIVE);
fep->cur_rx = bdp;