作者:gfree.wind@gmail.com
博客:linuxfocus.blog.chinaunix.net
上次学习IP包的发送流程时,学习到了dev_queue_xmit这个函数。
- int dev_queue_xmit(struct sk_buff *skb)
- {
- struct net_device *dev = skb->dev;
- struct netdev_queue *txq;
- struct Qdisc *q;
- int rc = -ENOMEM;
- /* Disable soft irqs for various locks below. Also
- * stops preemption for RCU.
- */
- rcu_read_lock_bh();
/* 得到发送队列 */
- txq = dev_pick_tx(dev, skb);
- q = rcu_dereference_bh(txq->qdisc);
- #ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
- #endif
- if (q->enqueue) {
- /* 一般的dev都应该进入这里 */
- rc = __dev_xmit_skb(skb, q, dev, txq);
- goto out;
- }
...... ......
- }
/*
 * __dev_xmit_skb - pass one skb to qdisc q of device dev / TX queue txq.
 *
 * Takes the qdisc root lock (qdisc_lock(q)); if qdisc_is_running(q) was
 * already true on entry, q->busylock is taken first. One of three paths
 * is then followed:
 *   1. __QDISC_STATE_DEACTIVATED is set: the skb is freed and
 *      NET_XMIT_DROP is returned.
 *   2. TCQ_F_CAN_BYPASS is set, the qdisc is empty and qdisc_run_begin(q)
 *      succeeds: the skb goes straight to sch_direct_xmit() without being
 *      enqueued, and NET_XMIT_SUCCESS is returned.
 *   3. Otherwise: the skb is enqueued with qdisc_enqueue_root() (whose
 *      result becomes the return value) and, if qdisc_run_begin(q)
 *      succeeds, __qdisc_run(q) is called.
 * Both locks are released before returning.
 */
- static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
-
struct net_device *dev,
-
struct netdev_queue *txq)
-
{
-
spinlock_t *root_lock = qdisc_lock(q);
-
bool contended = qdisc_is_running(q);
-
int rc;
-
-
/*
-
* Heuristic to force contended enqueues to serialize on a
-
* separate lock before trying to get qdisc main lock.
-
* This permits __QDISC_STATE_RUNNING owner to get the lock more often
-
* and dequeue packets faster.
-
*/
-
if (unlikely(contended))
-
spin_lock(&q->busylock);
-
-
spin_lock(root_lock);
-
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
- /* The qdisc is deactivated: drop the packet */
-
kfree_skb(skb);
-
rc = NET_XMIT_DROP;
-
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
-
qdisc_run_begin(q)) {
/*
Bypass path: the qdisc allows bypass (TCQ_F_CAN_BYPASS), holds no
packets, and qdisc_run_begin() succeeded, so the skb is transmitted
directly instead of being enqueued. The article treats this as the
less common path and does not analyse it further.
*/
-
/*
-
* This is a work-conserving queue; there are no old skbs
-
* waiting to be sent out; and the qdisc is not running -
-
* xmit the skb directly.
-
*/
-
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
-
skb_dst_force(skb);
-
__qdisc_update_bstats(q, skb->len);
-
if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
-
if (unlikely(contended)) {
-
spin_unlock(&q->busylock);
-
contended = false;
-
}
-
__qdisc_run(q);
-
} else
-
qdisc_run_end(q);
-
-
rc = NET_XMIT_SUCCESS;
-
} else {
/* Normal path */
/* Article author's note: force a reference on the skb's dst so that it is not released by other code */
-
skb_dst_force(skb);
- /* Enqueue the packet on the qdisc */
-
rc = qdisc_enqueue_root(skb, q);
/* Article author's note: if the qdisc is not running yet, mark it as running and start draining it */
-
if (qdisc_run_begin(q)) {
-
if (unlikely(contended)) {
-
spin_unlock(&q->busylock);
-
contended = false;
-
}
-
__qdisc_run(q);
-
}
-
}
-
spin_unlock(root_lock);
-
if (unlikely(contended))
-
spin_unlock(&q->busylock);
-
return rc;
- }
将数据包加入队列的函数是通过q->enqueue的回调实现的,那么这个enqueue的回调钩子函数是何时注册上的呢?
请看dev_activate,用于激活网卡。
/*
当没有指定queueing discipline时,就使用默认的discipline
*/
这里不列出attach_default_qdiscs的代码了,一般情况下,网卡只有一个queue时,这个默认的discipline为pfifo_fast(对应的操作集是pfifo_fast_ops)。
那么对于我们来说,就可以确定一般情况下默认的enqueue函数为pfifo_fast_enqueue。
上面就是__dev_xmit_skb中调用的q->enqueue的代码,将数据包加入到了dev->_tx所对应的队列中。
然后我还需要回到__dev_xmit_skb中,在将数据包加入到队列中后,要保证qdisc为运行态。
查看__qdisc_run的代码。
/*
qdisc_restart中发送了数据包。
这里是循环发送,直至qdisc_restart返回0
如果其它进程需要CPU,或者发送已经持续了较长的时间(1个jiffy),也会跳出循环体。
*/
进入qdisc_restart->sch_direct_xmit,该函数用于发送一个数据包
到此,本文长度已经不短了,先就此结尾。IP包的发送流程比接收流程要复杂得多,估计还需一篇博文才能基本走完。
请看dev_activate,用于激活网卡。
- void dev_activate(struct net_device *dev)
-
{
-
int need_watchdog;
-
-
/* No queueing discipline is attached to device;
-
create default one i.e. pfifo_fast for devices,
-
which need queueing and noqueue_qdisc for
-
virtual interfaces
-
*/
/*
当没有指定queueing discipline时,就使用默认的discipline
*/
-
if (dev->qdisc == &noop_qdisc)
-
attach_default_qdiscs(dev);
-
- ...... ......
- }
/*
 * Operations table for the "pfifo_fast" queueing discipline. Its
 * .enqueue hook is pfifo_fast_enqueue, so this is the function reached
 * through q->enqueue when pfifo_fast is the attached qdisc.
 */
- struct Qdisc_ops pfifo_fast_ops __read_mostly = {
-
.id = "pfifo_fast",
-
.priv_size = sizeof(struct pfifo_fast_priv),
-
.enqueue = pfifo_fast_enqueue,
-
.dequeue = pfifo_fast_dequeue,
-
.peek = pfifo_fast_peek,
-
.init = pfifo_fast_init,
-
.reset = pfifo_fast_reset,
-
.dump = pfifo_fast_dump,
-
.owner = THIS_MODULE,
- };
/*
 * pfifo_fast_enqueue - enqueue hook of the pfifo_fast qdisc.
 *
 * While the qdisc holds fewer packets than the device's tx_queue_len,
 * the skb is appended to the list of the band selected from
 * skb->priority, and the result of __qdisc_enqueue_tail() is returned.
 * Otherwise the skb is passed to qdisc_drop() and that result is
 * returned.
 */
- static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
-
{
-
if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
-
/* Map the (masked) skb priority to a band index */
int band = prio2band[skb->priority & TC_PRIO_MAX];
-
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-
struct sk_buff_head *list = band2list(priv, band);
-
-
/* Set this band's bit in the bitmap */
priv->bitmap |= (1 << band);
-
qdisc->q.qlen++;
-
return __qdisc_enqueue_tail(skb, qdisc, list);
-
}
-
-
return qdisc_drop(skb, qdisc);
- }
然后我还需要回到__dev_xmit_skb中,在将数据包加入到队列中后,要保证qdisc为运行态。
- rc = qdisc_enqueue_root(skb, q);
-
if (qdisc_run_begin(q)) {
-
if (unlikely(contended)) {
-
spin_unlock(&q->busylock);
-
contended = false;
-
}
-
__qdisc_run(q);
- }
/*
 * __qdisc_run - keep calling qdisc_restart(q) until it returns 0, or
 * until the loop decides to yield (need_resched() is true, or jiffies
 * has changed since entry). When yielding, __netif_schedule(q) is
 * called before leaving the loop. qdisc_run_end(q) is always called on
 * exit.
 */
- void __qdisc_run(struct Qdisc *q)
-
{
-
unsigned long start_time = jiffies;
/*
Packets are transmitted inside qdisc_restart().
The loop keeps sending until qdisc_restart() returns 0; it also
exits if another process needs the CPU or the loop has been running
for a comparatively long time (one jiffy).
*/
-
while (qdisc_restart(q)) {
-
/*
-
* Postpone processing if
-
* 1. another process needs the CPU;
-
* 2. we've been doing it for too long.
-
*/
- if (need_resched() || jiffies != start_time) {
- /*
- Defer the rest of the transmission to later (per the article author, via softirq)
- */
-
__netif_schedule(q);
-
break;
-
}
-
}
-
-
qdisc_run_end(q);
- }
/*
 * sch_direct_xmit - try to transmit a single skb through the driver.
 *
 * root_lock is released first and re-acquired before the result is
 * evaluated, so the caller is presumably expected to hold it on entry.
 * The driver call itself is made under HARD_TX_LOCK, and only if the TX
 * queue is neither stopped nor frozen.
 *
 * Return value:
 *   - dev_xmit_complete(ret) is true: qdisc_qlen(q);
 *   - ret == NETDEV_TX_LOCKED: result of handle_dev_cpu_collision();
 *   - otherwise: result of dev_requeue_skb() (the skb is requeued);
 *   - in all cases forced to 0 if it was non-zero and the TX queue is
 *     stopped or frozen.
 */
- int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
-
struct net_device *dev, struct netdev_queue *txq,
-
spinlock_t *root_lock)
-
{
-
int ret = NETDEV_TX_BUSY;
-
-
/* And release qdisc */
-
spin_unlock(root_lock);
-
- HARD_TX_LOCK(dev, txq, smp_processor_id());
- // The TX queue is neither stopped nor frozen
-
if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
-
ret = dev_hard_start_xmit(skb, dev, txq); // send the packet
-
-
HARD_TX_UNLOCK(dev, txq);
-
-
spin_lock(root_lock);
-
-
if (dev_xmit_complete(ret)) {
- /* Driver sent out skb successfully or skb was consumed */
- // Sent successfully: return the qdisc's new queue length
-
ret = qdisc_qlen(q);
-
} else if (ret == NETDEV_TX_LOCKED) {
- /* Driver try lock failed */
- // Lock contention
-
ret = handle_dev_cpu_collision(skb, txq, q);
-
} else {
-
/* Driver returned NETDEV_TX_BUSY - requeue skb */
-
if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-
printk(KERN_WARNING "BUG %s code %d qlen %d\n",
-
dev->name, ret, q->q.qlen);
-
ret = dev_requeue_skb(skb, q);
-
}
-
-
if (ret && (netif_tx_queue_stopped(txq) ||
-
netif_tx_queue_frozen(txq)))
-
ret = 0;
-
-
return ret;
- }