7 网桥数据包的处理流程
网桥处理包遵循以下几条原则:
1. 在一个接口上接收的包不会再在那个接口上发送这个数据包;
2. 每个接收到的数据包都要学习其源地址;
3. 如果数据包是多播或广播包,则要在同一个网段中除了接收端口外的其他所有端口发送这个数据包,如果上层协议栈对多播包感兴趣,则需要把数据包提交给上层协议栈;
4. 如果数据包的目的MAC地址不能在CAM表中找到,则要在同一个网段中除了接收端口外的其他所有端口发送这个数据包;
5. 如果能够在CAM表中查询到目的MAC地址,则在特定的端口上发送这个数据包,如果发送端口和接收端口是同一端口则不发送;
网桥在整个网络子系统中处理可用下列简图说明:
网络数据包在软中断处理时会进行网桥部分处理,大致的处理流程如下(处理函数调用链):
7.1 netif_receive_skb
netif_receive_skb函数主要做三件事情:
1. 如果有抓包程序(socket)需要skb,则将skb复制给他们;
2. 处理桥接,即如果开启了网桥,进行网桥处理;
3. 将skb交给网络层;
/*
 * netif_receive_skb - deliver a received buffer to taps, the bridge hook
 * and the protocol handlers.
 * @skb: the received buffer
 *
 * Steps, in order:
 *   1. every matching handler on ptype_all gets the buffer;
 *   2. handle_bridge() may take the buffer over;
 *   3. handlers in ptype_base whose type equals skb->protocol get it.
 *
 * Returns NET_RX_DROP when netpoll_rx() returns nonzero or when no handler
 * is left to receive the buffer; otherwise whatever the last step that ran
 * left in ret (a handler's return value, the ing_filter() verdict, or the
 * value handle_bridge() stored through &ret).
 */
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
struct net_device *orig_dev;
int ret = NET_RX_DROP;
unsigned short type;
/* Device has a poll method and netpoll_rx() returned nonzero: stop here. */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
/* Stamp the buffer only when tstamp.off_sec is still zero. */
if (!skb->tstamp.off_sec)
net_timestamp(skb);
/* Record the receiving device once; later code may change skb->dev. */
if (!skb->input_dev)
skb->input_dev = skb->dev;
/* NOTE(review): skb_bond() is opaque here; its result is what every
 * handler below receives as orig_dev. */
orig_dev = skb_bond(skb);
__get_cpu_var(netdev_rx_stat).total++;
/* Transport and network header pointers both start at the current data
 * pointer; mac_len is the distance from the MAC header to that point. */
skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;
pt_prev = NULL;
/* Both handler lists below are walked with list_for_each_entry_rcu(). */
rcu_read_lock();
#ifdef CONFIG_NET_CLS_ACT
/* TC_NCLS set: clear the flag and skip both the ptype_all pass and
 * ing_filter() by jumping straight to ncls. */
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif
/*
 * Pass 1: handlers registered on ptype_all that are bound to no device
 * or to this device. Delivery runs one entry behind: each match delivers
 * the previously remembered handler and remembers the current one, so
 * the last match is still pending in pt_prev when the loop ends.
 */
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
#ifdef CONFIG_NET_CLS_ACT
/* Flush the pending handler before filtering; if nothing was pending,
 * mark the verdict word with SET_TC_OK2MUNGE instead. */
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}
/* A SHOT or STOLEN verdict ends processing: the buffer is freed here
 * and the verdict itself is returned to the caller. */
ret = ing_filter(skb);
if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}
skb->tc_verd = 0;
ncls:
#endif
handle_diverter(skb);
/* Bridge hook. It receives the addresses of skb, pt_prev and ret, so it
 * may update all three; a nonzero return ends processing here. */
if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
/*
 * Pass 2: protocol handlers. The bucket is chosen by the low four bits
 * of the host-order protocol value; within the bucket a handler must
 * match the protocol exactly and be bound to no device or this device.
 * The same one-behind delivery scheme as in pass 1 is used.
 */
type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = ptype;
}
}
/* The last pending handler is called directly through its func pointer;
 * with no handler at all the buffer is freed and reported as dropped. */
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
ret = NET_RX_DROP;
}
out:
rcu_read_unlock();
return ret;
} |
7.2 Br_handle_frame
1. 如果skb的目的Mac地址与接收该skb的网口的Mac地址相同,则结束桥接处理过程(返回到netif_receive_skb函数后,这个skb会最终被提交给网络层);
2. 否则,调用到br_handle_frame_finish函数将报文转发,然后释放skb(返回到netif_receive_skb函数后,这个skb就不会往网络层提交了);
/*
 * br_handle_frame - first bridge-side handling of a frame received on a
 * bridge port.
 * @p:    the bridge port the frame arrived on
 * @pskb: address of the buffer pointer; it is re-read after
 *        br_should_route_hook() runs, since the hook is handed the
 *        pointer's address and may replace the buffer
 *
 * Returns 0 only when br_should_route_hook() returns nonzero. Returns 1 in
 * every other case: the frame was handed to a netfilter hook or freed.
 * NOTE(review): presumably 1 tells the caller the frame was consumed and 0
 * lets normal stack processing continue - confirm in handle_bridge().
 */
int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest;
/* A disabled port drops everything. */
if (p->state == BR_STATE_DISABLED)
goto err;
/* Frames whose source address fails is_valid_ether_addr() are dropped. */
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
goto err;
/* A port in the learning state records the source address here; a
 * forwarding port does so later, in br_handle_frame_finish(). */
if (p->state == BR_STATE_LEARNING)
br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
/*
 * STP is enabled and the destination shares its first five bytes with
 * bridge_ula while the high nibble of the sixth byte is zero.
 * Only a sixth byte of exactly zero is handed to br_stp_handle_bpdu();
 * any other address in this range skips the else-if below, falls
 * through to err and is freed.
 */
if (p->br->stp_enabled &&
!memcmp(dest, bridge_ula, 5) &&
!(dest[5] & 0xF0)) {
if (!dest[5]) {
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
NULL, br_stp_handle_bpdu);
return 1;
}
}
else if (p->state == BR_STATE_FORWARDING) {
/* Optional hook: a nonzero result returns 0 without bridging.
 * Otherwise reload skb and dest, as the hook may have swapped
 * the buffer behind *pskb. */
if (br_should_route_hook) {
if (br_should_route_hook(pskb))
return 0;
skb = *pskb;
dest = eth_hdr(skb)->h_dest;
}
/* compare_ether_addr() yielding zero against the bridge device's
 * own address marks the frame as PACKET_HOST. */
if (!compare_ether_addr(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
br_handle_frame_finish);
return 1;
}
/* Reached by the explicit gotos above and by every frame that matched
 * neither returning branch, e.g. a port that is not forwarding. */
err:
kfree_skb(skb);
return 1;
}
|
7.3 Br_handle_frame_finish
/*
 * br_handle_frame_finish - decide where a frame accepted on a bridge port
 * goes: up to the local stack, out of one port, or flooded.
 * @skb: the frame; skb->dev is the receiving port device
 *
 * Always returns 0.
 */
int br_handle_frame_finish(struct sk_buff *skb)
{
	const unsigned char *dmac = eth_hdr(skb)->h_dest;
	struct net_bridge_port *port = skb->dev->br_port;
	struct net_bridge *br = port->br;
	struct net_bridge_fdb_entry *entry;
	int sent_up = 0;

	/* Learn the sender's address against the receiving port. */
	br_fdb_update(br, port, eth_hdr(skb)->h_source);

	/* Bridge device is promiscuous: a clone always goes up the stack.
	 * If cloning fails the frame simply is not passed up here. */
	if (br->dev->flags & IFF_PROMISC) {
		struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

		if (copy != NULL) {
			br_pass_frame_up(br, copy);
			sent_up = 1;
		}
	}

	/* Group bit set (multicast or broadcast): flood, and deliver the
	 * original locally unless a clone already went up.
	 * NOTE(review): the third argument is nonzero exactly when skb is
	 * still needed after the flood - presumably a clone flag; confirm
	 * in br_flood_forward(). */
	if (dmac[0] & 1) {
		br_flood_forward(br, skb, !sent_up);
		if (!sent_up)
			br_pass_frame_up(br, skb);
		return 0;
	}

	entry = __br_fdb_get(br, dmac);

	/* Unknown unicast destination: flood. */
	if (entry == NULL) {
		br_flood_forward(br, skb, 0);
		return 0;
	}

	/* Destination is a local entry: pass the frame up, or free it when
	 * the promiscuous clone has already been delivered. */
	if (entry->is_local) {
		if (sent_up)
			kfree_skb(skb);
		else
			br_pass_frame_up(br, skb);
		return 0;
	}

	/* Known, non-local destination: forward to the port in the entry. */
	br_forward(entry->dst, skb);
	return 0;
}
7.4 Br_pass_frame_up
在上个函数br_handle_frame_finish中,如果报文是需要发往本地协议栈处理的,则由函数br_pass_frame_up实现:
/*
 * br_pass_frame_up - hand a frame to the local stack on behalf of the bridge.
 * @br:  the bridge the frame is delivered through
 * @skb: the frame
 *
 * Updates the bridge's receive counters, makes the bridge device the
 * frame's device, then runs the NF_BR_LOCAL_IN hook with
 * br_pass_frame_up_finish as the continuation.
 */
static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
{
	/* Keep the receiving port device; the hook gets it as the input device. */
	struct net_device *port_dev = skb->dev;

	br->statistics.rx_bytes += skb->len;
	br->statistics.rx_packets += 1;

	/* From here on the frame appears to have arrived on the bridge device. */
	skb->dev = br->dev;

	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, port_dev, NULL,
		br_pass_frame_up_finish);
}
这段代码非常简单,对net_bridge的数据统计进行更新以后,再更新skb->dev,最后通过NF_HOOK在NF_BR_LOCAL_IN挂接点上调用回了netif_receive_skb;
在netif_receive_skb函数中,调用了handle_bridge函数,重新触发了网桥处理流程,现在发往网桥虚拟设备的数据包又回到了netif_receive_skb,那么网桥的处理过程会不会又被调用呢?在 linux/net/bridge/br_if.c里面可以看到br_add_if函数,实际上的操作是将某一网口加入网桥组,这个函数调用了new_nbp(br, dev); 用以填充net_bridge以及dev结构的重要成员,里面将dev->br_port设定为一个新建的net_bridge_port结构,而上面的br_pass_frame_up函数将skb->dev赋成了br->dev,实际上skb->dev变成了网桥建立的虚拟设备,这个设备是网桥本身而不是桥组的某一端口,系统没有为其调用br_add_if,所以这个net_device结构的br_port指针没有进行赋值;br_port为空,不进入网桥处理流程 ;从而进入上层协议栈处理;
7.5 Br_forward
/*
 * br_forward - forward a frame out of one bridge port, or drop it.
 * @to:  candidate output port
 * @skb: the frame
 *
 * The frame is freed when should_deliver() rejects the port; otherwise
 * it is passed to __br_forward().
 */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	if (!should_deliver(to, skb)) {
		kfree_skb(skb);
		return;
	}

	__br_forward(to, skb);
}
7.6 __br_forward
/*
 * __br_forward - retarget a frame at an output port and run the
 * NF_BR_FORWARD hook.
 * @to:  output port
 * @skb: the frame
 *
 * br_forward_finish is the hook's continuation.
 */
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	/* Remember the ingress device before skb->dev is overwritten. */
	struct net_device *ingress = skb->dev;

	skb->ip_summed = CHECKSUM_NONE;
	skb->dev = to->dev;

	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, ingress, skb->dev,
		br_forward_finish);
}
7.7 Br_forward_finish
/*
 * br_forward_finish - continuation of the forward hook.
 * @skb: the frame; skb->dev is already the output device
 *
 * Runs the NF_BR_POST_ROUTING hook with no input device, skb->dev as the
 * output device, and br_dev_queue_push_xmit as the continuation.
 * Always returns 0.
 */
int br_forward_finish(struct sk_buff *skb)
{
	NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING,
		skb, NULL, skb->dev,
		br_dev_queue_push_xmit);

	return 0;
}
7.8 Br_dev_queue_push_xmit
/*
 * br_dev_queue_push_xmit - final step of bridge forwarding: queue the
 * frame on its output device.
 * @skb: the frame; skb->dev is the output device
 *
 * Always returns 0, whether the frame was queued or freed.
 */
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
	/* Longer than the device MTU with no tso_size set: drop. */
	if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) {
		kfree_skb(skb);
		return 0;
	}

#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_maybe_copy_header(skb);
#endif
	/* NOTE(review): presumably re-exposes the Ethernet header that was
	 * pulled on receive - confirm against the receive path. */
	skb_push(skb, ETH_HLEN);
	dev_queue_xmit(skb);

	return 0;
}
7.9 报文处理总结
进入桥的数据报文分为几个类型,桥对应的处理方法也不同:
1. 报文是本机发送给自己的,桥不处理,交给上层协议栈;
2. 接收报文的物理接口不是网桥接口,桥不处理,交给上层协议栈;
3. 进入网桥后,如果网桥的状态为Disable,则将包丢弃不处理;
4. 报文源地址无效(广播,多播,以及00:00:00:00:00:00),丢包;
5. 如果是STP的BPDU包,进入STP处理,处理后不再转发,也不再交给上层协议栈;
6. 如果是发给本机的报文,桥直接返回,交给上层协议栈,不转发;
7. 需要转发的报文分三种情况:
1) 广播或多播,则除接收端口外的所有端口都需要转发一份;
2) 单播并且在CAM表中能找到端口映射的,只需要往映射端口转发一份即可;
3) 单播但找不到端口映射的,则除了接收端口外其余端口都需要转发;
8 参考文献
1. http://hi.baidu.com/_kouu/blog/item/ad2abf3ffa61cf3170cf6cd7.html
2. http://hi.baidu.com/jrckkyy/blog/item/3bedbef37234d0c70b46e08b.html
3. http://blog.csdn.net/linyt/archive/2010/01/15/5191512.aspx
4. http://www.loosky.net/?p=307
5. http://blog.csdn.net/zhaodm/archive/2006/12/25/1460041.aspx
6. http://blog.chinaunix.net/u/12313/showart_246678.html