xenbits.xensource.com Git - people/ssmith/nc2-2.6.27.bak/.git/commitdiff
patch bonding-balance-slb-fixes.patch
author Steven Smith <ssmith@weybridge.uk.xensource.com>
Thu, 28 May 2009 10:54:19 +0000 (11:54 +0100)
committer Steven Smith <ssmith@weybridge.uk.xensource.com>
Thu, 28 May 2009 10:54:19 +0000 (11:54 +0100)
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bonding.h
net/bridge/br_fdb.c
net/bridge/br_if.c
net/bridge/br_input.c
net/bridge/br_private.h

index 32b52d537ab7e0f442a7e30041bec3068b4a7791..12bb79ef971dd564c7318a04118125fd30b62012 100644 (file)
@@ -112,6 +112,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
 
 /* Forward declaration */
 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
+static void slb_send_learning_packets(struct bonding *bond);
 
 static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
 {
@@ -308,6 +309,19 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
 
 /*********************** slb specific functions ***************************/
 
+static void slb_send_learning_packets(struct bonding *bond)
+{
+       struct vlan_entry *vlan;        /* cursor over bond->vlan_list */
+       struct net_device *vlan_dev;    /* device looked up for vlan->vlan_id */
+
+       br_send_gratuitous_switch_learning_packet(bond->dev);   /* bond itself */
+       list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+               vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
+               if (vlan_dev)   /* callee dereferences dev; skip failed lookups */
+                       br_send_gratuitous_switch_learning_packet(vlan_dev);
+       }
+}
+
 void bond_info_show_slb(struct seq_file *seq)
 {
        struct bonding *bond = seq->private;
@@ -1020,7 +1034,8 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
 
        /* fasten the change in the switch */
        if (SLAVE_IS_OK(slave1)) {
-               alb_send_learning_packets(slave1, slave1->dev->dev_addr);
+               if (!bond->alb_info.slb_enabled)
+                       alb_send_learning_packets(slave1, slave1->dev->dev_addr);
                if (bond->alb_info.rlb_enabled) {
                        /* inform the clients that the mac address
                         * has changed
@@ -1032,7 +1047,8 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
        }
 
        if (SLAVE_IS_OK(slave2)) {
-               alb_send_learning_packets(slave2, slave2->dev->dev_addr);
+               if (!bond->alb_info.slb_enabled)
+                       alb_send_learning_packets(slave2, slave2->dev->dev_addr);
                if (bond->alb_info.rlb_enabled) {
                        /* inform the clients that the mac address
                         * has changed
@@ -1043,6 +1059,9 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
                disabled_slave = slave2;
        }
 
+       if (bond->alb_info.slb_enabled)
+               slb_send_learning_packets(bond);
+
        if (bond->alb_info.rlb_enabled && slaves_state_differ) {
                /* A disabled slave was assigned an active mac addr */
                rlb_teach_disabled_mac_on_primary(bond,
@@ -1408,16 +1427,8 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
                /* unbalanced or unassigned, send through primary */
                tx_slave = bond->curr_active_slave;
                bond_info->unbalanced_load += skb->len;
-               printk(KERN_ERR "No slave for %02x:%02x:%02x:%02x:%02x:%02x.\n",
-                      eth_data->h_source[0], eth_data->h_source[1],
-                      eth_data->h_source[2], eth_data->h_source[3],
-                      eth_data->h_source[4], eth_data->h_source[5]);
-               if (tx_slave) {
-                       printk(KERN_ERR "Sending via primary %s (hash_index %x, length %x)\n",
-                              tx_slave->dev->name, hash_index, hash_size);
-               } else {
+               if (!tx_slave)
                        printk(KERN_ERR "No primary interface found\n");
-               }
        }
 
        if (tx_slave && SLAVE_IS_OK(tx_slave)) {
@@ -1469,7 +1480,7 @@ void bond_alb_monitor(struct work_struct *work)
        bond_info->lp_counter++;
 
        /* send learning packets */
-       if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
+       if (!bond->alb_info.slb_enabled && bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
                /* change of curr_active_slave involves swapping of mac addresses.
                 * in order to avoid this swapping from happening while
                 * sending the learning packets, the curr_slave_lock must be held for
@@ -1499,14 +1510,11 @@ void bond_alb_monitor(struct work_struct *work)
                                                BOND_TLB_REBALANCE_INTERVAL;
                                bond_info->unbalanced_load = 0;
                        }
-                       /*
-                        * No need for ARP in the SLB case since the
-                        * RX path remains valid, although we may
-                        * shortly be choosing a different TX path
-                        * which will cause RX to change too.
-                        */
                }
 
+               if (bond->alb_info.slb_enabled)
+                       slb_send_learning_packets(bond);
+
                read_unlock(&bond->curr_slave_lock);
 
                bond_info->tx_rebalance_counter = 0;
@@ -1638,7 +1646,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
                         * gone away. Send a gratuitous packet which
                         * will cause the switch to update its tables.
                         */
-                       br_send_gratuitous_switch_learning_packet(bond->dev);
+                       slb_send_learning_packets(bond);
                }
        } else if (link == BOND_LINK_UP) {
                /* order a rebalance ASAP */
@@ -1736,7 +1744,10 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave
                read_lock(&bond->lock);
        } else {
                read_lock(&bond->lock);
-               alb_send_learning_packets(new_slave, bond->dev->dev_addr);
+               if (bond->alb_info.slb_enabled)
+                       slb_send_learning_packets(bond);
+               else
+                       alb_send_learning_packets(new_slave, bond->dev->dev_addr);
        }
 
        write_lock_bh(&bond->curr_slave_lock);
@@ -1791,7 +1802,10 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
                alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr,
                                       bond->alb_info.rlb_enabled);
 
-               alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr);
+               if (bond->alb_info.slb_enabled)
+                       slb_send_learning_packets(bond);
+               else
+                       alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr);
                if (bond->alb_info.rlb_enabled) {
                        /* inform clients mac address has changed */
                        rlb_req_update_slave_clients(bond, bond->curr_active_slave);
index 35b6a5362429adb968af9e9384880206e5a4dae1..799aaa169c73216e30f2c4e93adedb7a4d0bf220 100644 (file)
@@ -827,7 +827,8 @@ static int bond_set_allmulti(struct bonding *bond, int inc)
  */
 static void bond_mc_add(struct bonding *bond, void *addr, int alen)
 {
-       if (USES_PRIMARY(bond->params.mode)) {
+
+       if (USES_PRIMARY(bond->params.mode) && bond->params.mode != BOND_MODE_SLB) {
                /* write lock already acquired */
                if (bond->curr_active_slave) {
                        dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
@@ -1580,6 +1581,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                        dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
                }
                netif_addr_unlock_bh(bond_dev);
+       } else if (bond->params.mode == BOND_MODE_SLB) {
+               /* set promiscuity level to new slave */
+               if (bond_dev->flags & IFF_PROMISC) {
+                       dev_set_promiscuity(slave_dev, 1);
+               }
        }
 
        if (bond->params.mode == BOND_MODE_8023AD) {
@@ -4490,9 +4496,9 @@ void bond_set_mode_ops(struct bonding *bond, int mode)
                bond_set_xmit_hash_policy(bond);
                break;
        case BOND_MODE_ALB:
+       case BOND_MODE_SLB:
                bond_set_master_alb_flags(bond);
                /* FALLTHRU */
-       case BOND_MODE_SLB:
        case BOND_MODE_TLB:
                bond_dev->hard_start_xmit = bond_alb_xmit;
                bond_dev->set_mac_address = bond_alb_set_mac_address;
index e0e0797bb8bd6ea66078b16f67e4149c6dcb8873..13b47bf85ba1f1da75c87314ab1caec833bfa8de 100644 (file)
@@ -65,7 +65,7 @@
 #define USES_PRIMARY(mode)                             \
                (((mode) == BOND_MODE_ACTIVEBACKUP) ||  \
                 ((mode) == BOND_MODE_TLB)          ||  \
-                ((mode) == BOND_MODE_ALB)          ||  \
+                ((mode) == BOND_MODE_ALB)          ||  \
                 ((mode) == BOND_MODE_SLB))
 
 /*
index a48f5efdb6bfa9b44fb0149136c169857a6e0931..1b02896e4ac1a3146d0ec9136497bcaa45e95d58 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/etherdevice.h>
 #include <linux/jhash.h>
 #include <linux/random.h>
+#include <linux/if_arp.h>
 #include <asm/atomic.h>
 #include <asm/unaligned.h>
 #include "br_private.h"
@@ -377,15 +378,17 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
        return ret;
 }
 
-void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
-                  const unsigned char *addr)
+int br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
+                  struct sk_buff *skb)
 {
+       const unsigned char *addr = eth_hdr(skb)->h_source;
        struct hlist_head *head = &br->hash[br_mac_hash(addr)];
        struct net_bridge_fdb_entry *fdb;
+       static const u8 bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
        /* some users want to always flood. */
        if (hold_time(br) == 0)
-               return;
+               return 1;
 
        /* ignore packets unless we are using this port */
        if (!(source->state == BR_STATE_LEARNING ||
@@ -394,8 +397,51 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 
        fdb = fdb_find(head, addr);
        if (likely(fdb)) {
+               /*
+                * If this is an address arriving on the physical port
+                * which we have previously seen on a non-physical
+                * port then ignore it.
+                 *
+                 * _Unless_ it is a broadcast ARP reply in which case
+                 * the guest in question has migrated.
+                */
+               extern struct net_bridge_port *br_locate_physical_port(struct net_bridge *br);
+               struct net_bridge_port *phys_port = br_locate_physical_port(br);
+               if (phys_port && phys_port != fdb->dst && phys_port == source) {
+#pragma pack(1)
+                       struct arp_pkt {
+                               u16     hw_addr_space;
+                               u16     prot_addr_space;
+                               u8      hw_addr_len;
+                               u8      prot_addr_len;
+                               u16     op_code;
+                               u8      mac_src[ETH_ALEN];      /* sender hardware address */
+                               u32     ip_src;                 /* sender IP address */
+                               u8      mac_dst[ETH_ALEN];      /* target hardware address */
+                               u32     ip_dst;                 /* target IP address */
+                       };
+#pragma pack()
+                       struct arp_pkt *arp = (struct arp_pkt *)skb->data;
+
+                       if (compare_ether_addr(bcast, addr) != 0)
+                               return 0;
+
+                       if (!arp)
+                               return 0;
+
+                       if (skb->len < sizeof(struct arp_pkt))
+                               return 0;
+
+                       if (eth_hdr(skb)->h_proto != htons(ETH_P_ARP))
+                               return 0;
+
+                       if (arp->op_code != htons(ARPOP_REPLY))
+                               return 0;
+               }
+
                /* attempt to update an entry for a local interface */
                if (unlikely(fdb->is_local)) {
+                       return 0;
                        if (net_ratelimit())
                                printk(KERN_WARNING "%s: received packet with "
                                       " own address as source address\n",
@@ -414,4 +460,6 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
                 */
                spin_unlock(&br->hash_lock);
        }
+
+       return 1;
 }
index b5648f6e522564b3a2071b4c3eb8676283a57a45..f969ee8e949c20bfe823ad0b375c3ce951695c4a 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
@@ -205,6 +206,7 @@ static struct net_device *new_bridge_dev(const char *name)
        br->topology_change = 0;
        br->topology_change_detected = 0;
        br->ageing_time = 300 * HZ;
+       br->phys_port = NULL;
 
        br_netfilter_rtable_init(br);
 
@@ -266,6 +268,20 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
        return p;
 }
 
+struct net_bridge_port *br_locate_physical_port(struct net_bridge *br)
+{
+       struct net_bridge_port *port;   /* cursor over br->port_list */
+       if (br->phys_port)              /* result of an earlier scan */
+               return br->phys_port;
+       list_for_each_entry(port, &br->port_list, list) {
+               if (compare_ether_addr(br->dev->dev_addr, port->dev->dev_addr))
+                       continue;       /* MAC differs from the bridge device's */
+               br->phys_port = port;   /* remember the first matching port */
+               break;
+       }
+       return br->phys_port;           /* still NULL when no port matched */
+}
+
 struct net_device *br_locate_physical_device(struct net_device *dev)
 {
        struct net_bridge *br;
@@ -275,55 +291,52 @@ struct net_device *br_locate_physical_device(struct net_device *dev)
                return dev;
 
        br = netdev_priv(dev);
+       p = br_locate_physical_port(br);
 
-       list_for_each_entry(p, &br->port_list, list) {
-               if (!compare_ether_addr(dev->dev_addr, p->dev->dev_addr))
-                       return p->dev;
-       }
-       return dev;
+       return p ? p->dev : dev;
 }
 EXPORT_SYMBOL(br_locate_physical_device);
 
 static struct sk_buff *create_switch_learning_packet(struct net_device *dev, unsigned char *src_hw)
 {
+#pragma pack(1)
+       struct learning_pkt {
+               u8 mac_dst[ETH_ALEN];
+               u8 mac_src[ETH_ALEN];
+               u16 type;
+               u8 padding[ETH_ZLEN - ETH_HLEN];
+       };
+#pragma pack()
        struct sk_buff *skb;
-       unsigned char *data;
-
-       /*
-        * Xen OUI is 00-16-3E therefore multicast address is 01-16-3E.
-        * Use the first of these addresses as our destination address with protocol type 0.
-        * Include the physical interface's MAC address as the payload.
-        */
-       unsigned char dest_hw[ETH_ALEN] = {0x01, 0x16, 0x3e, 0x00, 0x00, 0x00};
-
-       skb = alloc_skb(ETH_ALEN + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
-       if (skb == NULL)
+       struct learning_pkt pkt;
+       int size = sizeof(struct learning_pkt);
+       char *data;
+       const unsigned char dest_hw[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+       memset(&pkt, 0, size);
+       memcpy(pkt.mac_dst, dest_hw, ETH_ALEN);
+       memcpy(pkt.mac_src, src_hw, ETH_ALEN);
+       pkt.type = __constant_htons(ETH_P_LOOP);
+
+       skb = dev_alloc_skb(size);
+       if (!skb)
                return NULL;
 
-       skb_reserve(skb, LL_RESERVED_SPACE(dev));
-       skb->nh.raw = skb->data;
-       data = (unsigned char *) skb_put(skb, ETH_ALEN);
+       data = skb_put(skb, size);
+       memcpy(data, &pkt, size);
 
+       skb->protocol = pkt.type;
+       skb->priority = TC_PRIO_CONTROL;
        skb->dev = dev;
-       skb->protocol = 0;
-
-       if (dev->hard_header &&
-           dev->hard_header(skb,dev,0,&dest_hw,src_hw,skb->len) < 0)
-               goto out;
-
-       memcpy(data, dev->dev_addr, ETH_ALEN);
 
        return skb;
-
-out:
-       kfree_skb(skb);
-       return NULL;
 }
 
 void br_send_gratuitous_switch_learning_packet(struct net_device *dev)
 {
        struct net_bridge *br;
        struct net_device *phys;
+       struct sk_buff *skb;
        int i;
 
        if (!dev->br_port)
@@ -343,16 +356,22 @@ void br_send_gratuitous_switch_learning_packet(struct net_device *dev)
                        if (f->dst != phys->br_port &&
                            f->dst->dev->addr_len == ETH_ALEN &&
                            memcmp(&f->dst->dev->dev_addr[0], &f->addr.addr[0], ETH_ALEN) != 0) {
-                               struct sk_buff *skb;
                                skb = create_switch_learning_packet(dev, f->addr.addr);
 
                                if (skb == NULL)
                                        goto out;
 
                                dev_queue_xmit(skb);
+
+                               f->ageing_timer = jiffies;
                        }
                }
        }
+
+       skb = create_switch_learning_packet(dev, dev->dev_addr);
+       if (skb)
+               dev_queue_xmit(skb);
+
 out:
        spin_unlock_bh(&br->hash_lock);
 }
@@ -540,6 +559,9 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
        if (!p || p->br != br)
                return -EINVAL;
 
+       if ( p == br->phys_port )
+               br->phys_port = NULL;
+
        del_nbp(p);
 
        spin_lock_bh(&br->lock);
index 30b88777c3df1ad3ab934832e8738c6110760d4c..21f4d85f64c34e286d0c4c1a4c7f8cefff239eb5 100644 (file)
@@ -48,7 +48,8 @@ int br_handle_frame_finish(struct sk_buff *skb)
 
        /* insert into forwarding database after filtering to avoid spoofing */
        br = p->br;
-       br_fdb_update(br, p, eth_hdr(skb)->h_source);
+       if (!br_fdb_update(br, p, skb))
+               goto drop;
 
        if (p->state == BR_STATE_LEARNING)
                goto drop;
@@ -95,11 +96,15 @@ static int br_handle_local_finish(struct sk_buff *skb)
 {
        struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
 
-       if (p)
-               br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+       if (p) {
+               if (!br_fdb_update(p->br, p, skb)) {
+                       kfree_skb(skb);
+                       return 1;
+               }
+       }
+
        return 0;        /* process further */
 }
-
 /* Does address match the link local multicast address.
  * 01:80:c2:00:00:0X
  */
index 310edbc8e85154744f000eaa8954898b50153bdb..9e48b11bd1a8c243c35e2678056270c9c1a631c2 100644 (file)
@@ -91,6 +91,7 @@ struct net_bridge
        struct list_head                port_list;
        struct list_head                promiscuous_list;
        struct net_device               *dev;
+       struct net_bridge_port          *phys_port; /* One of our ports will contain the route to the physical world */
        spinlock_t                      hash_lock;
        struct hlist_head               hash[BR_HASH_SIZE];
        struct list_head                age_list;
@@ -164,9 +165,9 @@ extern int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 extern int br_fdb_insert(struct net_bridge *br,
                         struct net_bridge_port *source,
                         const unsigned char *addr);
-extern void br_fdb_update(struct net_bridge *br,
-                         struct net_bridge_port *source,
-                         const unsigned char *addr);
+extern int br_fdb_update(struct net_bridge *br,
+                        struct net_bridge_port *source,
+                        struct sk_buff *skb);
 
 /* br_forward.c */
 extern void br_deliver(const struct net_bridge_port *to,