ia64/xen-unstable: patches/linux-2.6.16.13/net-gso.patch @ 10714:a4041ac6f152

[NET] net-gso.patch: Fix up GSO packets with broken checksums

Here is the original changelog:

[NET] gso: Fix up GSO packets with broken checksums

Certain subsystems in the stack (e.g., netfilter) can break the partial
checksum on GSO packets. Until they're fixed, this patch allows this to
work by recomputing the partial checksums through the GSO mechanism.

Once they've all been converted to update the partial checksum instead of
clearing it, this workaround can be removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kfraser@localhost.localdomain
date Mon Jul 10 15:36:04 2006 +0100 (2006-07-10)
parents 17e9daeb2c50
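Before the hunks themselves, here is a minimal standalone sketch (plain C,
compilable on its own; the toy_skb/toy_dev types are invented for
illustration and are not kernel structures) of the decision logic the patch
introduces via netif_needs_gso() in include/linux/netdevice.h: a GSO packet
is diverted to the software GSO path either because the device cannot
segment that GSO type itself, or because something such as netfilter cleared
its partial checksum (ip_summed != CHECKSUM_HW). In the latter case
skb_gso_segment() in net/core/dev.c calls the protocol's gso_send_check()
(e.g. tcp_v4_gso_send_check) to regenerate the partial checksum before the
packet is segmented and sent.

/*
 * Toy model (NOT kernel code) of the check added by this patch.
 * The real skb_gso_ok() compares gso_type shifted by NETIF_F_GSO_SHIFT
 * against dev->features; this sketch folds that shift away for brevity.
 */
#include <stdio.h>

#define CHECKSUM_NONE 0
#define CHECKSUM_HW   1

enum { GSO_TCPV4 = 1 << 0, GSO_UDPV4 = 1 << 1 };

struct toy_skb {
    unsigned short gso_size;  /* non-zero => this is a GSO packet    */
    int gso_type;             /* GSO_TCPV4, GSO_UDPV4, ...           */
    int ip_summed;            /* CHECKSUM_HW = partial checksum set  */
};

struct toy_dev {
    int gso_features;         /* GSO types the hardware can segment  */
};

static int skb_gso_ok(const struct toy_skb *skb, int features)
{
    return (features & skb->gso_type) == skb->gso_type;
}

/* Mirrors netif_needs_gso() from the netdevice.h hunk below. */
static int netif_needs_gso(const struct toy_dev *dev, const struct toy_skb *skb)
{
    return skb->gso_size &&
           (!skb_gso_ok(skb, dev->gso_features) ||
            skb->ip_summed != CHECKSUM_HW);
}

int main(void)
{
    struct toy_dev dev = { .gso_features = GSO_TCPV4 };
    struct toy_skb ok = { .gso_size = 1448, .gso_type = GSO_TCPV4,
                          .ip_summed = CHECKSUM_HW };
    struct toy_skb broken = ok;

    broken.ip_summed = CHECKSUM_NONE;   /* e.g. netfilter cleared it */

    printf("intact csum  -> software GSO needed: %d\n",
           netif_needs_gso(&dev, &ok));      /* 0: hardware TSO is fine   */
    printf("cleared csum -> software GSO needed: %d\n",
           netif_needs_gso(&dev, &broken));  /* 1: GSO path fixes it up   */
    return 0;
}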
1 diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
2 index 3c0a5ba..847cedb 100644
3 --- a/Documentation/networking/netdevices.txt
4 +++ b/Documentation/networking/netdevices.txt
5 @@ -42,9 +42,9 @@ dev->get_stats:
6 Context: nominally process, but don't sleep inside an rwlock
8 dev->hard_start_xmit:
9 - Synchronization: dev->xmit_lock spinlock.
10 + Synchronization: netif_tx_lock spinlock.
11 When the driver sets NETIF_F_LLTX in dev->features this will be
12 - called without holding xmit_lock. In this case the driver
13 + called without holding netif_tx_lock. In this case the driver
14 has to lock by itself when needed. It is recommended to use a try lock
15 for this and return -1 when the spin lock fails.
16 The locking there should also properly protect against
17 @@ -62,12 +62,12 @@ dev->hard_start_xmit:
18 Only valid when NETIF_F_LLTX is set.
20 dev->tx_timeout:
21 - Synchronization: dev->xmit_lock spinlock.
22 + Synchronization: netif_tx_lock spinlock.
23 Context: BHs disabled
24 Notes: netif_queue_stopped() is guaranteed true
26 dev->set_multicast_list:
27 - Synchronization: dev->xmit_lock spinlock.
28 + Synchronization: netif_tx_lock spinlock.
29 Context: BHs disabled
31 dev->poll:
32 diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
33 index 4be9769..2e7cac7 100644
34 --- a/drivers/block/aoe/aoenet.c
35 +++ b/drivers/block/aoe/aoenet.c
36 @@ -95,9 +95,8 @@ mac_addr(char addr[6])
37 static struct sk_buff *
38 skb_check(struct sk_buff *skb)
39 {
40 - if (skb_is_nonlinear(skb))
41 if ((skb = skb_share_check(skb, GFP_ATOMIC)))
42 - if (skb_linearize(skb, GFP_ATOMIC) < 0) {
43 + if (skb_linearize(skb)) {
44 dev_kfree_skb(skb);
45 return NULL;
46 }
47 diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
48 index a2408d7..c90e620 100644
49 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
50 +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
51 @@ -821,7 +821,8 @@ void ipoib_mcast_restart_task(void *dev_
53 ipoib_mcast_stop_thread(dev, 0);
55 - spin_lock_irqsave(&dev->xmit_lock, flags);
56 + local_irq_save(flags);
57 + netif_tx_lock(dev);
58 spin_lock(&priv->lock);
60 /*
61 @@ -896,7 +897,8 @@ void ipoib_mcast_restart_task(void *dev_
62 }
64 spin_unlock(&priv->lock);
65 - spin_unlock_irqrestore(&dev->xmit_lock, flags);
66 + netif_tx_unlock(dev);
67 + local_irq_restore(flags);
69 /* We have to cancel outside of the spinlock */
70 list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
71 diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
72 index 6711eb6..8d2351f 100644
73 --- a/drivers/media/dvb/dvb-core/dvb_net.c
74 +++ b/drivers/media/dvb/dvb-core/dvb_net.c
75 @@ -1052,7 +1052,7 @@ static void wq_set_multicast_list (void
77 dvb_net_feed_stop(dev);
78 priv->rx_mode = RX_MODE_UNI;
79 - spin_lock_bh(&dev->xmit_lock);
80 + netif_tx_lock_bh(dev);
82 if (dev->flags & IFF_PROMISC) {
83 dprintk("%s: promiscuous mode\n", dev->name);
84 @@ -1077,7 +1077,7 @@ static void wq_set_multicast_list (void
85 }
86 }
88 - spin_unlock_bh(&dev->xmit_lock);
89 + netif_tx_unlock_bh(dev);
90 dvb_net_feed_start(dev);
91 }
93 diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
94 index dd41049..6615583 100644
95 --- a/drivers/net/8139cp.c
96 +++ b/drivers/net/8139cp.c
97 @@ -794,7 +794,7 @@ #endif
98 entry = cp->tx_head;
99 eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
100 if (dev->features & NETIF_F_TSO)
101 - mss = skb_shinfo(skb)->tso_size;
102 + mss = skb_shinfo(skb)->gso_size;
104 if (skb_shinfo(skb)->nr_frags == 0) {
105 struct cp_desc *txd = &cp->tx_ring[entry];
106 diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
107 index a24200d..29d9218 100644
108 --- a/drivers/net/bnx2.c
109 +++ b/drivers/net/bnx2.c
110 @@ -1593,7 +1593,7 @@ bnx2_tx_int(struct bnx2 *bp)
111 skb = tx_buf->skb;
112 #ifdef BCM_TSO
113 /* partial BD completions possible with TSO packets */
114 - if (skb_shinfo(skb)->tso_size) {
115 + if (skb_is_gso(skb)) {
116 u16 last_idx, last_ring_idx;
118 last_idx = sw_cons +
119 @@ -1948,7 +1948,7 @@ bnx2_poll(struct net_device *dev, int *b
120 return 1;
121 }
123 -/* Called with rtnl_lock from vlan functions and also dev->xmit_lock
124 +/* Called with rtnl_lock from vlan functions and also netif_tx_lock
125 * from set_multicast.
126 */
127 static void
128 @@ -4403,7 +4403,7 @@ bnx2_vlan_rx_kill_vid(struct net_device
129 }
130 #endif
132 -/* Called with dev->xmit_lock.
133 +/* Called with netif_tx_lock.
134 * hard_start_xmit is pseudo-lockless - a lock is only required when
135 * the tx queue is full. This way, we get the benefit of lockless
136 * operations most of the time without the complexities to handle
137 @@ -4441,7 +4441,7 @@ bnx2_start_xmit(struct sk_buff *skb, str
138 (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16));
139 }
140 #ifdef BCM_TSO
141 - if ((mss = skb_shinfo(skb)->tso_size) &&
142 + if ((mss = skb_shinfo(skb)->gso_size) &&
143 (skb->len > (bp->dev->mtu + ETH_HLEN))) {
144 u32 tcp_opt_len, ip_tcp_len;
146 diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
147 index bcf9f17..e970921 100644
148 --- a/drivers/net/bonding/bond_main.c
149 +++ b/drivers/net/bonding/bond_main.c
150 @@ -1145,8 +1145,7 @@ int bond_sethwaddr(struct net_device *bo
151 }
153 #define BOND_INTERSECT_FEATURES \
154 - (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\
155 - NETIF_F_TSO|NETIF_F_UFO)
156 + (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO)
158 /*
159 * Compute the common dev->feature set available to all slaves. Some
160 @@ -1164,9 +1163,7 @@ static int bond_compute_features(struct
161 features &= (slave->dev->features & BOND_INTERSECT_FEATURES);
163 if ((features & NETIF_F_SG) &&
164 - !(features & (NETIF_F_IP_CSUM |
165 - NETIF_F_NO_CSUM |
166 - NETIF_F_HW_CSUM)))
167 + !(features & NETIF_F_ALL_CSUM))
168 features &= ~NETIF_F_SG;
170 /*
171 @@ -4147,7 +4144,7 @@ static int bond_init(struct net_device *
172 */
173 bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
175 - /* don't acquire bond device's xmit_lock when
176 + /* don't acquire bond device's netif_tx_lock when
177 * transmitting */
178 bond_dev->features |= NETIF_F_LLTX;
180 diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
181 index 30ff8ea..7d72e16 100644
182 --- a/drivers/net/chelsio/sge.c
183 +++ b/drivers/net/chelsio/sge.c
184 @@ -1419,7 +1419,7 @@ int t1_start_xmit(struct sk_buff *skb, s
185 struct cpl_tx_pkt *cpl;
187 #ifdef NETIF_F_TSO
188 - if (skb_shinfo(skb)->tso_size) {
189 + if (skb_is_gso(skb)) {
190 int eth_type;
191 struct cpl_tx_pkt_lso *hdr;
193 @@ -1434,7 +1434,7 @@ #ifdef NETIF_F_TSO
194 hdr->ip_hdr_words = skb->nh.iph->ihl;
195 hdr->tcp_hdr_words = skb->h.th->doff;
196 hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
197 - skb_shinfo(skb)->tso_size));
198 + skb_shinfo(skb)->gso_size));
199 hdr->len = htonl(skb->len - sizeof(*hdr));
200 cpl = (struct cpl_tx_pkt *)hdr;
201 sge->stats.tx_lso_pkts++;
202 diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
203 index fa29402..96ddc24 100644
204 --- a/drivers/net/e1000/e1000_main.c
205 +++ b/drivers/net/e1000/e1000_main.c
206 @@ -2526,7 +2526,7 @@ #ifdef NETIF_F_TSO
207 uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
208 int err;
210 - if (skb_shinfo(skb)->tso_size) {
211 + if (skb_is_gso(skb)) {
212 if (skb_header_cloned(skb)) {
213 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
214 if (err)
215 @@ -2534,7 +2534,7 @@ #ifdef NETIF_F_TSO
216 }
218 hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
219 - mss = skb_shinfo(skb)->tso_size;
220 + mss = skb_shinfo(skb)->gso_size;
221 if (skb->protocol == ntohs(ETH_P_IP)) {
222 skb->nh.iph->tot_len = 0;
223 skb->nh.iph->check = 0;
224 @@ -2651,7 +2651,7 @@ #ifdef NETIF_F_TSO
225 * tso gets written back prematurely before the data is fully
226 * DMAd to the controller */
227 if (!skb->data_len && tx_ring->last_tx_tso &&
228 - !skb_shinfo(skb)->tso_size) {
229 + !skb_is_gso(skb)) {
230 tx_ring->last_tx_tso = 0;
231 size -= 4;
232 }
233 @@ -2893,7 +2893,7 @@ #endif
234 }
236 #ifdef NETIF_F_TSO
237 - mss = skb_shinfo(skb)->tso_size;
238 + mss = skb_shinfo(skb)->gso_size;
239 /* The controller does a simple calculation to
240 * make sure there is enough room in the FIFO before
241 * initiating the DMA for each buffer. The calc is:
242 @@ -2934,8 +2934,7 @@ #endif
244 #ifdef NETIF_F_TSO
245 /* Controller Erratum workaround */
246 - if (!skb->data_len && tx_ring->last_tx_tso &&
247 - !skb_shinfo(skb)->tso_size)
248 + if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
249 count++;
250 #endif
252 diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
253 index 3682ec6..c6ca459 100644
254 --- a/drivers/net/forcedeth.c
255 +++ b/drivers/net/forcedeth.c
256 @@ -482,9 +482,9 @@ #define LPA_1000HALF 0x0400
257 * critical parts:
258 * - rx is (pseudo-) lockless: it relies on the single-threading provided
259 * by the arch code for interrupts.
260 - * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission
261 + * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
262 * needs dev->priv->lock :-(
263 - * - set_multicast_list: preparation lockless, relies on dev->xmit_lock.
264 + * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
265 */
267 /* in dev: base, irq */
268 @@ -1016,7 +1016,7 @@ static void drain_ring(struct net_device
270 /*
271 * nv_start_xmit: dev->hard_start_xmit function
272 - * Called with dev->xmit_lock held.
273 + * Called with netif_tx_lock held.
274 */
275 static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
276 {
277 @@ -1105,8 +1105,8 @@ static int nv_start_xmit(struct sk_buff
278 np->tx_skbuff[nr] = skb;
280 #ifdef NETIF_F_TSO
281 - if (skb_shinfo(skb)->tso_size)
282 - tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
283 + if (skb_is_gso(skb))
284 + tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
285 else
286 #endif
287 tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
288 @@ -1203,7 +1203,7 @@ static void nv_tx_done(struct net_device
290 /*
291 * nv_tx_timeout: dev->tx_timeout function
292 - * Called with dev->xmit_lock held.
293 + * Called with netif_tx_lock held.
294 */
295 static void nv_tx_timeout(struct net_device *dev)
296 {
297 @@ -1524,7 +1524,7 @@ static int nv_change_mtu(struct net_devi
298 * Changing the MTU is a rare event, it shouldn't matter.
299 */
300 disable_irq(dev->irq);
301 - spin_lock_bh(&dev->xmit_lock);
302 + netif_tx_lock_bh(dev);
303 spin_lock(&np->lock);
304 /* stop engines */
305 nv_stop_rx(dev);
306 @@ -1559,7 +1559,7 @@ static int nv_change_mtu(struct net_devi
307 nv_start_rx(dev);
308 nv_start_tx(dev);
309 spin_unlock(&np->lock);
310 - spin_unlock_bh(&dev->xmit_lock);
311 + netif_tx_unlock_bh(dev);
312 enable_irq(dev->irq);
313 }
314 return 0;
315 @@ -1594,7 +1594,7 @@ static int nv_set_mac_address(struct net
316 memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN);
318 if (netif_running(dev)) {
319 - spin_lock_bh(&dev->xmit_lock);
320 + netif_tx_lock_bh(dev);
321 spin_lock_irq(&np->lock);
323 /* stop rx engine */
324 @@ -1606,7 +1606,7 @@ static int nv_set_mac_address(struct net
325 /* restart rx engine */
326 nv_start_rx(dev);
327 spin_unlock_irq(&np->lock);
328 - spin_unlock_bh(&dev->xmit_lock);
329 + netif_tx_unlock_bh(dev);
330 } else {
331 nv_copy_mac_to_hw(dev);
332 }
333 @@ -1615,7 +1615,7 @@ static int nv_set_mac_address(struct net
335 /*
336 * nv_set_multicast: dev->set_multicast function
337 - * Called with dev->xmit_lock held.
338 + * Called with netif_tx_lock held.
339 */
340 static void nv_set_multicast(struct net_device *dev)
341 {
342 diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
343 index 102c1f0..d12605f 100644
344 --- a/drivers/net/hamradio/6pack.c
345 +++ b/drivers/net/hamradio/6pack.c
346 @@ -308,9 +308,9 @@ static int sp_set_mac_address(struct net
347 {
348 struct sockaddr_ax25 *sa = addr;
350 - spin_lock_irq(&dev->xmit_lock);
351 + netif_tx_lock_bh(dev);
352 memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
353 - spin_unlock_irq(&dev->xmit_lock);
354 + netif_tx_unlock_bh(dev);
356 return 0;
357 }
358 @@ -767,9 +767,9 @@ static int sixpack_ioctl(struct tty_stru
359 break;
360 }
362 - spin_lock_irq(&dev->xmit_lock);
363 + netif_tx_lock_bh(dev);
364 memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN);
365 - spin_unlock_irq(&dev->xmit_lock);
366 + netif_tx_unlock_bh(dev);
368 err = 0;
369 break;
370 diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
371 index dc5e9d5..5c66f5a 100644
372 --- a/drivers/net/hamradio/mkiss.c
373 +++ b/drivers/net/hamradio/mkiss.c
374 @@ -357,9 +357,9 @@ static int ax_set_mac_address(struct net
375 {
376 struct sockaddr_ax25 *sa = addr;
378 - spin_lock_irq(&dev->xmit_lock);
379 + netif_tx_lock_bh(dev);
380 memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
381 - spin_unlock_irq(&dev->xmit_lock);
382 + netif_tx_unlock_bh(dev);
384 return 0;
385 }
386 @@ -886,9 +886,9 @@ static int mkiss_ioctl(struct tty_struct
387 break;
388 }
390 - spin_lock_irq(&dev->xmit_lock);
391 + netif_tx_lock_bh(dev);
392 memcpy(dev->dev_addr, addr, AX25_ADDR_LEN);
393 - spin_unlock_irq(&dev->xmit_lock);
394 + netif_tx_unlock_bh(dev);
396 err = 0;
397 break;
398 diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
399 index 31fb2d7..2e222ef 100644
400 --- a/drivers/net/ifb.c
401 +++ b/drivers/net/ifb.c
402 @@ -76,13 +76,13 @@ static void ri_tasklet(unsigned long dev
403 dp->st_task_enter++;
404 if ((skb = skb_peek(&dp->tq)) == NULL) {
405 dp->st_txq_refl_try++;
406 - if (spin_trylock(&_dev->xmit_lock)) {
407 + if (netif_tx_trylock(_dev)) {
408 dp->st_rxq_enter++;
409 while ((skb = skb_dequeue(&dp->rq)) != NULL) {
410 skb_queue_tail(&dp->tq, skb);
411 dp->st_rx2tx_tran++;
412 }
413 - spin_unlock(&_dev->xmit_lock);
414 + netif_tx_unlock(_dev);
415 } else {
416 /* reschedule */
417 dp->st_rxq_notenter++;
418 @@ -110,7 +110,7 @@ static void ri_tasklet(unsigned long dev
419 }
420 }
422 - if (spin_trylock(&_dev->xmit_lock)) {
423 + if (netif_tx_trylock(_dev)) {
424 dp->st_rxq_check++;
425 if ((skb = skb_peek(&dp->rq)) == NULL) {
426 dp->tasklet_pending = 0;
427 @@ -118,10 +118,10 @@ static void ri_tasklet(unsigned long dev
428 netif_wake_queue(_dev);
429 } else {
430 dp->st_rxq_rsch++;
431 - spin_unlock(&_dev->xmit_lock);
432 + netif_tx_unlock(_dev);
433 goto resched;
434 }
435 - spin_unlock(&_dev->xmit_lock);
436 + netif_tx_unlock(_dev);
437 } else {
438 resched:
439 dp->tasklet_pending = 1;
440 diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
441 index a9f49f0..339d4a7 100644
442 --- a/drivers/net/irda/vlsi_ir.c
443 +++ b/drivers/net/irda/vlsi_ir.c
444 @@ -959,7 +959,7 @@ static int vlsi_hard_start_xmit(struct s
445 || (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec))
446 break;
447 udelay(100);
448 - /* must not sleep here - we are called under xmit_lock! */
449 + /* must not sleep here - called under netif_tx_lock! */
450 }
451 }
453 diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
454 index f9f77e4..7d187d0 100644
455 --- a/drivers/net/ixgb/ixgb_main.c
456 +++ b/drivers/net/ixgb/ixgb_main.c
457 @@ -1163,7 +1163,7 @@ #ifdef NETIF_F_TSO
458 uint16_t ipcse, tucse, mss;
459 int err;
461 - if(likely(skb_shinfo(skb)->tso_size)) {
462 + if (likely(skb_is_gso(skb))) {
463 if (skb_header_cloned(skb)) {
464 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
465 if (err)
466 @@ -1171,7 +1171,7 @@ #ifdef NETIF_F_TSO
467 }
469 hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
470 - mss = skb_shinfo(skb)->tso_size;
471 + mss = skb_shinfo(skb)->gso_size;
472 skb->nh.iph->tot_len = 0;
473 skb->nh.iph->check = 0;
474 skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
475 diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
476 index 690a1aa..3843e0a 100644
477 --- a/drivers/net/loopback.c
478 +++ b/drivers/net/loopback.c
479 @@ -74,7 +74,7 @@ static void emulate_large_send_offload(s
480 struct iphdr *iph = skb->nh.iph;
481 struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
482 unsigned int doffset = (iph->ihl + th->doff) * 4;
483 - unsigned int mtu = skb_shinfo(skb)->tso_size + doffset;
484 + unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
485 unsigned int offset = 0;
486 u32 seq = ntohl(th->seq);
487 u16 id = ntohs(iph->id);
488 @@ -139,7 +139,7 @@ #ifndef LOOPBACK_MUST_CHECKSUM
489 #endif
491 #ifdef LOOPBACK_TSO
492 - if (skb_shinfo(skb)->tso_size) {
493 + if (skb_is_gso(skb)) {
494 BUG_ON(skb->protocol != htons(ETH_P_IP));
495 BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
497 diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
498 index c0998ef..0fac9d5 100644
499 --- a/drivers/net/mv643xx_eth.c
500 +++ b/drivers/net/mv643xx_eth.c
501 @@ -1107,7 +1107,7 @@ static int mv643xx_eth_start_xmit(struct
503 #ifdef MV643XX_CHECKSUM_OFFLOAD_TX
504 if (has_tiny_unaligned_frags(skb)) {
505 - if ((skb_linearize(skb, GFP_ATOMIC) != 0)) {
506 + if (__skb_linearize(skb)) {
507 stats->tx_dropped++;
508 printk(KERN_DEBUG "%s: failed to linearize tiny "
509 "unaligned fragment\n", dev->name);
510 diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
511 index 9d6d254..c9ed624 100644
512 --- a/drivers/net/natsemi.c
513 +++ b/drivers/net/natsemi.c
514 @@ -323,12 +323,12 @@ performance critical codepaths:
515 The rx process only runs in the interrupt handler. Access from outside
516 the interrupt handler is only permitted after disable_irq().
518 -The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap
519 +The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap
520 is set, then access is permitted under spin_lock_irq(&np->lock).
522 Thus configuration functions that want to access everything must call
523 disable_irq(dev->irq);
524 - spin_lock_bh(dev->xmit_lock);
525 + netif_tx_lock_bh(dev);
526 spin_lock_irq(&np->lock);
528 IV. Notes
529 diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
530 index 8cc0d0b..e53b313 100644
531 --- a/drivers/net/r8169.c
532 +++ b/drivers/net/r8169.c
533 @@ -2171,7 +2171,7 @@ static int rtl8169_xmit_frags(struct rtl
534 static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
535 {
536 if (dev->features & NETIF_F_TSO) {
537 - u32 mss = skb_shinfo(skb)->tso_size;
538 + u32 mss = skb_shinfo(skb)->gso_size;
540 if (mss)
541 return LargeSend | ((mss & MSSMask) << MSSShift);
542 diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
543 index b7f00d6..439f45f 100644
544 --- a/drivers/net/s2io.c
545 +++ b/drivers/net/s2io.c
546 @@ -3522,8 +3522,8 @@ #endif
547 txdp->Control_1 = 0;
548 txdp->Control_2 = 0;
549 #ifdef NETIF_F_TSO
550 - mss = skb_shinfo(skb)->tso_size;
551 - if (mss) {
552 + mss = skb_shinfo(skb)->gso_size;
553 + if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) {
554 txdp->Control_1 |= TXD_TCP_LSO_EN;
555 txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
556 }
557 @@ -3543,10 +3543,10 @@ #endif
558 }
560 frg_len = skb->len - skb->data_len;
561 - if (skb_shinfo(skb)->ufo_size) {
562 + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) {
563 int ufo_size;
565 - ufo_size = skb_shinfo(skb)->ufo_size;
566 + ufo_size = skb_shinfo(skb)->gso_size;
567 ufo_size &= ~7;
568 txdp->Control_1 |= TXD_UFO_EN;
569 txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
570 @@ -3572,7 +3572,7 @@ #endif
571 txdp->Host_Control = (unsigned long) skb;
572 txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
574 - if (skb_shinfo(skb)->ufo_size)
575 + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
576 txdp->Control_1 |= TXD_UFO_EN;
578 frg_cnt = skb_shinfo(skb)->nr_frags;
579 @@ -3587,12 +3587,12 @@ #endif
580 (sp->pdev, frag->page, frag->page_offset,
581 frag->size, PCI_DMA_TODEVICE);
582 txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size);
583 - if (skb_shinfo(skb)->ufo_size)
584 + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
585 txdp->Control_1 |= TXD_UFO_EN;
586 }
587 txdp->Control_1 |= TXD_GATHER_CODE_LAST;
589 - if (skb_shinfo(skb)->ufo_size)
590 + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
591 frg_cnt++; /* as Txd0 was used for inband header */
593 tx_fifo = mac_control->tx_FIFO_start[queue];
594 @@ -3606,7 +3606,7 @@ #ifdef NETIF_F_TSO
595 if (mss)
596 val64 |= TX_FIFO_SPECIAL_FUNC;
597 #endif
598 - if (skb_shinfo(skb)->ufo_size)
599 + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
600 val64 |= TX_FIFO_SPECIAL_FUNC;
601 writeq(val64, &tx_fifo->List_Control);
603 diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
604 index 0618cd5..aa06a82 100644
605 --- a/drivers/net/sky2.c
606 +++ b/drivers/net/sky2.c
607 @@ -1125,7 +1125,7 @@ static unsigned tx_le_req(const struct s
608 count = sizeof(dma_addr_t) / sizeof(u32);
609 count += skb_shinfo(skb)->nr_frags * count;
611 - if (skb_shinfo(skb)->tso_size)
612 + if (skb_is_gso(skb))
613 ++count;
615 if (skb->ip_summed == CHECKSUM_HW)
616 @@ -1197,7 +1197,7 @@ static int sky2_xmit_frame(struct sk_buf
617 }
619 /* Check for TCP Segmentation Offload */
620 - mss = skb_shinfo(skb)->tso_size;
621 + mss = skb_shinfo(skb)->gso_size;
622 if (mss != 0) {
623 /* just drop the packet if non-linear expansion fails */
624 if (skb_header_cloned(skb) &&
625 diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
626 index caf4102..fc9164a 100644
627 --- a/drivers/net/tg3.c
628 +++ b/drivers/net/tg3.c
629 @@ -3664,7 +3664,7 @@ static int tg3_start_xmit(struct sk_buff
630 #if TG3_TSO_SUPPORT != 0
631 mss = 0;
632 if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
633 - (mss = skb_shinfo(skb)->tso_size) != 0) {
634 + (mss = skb_shinfo(skb)->gso_size) != 0) {
635 int tcp_opt_len, ip_tcp_len;
637 if (skb_header_cloned(skb) &&
638 diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
639 index 5b1af39..11de5af 100644
640 --- a/drivers/net/tulip/winbond-840.c
641 +++ b/drivers/net/tulip/winbond-840.c
642 @@ -1605,11 +1605,11 @@ #ifdef CONFIG_PM
643 * - get_stats:
644 * spin_lock_irq(np->lock), doesn't touch hw if not present
645 * - hard_start_xmit:
646 - * netif_stop_queue + spin_unlock_wait(&dev->xmit_lock);
647 + * synchronize_irq + netif_tx_disable;
648 * - tx_timeout:
649 - * netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
650 + * netif_device_detach + netif_tx_disable;
651 * - set_multicast_list
652 - * netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
653 + * netif_device_detach + netif_tx_disable;
654 * - interrupt handler
655 * doesn't touch hw if not present, synchronize_irq waits for
656 * running instances of the interrupt handler.
657 @@ -1635,11 +1635,10 @@ static int w840_suspend (struct pci_dev
658 netif_device_detach(dev);
659 update_csr6(dev, 0);
660 iowrite32(0, ioaddr + IntrEnable);
661 - netif_stop_queue(dev);
662 spin_unlock_irq(&np->lock);
664 - spin_unlock_wait(&dev->xmit_lock);
665 synchronize_irq(dev->irq);
666 + netif_tx_disable(dev);
668 np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff;
670 diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
671 index 4c76cb7..3d62abc 100644
672 --- a/drivers/net/typhoon.c
673 +++ b/drivers/net/typhoon.c
674 @@ -340,7 +340,7 @@ #define typhoon_synchronize_irq(x) synch
675 #endif
677 #if defined(NETIF_F_TSO)
678 -#define skb_tso_size(x) (skb_shinfo(x)->tso_size)
679 +#define skb_tso_size(x) (skb_shinfo(x)->gso_size)
680 #define TSO_NUM_DESCRIPTORS 2
681 #define TSO_OFFLOAD_ON TYPHOON_OFFLOAD_TCP_SEGMENT
682 #else
683 @@ -805,7 +805,7 @@ typhoon_start_tx(struct sk_buff *skb, st
684 * If problems develop with TSO, check this first.
685 */
686 numDesc = skb_shinfo(skb)->nr_frags + 1;
687 - if(skb_tso_size(skb))
688 + if (skb_is_gso(skb))
689 numDesc++;
691 /* When checking for free space in the ring, we need to also
692 @@ -845,7 +845,7 @@ typhoon_start_tx(struct sk_buff *skb, st
693 TYPHOON_TX_PF_VLAN_TAG_SHIFT);
694 }
696 - if(skb_tso_size(skb)) {
697 + if (skb_is_gso(skb)) {
698 first_txd->processFlags |= TYPHOON_TX_PF_TCP_SEGMENT;
699 first_txd->numDesc++;
701 diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
702 index ed1f837..2eb6b5f 100644
703 --- a/drivers/net/via-velocity.c
704 +++ b/drivers/net/via-velocity.c
705 @@ -1899,6 +1899,13 @@ static int velocity_xmit(struct sk_buff
707 int pktlen = skb->len;
709 +#ifdef VELOCITY_ZERO_COPY_SUPPORT
710 + if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
711 + kfree_skb(skb);
712 + return 0;
713 + }
714 +#endif
715 +
716 spin_lock_irqsave(&vptr->lock, flags);
718 index = vptr->td_curr[qnum];
719 @@ -1914,8 +1921,6 @@ static int velocity_xmit(struct sk_buff
720 */
721 if (pktlen < ETH_ZLEN) {
722 /* Cannot occur until ZC support */
723 - if(skb_linearize(skb, GFP_ATOMIC))
724 - return 0;
725 pktlen = ETH_ZLEN;
726 memcpy(tdinfo->buf, skb->data, skb->len);
727 memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
728 @@ -1933,7 +1938,6 @@ #ifdef VELOCITY_ZERO_COPY_SUPPORT
729 int nfrags = skb_shinfo(skb)->nr_frags;
730 tdinfo->skb = skb;
731 if (nfrags > 6) {
732 - skb_linearize(skb, GFP_ATOMIC);
733 memcpy(tdinfo->buf, skb->data, skb->len);
734 tdinfo->skb_dma[0] = tdinfo->buf_dma;
735 td_ptr->tdesc0.pktsize =
736 diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
737 index 6fd0bf7..75237c1 100644
738 --- a/drivers/net/wireless/orinoco.c
739 +++ b/drivers/net/wireless/orinoco.c
740 @@ -1835,7 +1835,9 @@ static int __orinoco_program_rids(struct
741 /* Set promiscuity / multicast*/
742 priv->promiscuous = 0;
743 priv->mc_count = 0;
744 - __orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */
745 +
746 + /* FIXME: what about netif_tx_lock */
747 + __orinoco_set_multicast_list(dev);
749 return 0;
750 }
751 diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
752 index 82cb4af..57cec40 100644
753 --- a/drivers/s390/net/qeth_eddp.c
754 +++ b/drivers/s390/net/qeth_eddp.c
755 @@ -421,7 +421,7 @@ #endif /* CONFIG_QETH_VLAN */
756 }
757 tcph = eddp->skb->h.th;
758 while (eddp->skb_offset < eddp->skb->len) {
759 - data_len = min((int)skb_shinfo(eddp->skb)->tso_size,
760 + data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
761 (int)(eddp->skb->len - eddp->skb_offset));
762 /* prepare qdio hdr */
763 if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){
764 @@ -516,20 +516,20 @@ qeth_eddp_calc_num_pages(struct qeth_edd
766 QETH_DBF_TEXT(trace, 5, "eddpcanp");
767 /* can we put multiple skbs in one page? */
768 - skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len);
769 + skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len);
770 if (skbs_per_page > 1){
771 - ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) /
772 + ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) /
773 skbs_per_page + 1;
774 ctx->elements_per_skb = 1;
775 } else {
776 /* no -> how many elements per skb? */
777 - ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len +
778 + ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len +
779 PAGE_SIZE) >> PAGE_SHIFT;
780 ctx->num_pages = ctx->elements_per_skb *
781 - (skb_shinfo(skb)->tso_segs + 1);
782 + (skb_shinfo(skb)->gso_segs + 1);
783 }
784 ctx->num_elements = ctx->elements_per_skb *
785 - (skb_shinfo(skb)->tso_segs + 1);
786 + (skb_shinfo(skb)->gso_segs + 1);
787 }
789 static inline struct qeth_eddp_context *
790 diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
791 index dba7f7f..a3ea8e0 100644
792 --- a/drivers/s390/net/qeth_main.c
793 +++ b/drivers/s390/net/qeth_main.c
794 @@ -4454,7 +4454,7 @@ qeth_send_packet(struct qeth_card *card,
795 queue = card->qdio.out_qs
796 [qeth_get_priority_queue(card, skb, ipv, cast_type)];
798 - if (skb_shinfo(skb)->tso_size)
799 + if (skb_is_gso(skb))
800 large_send = card->options.large_send;
802 /*are we able to do TSO ? If so ,prepare and send it from here */
803 @@ -4501,8 +4501,7 @@ qeth_send_packet(struct qeth_card *card,
804 card->stats.tx_packets++;
805 card->stats.tx_bytes += skb->len;
806 #ifdef CONFIG_QETH_PERF_STATS
807 - if (skb_shinfo(skb)->tso_size &&
808 - !(large_send == QETH_LARGE_SEND_NO)) {
809 + if (skb_is_gso(skb) && !(large_send == QETH_LARGE_SEND_NO)) {
810 card->perf_stats.large_send_bytes += skb->len;
811 card->perf_stats.large_send_cnt++;
812 }
813 diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
814 index 1286dde..89cbf34 100644
815 --- a/drivers/s390/net/qeth_tso.h
816 +++ b/drivers/s390/net/qeth_tso.h
817 @@ -51,7 +51,7 @@ qeth_tso_fill_header(struct qeth_card *c
818 hdr->ext.hdr_version = 1;
819 hdr->ext.hdr_len = 28;
820 /*insert non-fix values */
821 - hdr->ext.mss = skb_shinfo(skb)->tso_size;
822 + hdr->ext.mss = skb_shinfo(skb)->gso_size;
823 hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4);
824 hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len -
825 sizeof(struct qeth_hdr_tso));
826 diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
827 index 93535f0..9269df7 100644
828 --- a/include/linux/ethtool.h
829 +++ b/include/linux/ethtool.h
830 @@ -408,6 +408,8 @@ #define ETHTOOL_STSO 0x0000001f /* Set
831 #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */
832 #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */
833 #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */
834 +#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */
835 +#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */
837 /* compatibility with older code */
838 #define SPARC_ETH_GSET ETHTOOL_GSET
839 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
840 index 7fda03d..9865736 100644
841 --- a/include/linux/netdevice.h
842 +++ b/include/linux/netdevice.h
843 @@ -230,7 +230,8 @@ enum netdev_state_t
844 __LINK_STATE_SCHED,
845 __LINK_STATE_NOCARRIER,
846 __LINK_STATE_RX_SCHED,
847 - __LINK_STATE_LINKWATCH_PENDING
848 + __LINK_STATE_LINKWATCH_PENDING,
849 + __LINK_STATE_QDISC_RUNNING,
850 };
853 @@ -306,9 +307,17 @@ #define NETIF_F_HW_VLAN_TX 128 /* Transm
854 #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */
855 #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
856 #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
857 -#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
858 +#define NETIF_F_GSO 2048 /* Enable software GSO. */
859 #define NETIF_F_LLTX 4096 /* LockLess TX */
860 -#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/
861 +
862 + /* Segmentation offload features */
863 +#define NETIF_F_GSO_SHIFT 16
864 +#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
865 +#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
866 +#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
867 +
868 +#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
869 +#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
871 struct net_device *next_sched;
873 @@ -394,6 +403,9 @@ #define NETIF_F_UFO 8192
874 struct list_head qdisc_list;
875 unsigned long tx_queue_len; /* Max frames per queue allowed */
877 + /* Partially transmitted GSO packet. */
878 + struct sk_buff *gso_skb;
879 +
880 /* ingress path synchronizer */
881 spinlock_t ingress_lock;
882 struct Qdisc *qdisc_ingress;
883 @@ -402,7 +414,7 @@ #define NETIF_F_UFO 8192
884 * One part is mostly used on xmit path (device)
885 */
886 /* hard_start_xmit synchronizer */
887 - spinlock_t xmit_lock ____cacheline_aligned_in_smp;
888 + spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
889 /* cpu id of processor entered to hard_start_xmit or -1,
890 if nobody entered there.
891 */
892 @@ -527,6 +539,9 @@ struct packet_type {
893 struct net_device *,
894 struct packet_type *,
895 struct net_device *);
896 + struct sk_buff *(*gso_segment)(struct sk_buff *skb,
897 + int features);
898 + int (*gso_send_check)(struct sk_buff *skb);
899 void *af_packet_priv;
900 struct list_head list;
901 };
902 @@ -693,7 +708,8 @@ extern int dev_change_name(struct net_d
903 extern int dev_set_mtu(struct net_device *, int);
904 extern int dev_set_mac_address(struct net_device *,
905 struct sockaddr *);
906 -extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
907 +extern int dev_hard_start_xmit(struct sk_buff *skb,
908 + struct net_device *dev);
910 extern void dev_init(void);
912 @@ -900,11 +916,43 @@ static inline void __netif_rx_complete(s
913 clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
914 }
916 +static inline void netif_tx_lock(struct net_device *dev)
917 +{
918 + spin_lock(&dev->_xmit_lock);
919 + dev->xmit_lock_owner = smp_processor_id();
920 +}
921 +
922 +static inline void netif_tx_lock_bh(struct net_device *dev)
923 +{
924 + spin_lock_bh(&dev->_xmit_lock);
925 + dev->xmit_lock_owner = smp_processor_id();
926 +}
927 +
928 +static inline int netif_tx_trylock(struct net_device *dev)
929 +{
930 + int err = spin_trylock(&dev->_xmit_lock);
931 + if (!err)
932 + dev->xmit_lock_owner = smp_processor_id();
933 + return err;
934 +}
935 +
936 +static inline void netif_tx_unlock(struct net_device *dev)
937 +{
938 + dev->xmit_lock_owner = -1;
939 + spin_unlock(&dev->_xmit_lock);
940 +}
941 +
942 +static inline void netif_tx_unlock_bh(struct net_device *dev)
943 +{
944 + dev->xmit_lock_owner = -1;
945 + spin_unlock_bh(&dev->_xmit_lock);
946 +}
947 +
948 static inline void netif_tx_disable(struct net_device *dev)
949 {
950 - spin_lock_bh(&dev->xmit_lock);
951 + netif_tx_lock_bh(dev);
952 netif_stop_queue(dev);
953 - spin_unlock_bh(&dev->xmit_lock);
954 + netif_tx_unlock_bh(dev);
955 }
957 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
958 @@ -932,6 +980,7 @@ extern int netdev_max_backlog;
959 extern int weight_p;
960 extern int netdev_set_master(struct net_device *dev, struct net_device *master);
961 extern int skb_checksum_help(struct sk_buff *skb, int inward);
962 +extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
963 #ifdef CONFIG_BUG
964 extern void netdev_rx_csum_fault(struct net_device *dev);
965 #else
966 @@ -951,6 +1000,19 @@ #endif
968 extern void linkwatch_run_queue(void);
970 +static inline int skb_gso_ok(struct sk_buff *skb, int features)
971 +{
972 + int feature = skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT;
973 + return (features & feature) == feature;
974 +}
975 +
976 +static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
977 +{
978 + return skb_is_gso(skb) &&
979 + (!skb_gso_ok(skb, dev->features) ||
980 + unlikely(skb->ip_summed != CHECKSUM_HW));
981 +}
982 +
983 #endif /* __KERNEL__ */
985 #endif /* _LINUX_DEV_H */
986 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
987 index ad7cc22..adfe3a8 100644
988 --- a/include/linux/skbuff.h
989 +++ b/include/linux/skbuff.h
990 @@ -134,9 +134,10 @@ struct skb_frag_struct {
991 struct skb_shared_info {
992 atomic_t dataref;
993 unsigned short nr_frags;
994 - unsigned short tso_size;
995 - unsigned short tso_segs;
996 - unsigned short ufo_size;
997 + unsigned short gso_size;
998 + /* Warning: this field is not always filled in (UFO)! */
999 + unsigned short gso_segs;
1000 + unsigned short gso_type;
1001 unsigned int ip6_frag_id;
1002 struct sk_buff *frag_list;
1003 skb_frag_t frags[MAX_SKB_FRAGS];
1004 @@ -168,6 +169,14 @@ enum {
1005 SKB_FCLONE_CLONE,
1006 };
1008 +enum {
1009 + SKB_GSO_TCPV4 = 1 << 0,
1010 + SKB_GSO_UDPV4 = 1 << 1,
1012 + /* This indicates the skb is from an untrusted source. */
1013 + SKB_GSO_DODGY = 1 << 2,
1014 +};
1016 /**
1017 * struct sk_buff - socket buffer
1018 * @next: Next buffer in list
1019 @@ -1148,18 +1157,34 @@ static inline int skb_can_coalesce(struc
1020 return 0;
1023 +static inline int __skb_linearize(struct sk_buff *skb)
1024 +{
1025 + return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
1026 +}
1028 /**
1029 * skb_linearize - convert paged skb to linear one
1030 * @skb: buffer to linarize
1031 - * @gfp: allocation mode
1033 * If there is no free memory -ENOMEM is returned, otherwise zero
1034 * is returned and the old skb data released.
1035 */
1036 -extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp);
1037 -static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp)
1038 +static inline int skb_linearize(struct sk_buff *skb)
1039 +{
1040 + return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
1041 +}
1043 +/**
1044 + * skb_linearize_cow - make sure skb is linear and writable
1045 + * @skb: buffer to process
1046 + *
1047 + * If there is no free memory -ENOMEM is returned, otherwise zero
1048 + * is returned and the old skb data released.
1049 + */
1050 +static inline int skb_linearize_cow(struct sk_buff *skb)
1052 - return __skb_linearize(skb, gfp);
1053 + return skb_is_nonlinear(skb) || skb_cloned(skb) ?
1054 + __skb_linearize(skb) : 0;
1057 /**
1058 @@ -1254,6 +1279,7 @@ extern void skb_split(struct sk_b
1059 struct sk_buff *skb1, const u32 len);
1061 extern void skb_release_data(struct sk_buff *skb);
1062 +extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
1064 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
1065 int len, void *buffer)
1066 @@ -1377,5 +1403,10 @@ #else /* CONFIG_NETFILTER */
1067 static inline void nf_reset(struct sk_buff *skb) {}
1068 #endif /* CONFIG_NETFILTER */
1070 +static inline int skb_is_gso(const struct sk_buff *skb)
1071 +{
1072 + return skb_shinfo(skb)->gso_size;
1073 +}
1075 #endif /* __KERNEL__ */
1076 #endif /* _LINUX_SKBUFF_H */
1077 diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
1078 index b94d1ad..75b5b93 100644
1079 --- a/include/net/pkt_sched.h
1080 +++ b/include/net/pkt_sched.h
1081 @@ -218,12 +218,13 @@ extern struct qdisc_rate_table *qdisc_ge
1082 struct rtattr *tab);
1083 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
1085 -extern int qdisc_restart(struct net_device *dev);
1086 +extern void __qdisc_run(struct net_device *dev);
1088 static inline void qdisc_run(struct net_device *dev)
1090 - while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
1091 - /* NOTHING */;
1092 + if (!netif_queue_stopped(dev) &&
1093 + !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
1094 + __qdisc_run(dev);
1097 extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1098 diff --git a/include/net/protocol.h b/include/net/protocol.h
1099 index 6dc5970..d516c58 100644
1100 --- a/include/net/protocol.h
1101 +++ b/include/net/protocol.h
1102 @@ -37,6 +37,9 @@ #define MAX_INET_PROTOS 256 /* Must be
1103 struct net_protocol {
1104 int (*handler)(struct sk_buff *skb);
1105 void (*err_handler)(struct sk_buff *skb, u32 info);
1106 + int (*gso_send_check)(struct sk_buff *skb);
1107 + struct sk_buff *(*gso_segment)(struct sk_buff *skb,
1108 + int features);
1109 int no_policy;
1110 };
1112 diff --git a/include/net/sock.h b/include/net/sock.h
1113 index f63d0d5..a8e8d21 100644
1114 --- a/include/net/sock.h
1115 +++ b/include/net/sock.h
1116 @@ -1064,9 +1064,13 @@ static inline void sk_setup_caps(struct
1118 __sk_dst_set(sk, dst);
1119 sk->sk_route_caps = dst->dev->features;
1120 + if (sk->sk_route_caps & NETIF_F_GSO)
1121 + sk->sk_route_caps |= NETIF_F_TSO;
1122 if (sk->sk_route_caps & NETIF_F_TSO) {
1123 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
1124 sk->sk_route_caps &= ~NETIF_F_TSO;
1125 + else
1126 + sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1130 diff --git a/include/net/tcp.h b/include/net/tcp.h
1131 index 77f21c6..22dbbac 100644
1132 --- a/include/net/tcp.h
1133 +++ b/include/net/tcp.h
1134 @@ -552,13 +552,13 @@ #include <net/tcp_ecn.h>
1135 */
1136 static inline int tcp_skb_pcount(const struct sk_buff *skb)
1138 - return skb_shinfo(skb)->tso_segs;
1139 + return skb_shinfo(skb)->gso_segs;
1142 /* This is valid iff tcp_skb_pcount() > 1. */
1143 static inline int tcp_skb_mss(const struct sk_buff *skb)
1145 - return skb_shinfo(skb)->tso_size;
1146 + return skb_shinfo(skb)->gso_size;
1149 static inline void tcp_dec_pcount_approx(__u32 *count,
1150 @@ -1063,6 +1063,9 @@ extern struct request_sock_ops tcp_reque
1152 extern int tcp_v4_destroy_sock(struct sock *sk);
1154 +extern int tcp_v4_gso_send_check(struct sk_buff *skb);
1155 +extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
1157 #ifdef CONFIG_PROC_FS
1158 extern int tcp4_proc_init(void);
1159 extern void tcp4_proc_exit(void);
1160 diff --git a/net/atm/clip.c b/net/atm/clip.c
1161 index 1842a4e..6dc21a7 100644
1162 --- a/net/atm/clip.c
1163 +++ b/net/atm/clip.c
1164 @@ -101,7 +101,7 @@ static void unlink_clip_vcc(struct clip_
1165 printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc);
1166 return;
1168 - spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */
1169 + netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */
1170 entry->neigh->used = jiffies;
1171 for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
1172 if (*walk == clip_vcc) {
1173 @@ -125,7 +125,7 @@ static void unlink_clip_vcc(struct clip_
1174 printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
1175 "0x%p)\n",entry,clip_vcc);
1176 out:
1177 - spin_unlock_bh(&entry->neigh->dev->xmit_lock);
1178 + netif_tx_unlock_bh(entry->neigh->dev);
1181 /* The neighbour entry n->lock is held. */
1182 diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
1183 index 0b33a7b..180e79b 100644
1184 --- a/net/bridge/br_device.c
1185 +++ b/net/bridge/br_device.c
1186 @@ -146,9 +146,9 @@ static int br_set_tx_csum(struct net_dev
1187 struct net_bridge *br = netdev_priv(dev);
1189 if (data)
1190 - br->feature_mask |= NETIF_F_IP_CSUM;
1191 + br->feature_mask |= NETIF_F_NO_CSUM;
1192 else
1193 - br->feature_mask &= ~NETIF_F_IP_CSUM;
1194 + br->feature_mask &= ~NETIF_F_ALL_CSUM;
1196 br_features_recompute(br);
1197 return 0;
1198 @@ -185,6 +185,6 @@ void br_dev_setup(struct net_device *dev
1199 dev->set_mac_address = br_set_mac_address;
1200 dev->priv_flags = IFF_EBRIDGE;
1202 - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
1203 - | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
1204 + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
1205 + NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST;
1207 diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
1208 index 2d24fb4..b34e76f 100644
1209 --- a/net/bridge/br_forward.c
1210 +++ b/net/bridge/br_forward.c
1211 @@ -32,7 +32,7 @@ static inline int should_deliver(const s
1212 int br_dev_queue_push_xmit(struct sk_buff *skb)
1214 /* drop mtu oversized packets except tso */
1215 - if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
1216 + if (skb->len > skb->dev->mtu && !skb_is_gso(skb))
1217 kfree_skb(skb);
1218 else {
1219 #ifdef CONFIG_BRIDGE_NETFILTER
1220 diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
1221 index f36b35e..0617146 100644
1222 --- a/net/bridge/br_if.c
1223 +++ b/net/bridge/br_if.c
1224 @@ -385,17 +385,28 @@ void br_features_recompute(struct net_br
1225 struct net_bridge_port *p;
1226 unsigned long features, checksum;
1228 - features = br->feature_mask &~ NETIF_F_IP_CSUM;
1229 - checksum = br->feature_mask & NETIF_F_IP_CSUM;
1230 + checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0;
1231 + features = br->feature_mask & ~NETIF_F_ALL_CSUM;
1233 list_for_each_entry(p, &br->port_list, list) {
1234 - if (!(p->dev->features
1235 - & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
1236 + unsigned long feature = p->dev->features;
1238 + if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
1239 + checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
1240 + if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
1241 + checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
1242 + if (!(feature & NETIF_F_IP_CSUM))
1243 checksum = 0;
1244 - features &= p->dev->features;
1246 + if (feature & NETIF_F_GSO)
1247 + feature |= NETIF_F_TSO;
1248 + feature |= NETIF_F_GSO;
1250 + features &= feature;
1253 - br->dev->features = features | checksum | NETIF_F_LLTX;
1254 + br->dev->features = features | checksum | NETIF_F_LLTX |
1255 + NETIF_F_GSO_ROBUST;
1258 /* called with RTNL */
1259 diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
1260 index 9e27373..b2dba74 100644
1261 --- a/net/bridge/br_netfilter.c
1262 +++ b/net/bridge/br_netfilter.c
1263 @@ -743,7 +743,7 @@ static int br_nf_dev_queue_xmit(struct s
1265 if (skb->protocol == htons(ETH_P_IP) &&
1266 skb->len > skb->dev->mtu &&
1267 - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
1268 + !skb_is_gso(skb))
1269 return ip_fragment(skb, br_dev_queue_push_xmit);
1270 else
1271 return br_dev_queue_push_xmit(skb);
1272 diff --git a/net/core/dev.c b/net/core/dev.c
1273 index 12a214c..e814a89 100644
1274 --- a/net/core/dev.c
1275 +++ b/net/core/dev.c
1276 @@ -115,6 +115,7 @@ #include <linux/wireless.h> /* Note : w
1277 #include <net/iw_handler.h>
1278 #endif /* CONFIG_NET_RADIO */
1279 #include <asm/current.h>
1280 +#include <linux/err.h>
1282 /*
1283 * The list of packet types we will receive (as opposed to discard)
1284 @@ -1032,7 +1033,7 @@ static inline void net_timestamp(struct
1285 * taps currently in use.
1286 */
1288 -void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1289 +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1291 struct packet_type *ptype;
1293 @@ -1082,9 +1083,17 @@ int skb_checksum_help(struct sk_buff *sk
1294 unsigned int csum;
1295 int ret = 0, offset = skb->h.raw - skb->data;
1297 - if (inward) {
1298 - skb->ip_summed = CHECKSUM_NONE;
1299 - goto out;
1300 + if (inward)
1301 + goto out_set_summed;
1303 + if (unlikely(skb_shinfo(skb)->gso_size)) {
1304 + static int warned;
1306 + WARN_ON(!warned);
1307 + warned = 1;
1309 + /* Let GSO fix up the checksum. */
1310 + goto out_set_summed;
1313 if (skb_cloned(skb)) {
1314 @@ -1101,11 +1110,70 @@ int skb_checksum_help(struct sk_buff *sk
1315 BUG_ON(skb->csum + 2 > offset);
1317 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1319 +out_set_summed:
1320 skb->ip_summed = CHECKSUM_NONE;
1321 out:
1322 return ret;
1325 +/**
1326 + * skb_gso_segment - Perform segmentation on skb.
1327 + * @skb: buffer to segment
1328 + * @features: features for the output path (see dev->features)
1329 + *
1330 + * This function segments the given skb and returns a list of segments.
1331 + *
1332 + * It may return NULL if the skb requires no segmentation. This is
1333 + * only possible when GSO is used for verifying header integrity.
1334 + */
1335 +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1336 +{
1337 + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1338 + struct packet_type *ptype;
1339 + int type = skb->protocol;
1340 + int err;
1342 + BUG_ON(skb_shinfo(skb)->frag_list);
1344 + skb->mac.raw = skb->data;
1345 + skb->mac_len = skb->nh.raw - skb->data;
1346 + __skb_pull(skb, skb->mac_len);
1348 + if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
1349 + static int warned;
1351 + WARN_ON(!warned);
1352 + warned = 1;
1354 + if (skb_header_cloned(skb) &&
1355 + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1356 + return ERR_PTR(err);
1357 + }
1359 + rcu_read_lock();
1360 + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1361 + if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1362 + if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
1363 + err = ptype->gso_send_check(skb);
1364 + segs = ERR_PTR(err);
1365 + if (err || skb_gso_ok(skb, features))
1366 + break;
1367 + __skb_push(skb, skb->data - skb->nh.raw);
1368 + }
1369 + segs = ptype->gso_segment(skb, features);
1370 + break;
1371 + }
1372 + }
1373 + rcu_read_unlock();
1375 + __skb_push(skb, skb->data - skb->mac.raw);
1377 + return segs;
1378 +}
1380 +EXPORT_SYMBOL(skb_gso_segment);
1382 /* Take action when hardware reception checksum errors are detected. */
1383 #ifdef CONFIG_BUG
1384 void netdev_rx_csum_fault(struct net_device *dev)
1385 @@ -1142,75 +1210,108 @@ #else
1386 #define illegal_highdma(dev, skb) (0)
1387 #endif
1389 -/* Keep head the same: replace data */
1390 -int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
1391 -{
1392 - unsigned int size;
1393 - u8 *data;
1394 - long offset;
1395 - struct skb_shared_info *ninfo;
1396 - int headerlen = skb->data - skb->head;
1397 - int expand = (skb->tail + skb->data_len) - skb->end;
1399 - if (skb_shared(skb))
1400 - BUG();
1402 - if (expand <= 0)
1403 - expand = 0;
1405 - size = skb->end - skb->head + expand;
1406 - size = SKB_DATA_ALIGN(size);
1407 - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1408 - if (!data)
1409 - return -ENOMEM;
1411 - /* Copy entire thing */
1412 - if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1413 - BUG();
1415 - /* Set up shinfo */
1416 - ninfo = (struct skb_shared_info*)(data + size);
1417 - atomic_set(&ninfo->dataref, 1);
1418 - ninfo->tso_size = skb_shinfo(skb)->tso_size;
1419 - ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1420 - ninfo->nr_frags = 0;
1421 - ninfo->frag_list = NULL;
1423 - /* Offset between the two in bytes */
1424 - offset = data - skb->head;
1426 - /* Free old data. */
1427 - skb_release_data(skb);
1429 - skb->head = data;
1430 - skb->end = data + size;
1432 - /* Set up new pointers */
1433 - skb->h.raw += offset;
1434 - skb->nh.raw += offset;
1435 - skb->mac.raw += offset;
1436 - skb->tail += offset;
1437 - skb->data += offset;
1439 - /* We are no longer a clone, even if we were. */
1440 - skb->cloned = 0;
1442 - skb->tail += skb->data_len;
1443 - skb->data_len = 0;
1444 +struct dev_gso_cb {
1445 + void (*destructor)(struct sk_buff *skb);
1446 +};
1448 +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1450 +static void dev_gso_skb_destructor(struct sk_buff *skb)
1451 +{
1452 + struct dev_gso_cb *cb;
1454 + do {
1455 + struct sk_buff *nskb = skb->next;
1457 + skb->next = nskb->next;
1458 + nskb->next = NULL;
1459 + kfree_skb(nskb);
1460 + } while (skb->next);
1462 + cb = DEV_GSO_CB(skb);
1463 + if (cb->destructor)
1464 + cb->destructor(skb);
1465 +}
1467 +/**
1468 + * dev_gso_segment - Perform emulated hardware segmentation on skb.
1469 + * @skb: buffer to segment
1470 + *
1471 + * This function segments the given skb and stores the list of segments
1472 + * in skb->next.
1473 + */
1474 +static int dev_gso_segment(struct sk_buff *skb)
1475 +{
1476 + struct net_device *dev = skb->dev;
1477 + struct sk_buff *segs;
1478 + int features = dev->features & ~(illegal_highdma(dev, skb) ?
1479 + NETIF_F_SG : 0);
1481 + segs = skb_gso_segment(skb, features);
1483 + /* Verifying header integrity only. */
1484 + if (!segs)
1485 + return 0;
1487 + if (unlikely(IS_ERR(segs)))
1488 + return PTR_ERR(segs);
1490 + skb->next = segs;
1491 + DEV_GSO_CB(skb)->destructor = skb->destructor;
1492 + skb->destructor = dev_gso_skb_destructor;
1494 + return 0;
1495 +}
1497 +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1498 +{
1499 + if (likely(!skb->next)) {
1500 + if (netdev_nit)
1501 + dev_queue_xmit_nit(skb, dev);
1503 + if (netif_needs_gso(dev, skb)) {
1504 + if (unlikely(dev_gso_segment(skb)))
1505 + goto out_kfree_skb;
1506 + if (skb->next)
1507 + goto gso;
1508 + }
1510 + return dev->hard_start_xmit(skb, dev);
1511 + }
1513 +gso:
1514 + do {
1515 + struct sk_buff *nskb = skb->next;
1516 + int rc;
1518 + skb->next = nskb->next;
1519 + nskb->next = NULL;
1520 + rc = dev->hard_start_xmit(nskb, dev);
1521 + if (unlikely(rc)) {
1522 + nskb->next = skb->next;
1523 + skb->next = nskb;
1524 + return rc;
1525 + }
1526 + if (unlikely(netif_queue_stopped(dev) && skb->next))
1527 + return NETDEV_TX_BUSY;
1528 + } while (skb->next);
1530 + skb->destructor = DEV_GSO_CB(skb)->destructor;
1532 +out_kfree_skb:
1533 + kfree_skb(skb);
1534 return 0;
1537 #define HARD_TX_LOCK(dev, cpu) { \
1538 if ((dev->features & NETIF_F_LLTX) == 0) { \
1539 - spin_lock(&dev->xmit_lock); \
1540 - dev->xmit_lock_owner = cpu; \
1541 + netif_tx_lock(dev); \
1542 } \
1545 #define HARD_TX_UNLOCK(dev) { \
1546 if ((dev->features & NETIF_F_LLTX) == 0) { \
1547 - dev->xmit_lock_owner = -1; \
1548 - spin_unlock(&dev->xmit_lock); \
1549 + netif_tx_unlock(dev); \
1550 } \
1553 @@ -1246,9 +1347,13 @@ int dev_queue_xmit(struct sk_buff *skb)
1554 struct Qdisc *q;
1555 int rc = -ENOMEM;
1557 + /* GSO will handle the following emulations directly. */
1558 + if (netif_needs_gso(dev, skb))
1559 + goto gso;
1561 if (skb_shinfo(skb)->frag_list &&
1562 !(dev->features & NETIF_F_FRAGLIST) &&
1563 - __skb_linearize(skb, GFP_ATOMIC))
1564 + __skb_linearize(skb))
1565 goto out_kfree_skb;
1567 /* Fragmented skb is linearized if device does not support SG,
1568 @@ -1257,25 +1362,26 @@ int dev_queue_xmit(struct sk_buff *skb)
1569 */
1570 if (skb_shinfo(skb)->nr_frags &&
1571 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1572 - __skb_linearize(skb, GFP_ATOMIC))
1573 + __skb_linearize(skb))
1574 goto out_kfree_skb;
1576 /* If packet is not checksummed and device does not support
1577 * checksumming for this protocol, complete checksumming here.
1578 */
1579 if (skb->ip_summed == CHECKSUM_HW &&
1580 - (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1581 + (!(dev->features & NETIF_F_GEN_CSUM) &&
1582 (!(dev->features & NETIF_F_IP_CSUM) ||
1583 skb->protocol != htons(ETH_P_IP))))
1584 if (skb_checksum_help(skb, 0))
1585 goto out_kfree_skb;
1587 +gso:
1588 spin_lock_prefetch(&dev->queue_lock);
1590 /* Disable soft irqs for various locks below. Also
1591 * stops preemption for RCU.
1592 */
1593 - local_bh_disable();
1594 + rcu_read_lock_bh();
1596 /* Updates of qdisc are serialized by queue_lock.
1597 * The struct Qdisc which is pointed to by qdisc is now a
1598 @@ -1309,8 +1415,8 @@ #endif
1599 /* The device has no queue. Common case for software devices:
1600 loopback, all the sorts of tunnels...
1602 - Really, it is unlikely that xmit_lock protection is necessary here.
1603 - (f.e. loopback and IP tunnels are clean ignoring statistics
1604 + Really, it is unlikely that netif_tx_lock protection is necessary
1605 + here. (f.e. loopback and IP tunnels are clean ignoring statistics
1606 counters.)
1607 However, it is possible, that they rely on protection
1608 made by us here.
1609 @@ -1326,11 +1432,8 @@ #endif
1610 HARD_TX_LOCK(dev, cpu);
1612 if (!netif_queue_stopped(dev)) {
1613 - if (netdev_nit)
1614 - dev_queue_xmit_nit(skb, dev);
1616 rc = 0;
1617 - if (!dev->hard_start_xmit(skb, dev)) {
1618 + if (!dev_hard_start_xmit(skb, dev)) {
1619 HARD_TX_UNLOCK(dev);
1620 goto out;
1622 @@ -1349,13 +1452,13 @@ #endif
1625 rc = -ENETDOWN;
1626 - local_bh_enable();
1627 + rcu_read_unlock_bh();
1629 out_kfree_skb:
1630 kfree_skb(skb);
1631 return rc;
1632 out:
1633 - local_bh_enable();
1634 + rcu_read_unlock_bh();
1635 return rc;
1638 @@ -2670,7 +2773,7 @@ int register_netdevice(struct net_device
1639 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
1641 spin_lock_init(&dev->queue_lock);
1642 - spin_lock_init(&dev->xmit_lock);
1643 + spin_lock_init(&dev->_xmit_lock);
1644 dev->xmit_lock_owner = -1;
1645 #ifdef CONFIG_NET_CLS_ACT
1646 spin_lock_init(&dev->ingress_lock);
1647 @@ -2714,9 +2817,7 @@ #endif
1649 /* Fix illegal SG+CSUM combinations. */
1650 if ((dev->features & NETIF_F_SG) &&
1651 - !(dev->features & (NETIF_F_IP_CSUM |
1652 - NETIF_F_NO_CSUM |
1653 - NETIF_F_HW_CSUM))) {
1654 + !(dev->features & NETIF_F_ALL_CSUM)) {
1655 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
1656 dev->name);
1657 dev->features &= ~NETIF_F_SG;
1658 @@ -3268,7 +3369,6 @@ subsys_initcall(net_dev_init);
1659 EXPORT_SYMBOL(__dev_get_by_index);
1660 EXPORT_SYMBOL(__dev_get_by_name);
1661 EXPORT_SYMBOL(__dev_remove_pack);
1662 -EXPORT_SYMBOL(__skb_linearize);
1663 EXPORT_SYMBOL(dev_valid_name);
1664 EXPORT_SYMBOL(dev_add_pack);
1665 EXPORT_SYMBOL(dev_alloc_name);
1666 diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
1667 index 05d6085..c57d887 100644
1668 --- a/net/core/dev_mcast.c
1669 +++ b/net/core/dev_mcast.c
1670 @@ -62,7 +62,7 @@ #include <net/arp.h>
1671 * Device mc lists are changed by bh at least if IPv6 is enabled,
1672 * so that it must be bh protected.
1674 - * We block accesses to device mc filters with dev->xmit_lock.
1675 + * We block accesses to device mc filters with netif_tx_lock.
1676 */
1678 /*
1679 @@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_d
1681 void dev_mc_upload(struct net_device *dev)
1683 - spin_lock_bh(&dev->xmit_lock);
1684 + netif_tx_lock_bh(dev);
1685 __dev_mc_upload(dev);
1686 - spin_unlock_bh(&dev->xmit_lock);
1687 + netif_tx_unlock_bh(dev);
1690 /*
1691 @@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev
1692 int err = 0;
1693 struct dev_mc_list *dmi, **dmip;
1695 - spin_lock_bh(&dev->xmit_lock);
1696 + netif_tx_lock_bh(dev);
1698 for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
1699 /*
1700 @@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev
1701 */
1702 __dev_mc_upload(dev);
1704 - spin_unlock_bh(&dev->xmit_lock);
1705 + netif_tx_unlock_bh(dev);
1706 return 0;
1709 err = -ENOENT;
1710 done:
1711 - spin_unlock_bh(&dev->xmit_lock);
1712 + netif_tx_unlock_bh(dev);
1713 return err;
1716 @@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, v
1718 dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
1720 - spin_lock_bh(&dev->xmit_lock);
1721 + netif_tx_lock_bh(dev);
1722 for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
1723 if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
1724 dmi->dmi_addrlen == alen) {
1725 @@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, v
1728 if ((dmi = dmi1) == NULL) {
1729 - spin_unlock_bh(&dev->xmit_lock);
1730 + netif_tx_unlock_bh(dev);
1731 return -ENOMEM;
1733 memcpy(dmi->dmi_addr, addr, alen);
1734 @@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, v
1736 __dev_mc_upload(dev);
1738 - spin_unlock_bh(&dev->xmit_lock);
1739 + netif_tx_unlock_bh(dev);
1740 return 0;
1742 done:
1743 - spin_unlock_bh(&dev->xmit_lock);
1744 + netif_tx_unlock_bh(dev);
1745 kfree(dmi1);
1746 return err;
1748 @@ -204,7 +204,7 @@ done:
1750 void dev_mc_discard(struct net_device *dev)
1752 - spin_lock_bh(&dev->xmit_lock);
1753 + netif_tx_lock_bh(dev);
1755 while (dev->mc_list != NULL) {
1756 struct dev_mc_list *tmp = dev->mc_list;
1757 @@ -215,7 +215,7 @@ void dev_mc_discard(struct net_device *d
1759 dev->mc_count = 0;
1761 - spin_unlock_bh(&dev->xmit_lock);
1762 + netif_tx_unlock_bh(dev);
1765 #ifdef CONFIG_PROC_FS
1766 @@ -250,7 +250,7 @@ static int dev_mc_seq_show(struct seq_fi
1767 struct dev_mc_list *m;
1768 struct net_device *dev = v;
1770 - spin_lock_bh(&dev->xmit_lock);
1771 + netif_tx_lock_bh(dev);
1772 for (m = dev->mc_list; m; m = m->next) {
1773 int i;
1775 @@ -262,7 +262,7 @@ static int dev_mc_seq_show(struct seq_fi
1777 seq_putc(seq, '\n');
1779 - spin_unlock_bh(&dev->xmit_lock);
1780 + netif_tx_unlock_bh(dev);
1781 return 0;
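All of the spin_lock_bh(&dev->xmit_lock) pairs in dev_mcast.c become netif_tx_lock_bh()/netif_tx_unlock_bh() calls. The helpers themselves are introduced by an include/linux/netdevice.h hunk earlier in this patch (not visible in this excerpt); a minimal sketch of the assumed wrappers, which also maintain xmit_lock_owner so the recursion check in dev_queue_xmit() keeps working:

static inline void netif_tx_lock_bh(struct net_device *dev)
{
        spin_lock_bh(&dev->_xmit_lock);
        dev->xmit_lock_owner = smp_processor_id();
}

static inline void netif_tx_unlock_bh(struct net_device *dev)
{
        dev->xmit_lock_owner = -1;
        spin_unlock_bh(&dev->_xmit_lock);
}

The plain and trylock variants follow the same pattern around spin_lock(), spin_unlock() and spin_trylock().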
1784 diff --git a/net/core/ethtool.c b/net/core/ethtool.c
1785 index e6f7610..27ce168 100644
1786 --- a/net/core/ethtool.c
1787 +++ b/net/core/ethtool.c
1788 @@ -30,7 +30,7 @@ u32 ethtool_op_get_link(struct net_devic
1790 u32 ethtool_op_get_tx_csum(struct net_device *dev)
1792 - return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
1793 + return (dev->features & NETIF_F_ALL_CSUM) != 0;
1796 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
1797 @@ -551,9 +551,7 @@ static int ethtool_set_sg(struct net_dev
1798 return -EFAULT;
1800 if (edata.data &&
1801 - !(dev->features & (NETIF_F_IP_CSUM |
1802 - NETIF_F_NO_CSUM |
1803 - NETIF_F_HW_CSUM)))
1804 + !(dev->features & NETIF_F_ALL_CSUM))
1805 return -EINVAL;
1807 return __ethtool_set_sg(dev, edata.data);
1808 @@ -591,7 +589,7 @@ static int ethtool_set_tso(struct net_de
1810 static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
1812 - struct ethtool_value edata = { ETHTOOL_GTSO };
1813 + struct ethtool_value edata = { ETHTOOL_GUFO };
1815 if (!dev->ethtool_ops->get_ufo)
1816 return -EOPNOTSUPP;
1817 @@ -600,6 +598,7 @@ static int ethtool_get_ufo(struct net_de
1818 return -EFAULT;
1819 return 0;
1822 static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
1824 struct ethtool_value edata;
1825 @@ -615,6 +614,29 @@ static int ethtool_set_ufo(struct net_de
1826 return dev->ethtool_ops->set_ufo(dev, edata.data);
1829 +static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
1830 +{
1831 + struct ethtool_value edata = { ETHTOOL_GGSO };
1833 + edata.data = dev->features & NETIF_F_GSO;
1834 + if (copy_to_user(useraddr, &edata, sizeof(edata)))
1835 + return -EFAULT;
1836 + return 0;
1837 +}
1839 +static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
1840 +{
1841 + struct ethtool_value edata;
1843 + if (copy_from_user(&edata, useraddr, sizeof(edata)))
1844 + return -EFAULT;
1845 + if (edata.data)
1846 + dev->features |= NETIF_F_GSO;
1847 + else
1848 + dev->features &= ~NETIF_F_GSO;
1849 + return 0;
1850 +}
1852 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
1854 struct ethtool_test test;
1855 @@ -906,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
1856 case ETHTOOL_SUFO:
1857 rc = ethtool_set_ufo(dev, useraddr);
1858 break;
1859 + case ETHTOOL_GGSO:
1860 + rc = ethtool_get_gso(dev, useraddr);
1861 + break;
1862 + case ETHTOOL_SGSO:
1863 + rc = ethtool_set_gso(dev, useraddr);
1864 + break;
1865 default:
1866 rc = -EOPNOTSUPP;
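The ethtool hunks add ETHTOOL_GGSO and ETHTOOL_SGSO so user space can query and toggle GSO per device (the knob the ethtool utility exposes as "ethtool -k/-K <dev> gso"). A minimal user-space sketch of the query side, assuming nothing beyond the standard SIOCETHTOOL ioctl and the struct ethtool_value used above (illustrative only, not part of the patch):

/* gso_query.c: print whether GSO is enabled on the given interface. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
        struct ethtool_value eval = { .cmd = ETHTOOL_GGSO };
        struct ifreq ifr;
        int fd;

        if (argc < 2) {
                fprintf(stderr, "usage: %s <ifname>\n", argv[0]);
                return 1;
        }

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0) {
                perror("socket");
                return 1;
        }

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&eval;           /* ethtool commands are passed here */

        if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
                perror("SIOCETHTOOL");
                close(fd);
                return 1;
        }

        printf("%s: generic segmentation offload %s\n",
               argv[1], eval.data ? "on" : "off");
        close(fd);
        return 0;
}

Setting the flag works the same way with cmd = ETHTOOL_SGSO and eval.data set to 0 or 1.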
1868 diff --git a/net/core/netpoll.c b/net/core/netpoll.c
1869 index ea51f8d..ec28d3b 100644
1870 --- a/net/core/netpoll.c
1871 +++ b/net/core/netpoll.c
1872 @@ -273,24 +273,21 @@ static void netpoll_send_skb(struct netp
1874 do {
1875 npinfo->tries--;
1876 - spin_lock(&np->dev->xmit_lock);
1877 - np->dev->xmit_lock_owner = smp_processor_id();
1878 + netif_tx_lock(np->dev);
1880 /*
1881 * network drivers do not expect to be called if the queue is
1882 * stopped.
1883 */
1884 if (netif_queue_stopped(np->dev)) {
1885 - np->dev->xmit_lock_owner = -1;
1886 - spin_unlock(&np->dev->xmit_lock);
1887 + netif_tx_unlock(np->dev);
1888 netpoll_poll(np);
1889 udelay(50);
1890 continue;
1893 status = np->dev->hard_start_xmit(skb, np->dev);
1894 - np->dev->xmit_lock_owner = -1;
1895 - spin_unlock(&np->dev->xmit_lock);
1896 + netif_tx_unlock(np->dev);
1898 /* success */
1899 if(!status) {
1900 diff --git a/net/core/pktgen.c b/net/core/pktgen.c
1901 index da16f8f..2380347 100644
1902 --- a/net/core/pktgen.c
1903 +++ b/net/core/pktgen.c
1904 @@ -2582,7 +2582,7 @@ static __inline__ void pktgen_xmit(struc
1908 - spin_lock_bh(&odev->xmit_lock);
1909 + netif_tx_lock_bh(odev);
1910 if (!netif_queue_stopped(odev)) {
1912 atomic_inc(&(pkt_dev->skb->users));
1913 @@ -2627,7 +2627,7 @@ retry_now:
1914 pkt_dev->next_tx_ns = 0;
1917 - spin_unlock_bh(&odev->xmit_lock);
1918 + netif_tx_unlock_bh(odev);
1920 /* If pkt_dev->count is zero, then run forever */
1921 if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
1922 diff --git a/net/core/skbuff.c b/net/core/skbuff.c
1923 index 2144952..46f56af 100644
1924 --- a/net/core/skbuff.c
1925 +++ b/net/core/skbuff.c
1926 @@ -164,9 +164,9 @@ struct sk_buff *__alloc_skb(unsigned int
1927 shinfo = skb_shinfo(skb);
1928 atomic_set(&shinfo->dataref, 1);
1929 shinfo->nr_frags = 0;
1930 - shinfo->tso_size = 0;
1931 - shinfo->tso_segs = 0;
1932 - shinfo->ufo_size = 0;
1933 + shinfo->gso_size = 0;
1934 + shinfo->gso_segs = 0;
1935 + shinfo->gso_type = 0;
1936 shinfo->ip6_frag_id = 0;
1937 shinfo->frag_list = NULL;
1939 @@ -230,8 +230,9 @@ struct sk_buff *alloc_skb_from_cache(kme
1941 atomic_set(&(skb_shinfo(skb)->dataref), 1);
1942 skb_shinfo(skb)->nr_frags = 0;
1943 - skb_shinfo(skb)->tso_size = 0;
1944 - skb_shinfo(skb)->tso_segs = 0;
1945 + skb_shinfo(skb)->gso_size = 0;
1946 + skb_shinfo(skb)->gso_segs = 0;
1947 + skb_shinfo(skb)->gso_type = 0;
1948 skb_shinfo(skb)->frag_list = NULL;
1949 out:
1950 return skb;
1951 @@ -501,8 +502,9 @@ #endif
1952 new->tc_index = old->tc_index;
1953 #endif
1954 atomic_set(&new->users, 1);
1955 - skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
1956 - skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
1957 + skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
1958 + skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
1959 + skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
1962 /**
1963 @@ -1777,6 +1779,133 @@ int skb_append_datato_frags(struct sock
1964 return 0;
1967 +/**
1968 + * skb_segment - Perform protocol segmentation on skb.
1969 + * @skb: buffer to segment
1970 + * @features: features for the output path (see dev->features)
1971 + *
1972 + * This function performs segmentation on the given skb. It returns
1973 + * the segment at the given position. It returns NULL if there are
1974 + * no more segments to generate, or when an error is encountered.
1975 + */
1976 +struct sk_buff *skb_segment(struct sk_buff *skb, int features)
1977 +{
1978 + struct sk_buff *segs = NULL;
1979 + struct sk_buff *tail = NULL;
1980 + unsigned int mss = skb_shinfo(skb)->gso_size;
1981 + unsigned int doffset = skb->data - skb->mac.raw;
1982 + unsigned int offset = doffset;
1983 + unsigned int headroom;
1984 + unsigned int len;
1985 + int sg = features & NETIF_F_SG;
1986 + int nfrags = skb_shinfo(skb)->nr_frags;
1987 + int err = -ENOMEM;
1988 + int i = 0;
1989 + int pos;
1991 + __skb_push(skb, doffset);
1992 + headroom = skb_headroom(skb);
1993 + pos = skb_headlen(skb);
1995 + do {
1996 + struct sk_buff *nskb;
1997 + skb_frag_t *frag;
1998 + int hsize, nsize;
1999 + int k;
2000 + int size;
2002 + len = skb->len - offset;
2003 + if (len > mss)
2004 + len = mss;
2006 + hsize = skb_headlen(skb) - offset;
2007 + if (hsize < 0)
2008 + hsize = 0;
2009 + nsize = hsize + doffset;
2010 + if (nsize > len + doffset || !sg)
2011 + nsize = len + doffset;
2013 + nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
2014 + if (unlikely(!nskb))
2015 + goto err;
2017 + if (segs)
2018 + tail->next = nskb;
2019 + else
2020 + segs = nskb;
2021 + tail = nskb;
2023 + nskb->dev = skb->dev;
2024 + nskb->priority = skb->priority;
2025 + nskb->protocol = skb->protocol;
2026 + nskb->dst = dst_clone(skb->dst);
2027 + memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
2028 + nskb->pkt_type = skb->pkt_type;
2029 + nskb->mac_len = skb->mac_len;
2031 + skb_reserve(nskb, headroom);
2032 + nskb->mac.raw = nskb->data;
2033 + nskb->nh.raw = nskb->data + skb->mac_len;
2034 + nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
2035 + memcpy(skb_put(nskb, doffset), skb->data, doffset);
2037 + if (!sg) {
2038 + nskb->csum = skb_copy_and_csum_bits(skb, offset,
2039 + skb_put(nskb, len),
2040 + len, 0);
2041 + continue;
2042 + }
2044 + frag = skb_shinfo(nskb)->frags;
2045 + k = 0;
2047 + nskb->ip_summed = CHECKSUM_HW;
2048 + nskb->csum = skb->csum;
2049 + memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
2051 + while (pos < offset + len) {
2052 + BUG_ON(i >= nfrags);
2054 + *frag = skb_shinfo(skb)->frags[i];
2055 + get_page(frag->page);
2056 + size = frag->size;
2058 + if (pos < offset) {
2059 + frag->page_offset += offset - pos;
2060 + frag->size -= offset - pos;
2061 + }
2063 + k++;
2065 + if (pos + size <= offset + len) {
2066 + i++;
2067 + pos += size;
2068 + } else {
2069 + frag->size -= pos + size - (offset + len);
2070 + break;
2071 + }
2073 + frag++;
2074 + }
2076 + skb_shinfo(nskb)->nr_frags = k;
2077 + nskb->data_len = len - hsize;
2078 + nskb->len += nskb->data_len;
2079 + nskb->truesize += nskb->data_len;
2080 + } while ((offset += len) < skb->len);
2082 + return segs;
2084 +err:
2085 + while ((skb = segs)) {
2086 + segs = skb->next;
2087 + kfree(skb);
2088 + }
2089 + return ERR_PTR(err);
2090 +}
2092 +EXPORT_SYMBOL_GPL(skb_segment);
2094 void __init skb_init(void)
2096 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
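skb_segment() returns a list of segments chained through skb->next (or an ERR_PTR on failure); the original skb is left for the caller to free. A minimal caller-side sketch, assuming the skb_gso_segment() wrapper that a net/core/dev.c hunk earlier in this patch builds around it -- the same pattern the xfrm4_output_finish() hunk below follows (illustrative only, not part of the patch):

static int example_xmit_gso(struct sk_buff *skb, struct net_device *dev)
{
        struct sk_buff *segs;

        segs = skb_gso_segment(skb, dev->features);
        kfree_skb(skb);                         /* original skb is no longer needed */
        if (unlikely(IS_ERR(segs)))
                return PTR_ERR(segs);

        while (segs) {
                struct sk_buff *next = segs->next;
                int err;

                segs->next = NULL;
                err = dev->hard_start_xmit(segs, dev);  /* one MSS-sized segment */
                if (err) {
                        /* free whatever was not handed to the driver */
                        while ((segs = next) != NULL) {
                                next = segs->next;
                                segs->next = NULL;
                                kfree_skb(segs);
                        }
                        return err;
                }
                segs = next;
        }
        return 0;
}

When the device can segment the packet itself, skb_gso_segment() may instead return NULL to mean "send as is"; that case is omitted from the sketch.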
2097 diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
2098 index 44bda85..2e3323a 100644
2099 --- a/net/decnet/dn_nsp_in.c
2100 +++ b/net/decnet/dn_nsp_in.c
2101 @@ -801,8 +801,7 @@ got_it:
2102 * We linearize everything except data segments here.
2103 */
2104 if (cb->nsp_flags & ~0x60) {
2105 - if (unlikely(skb_is_nonlinear(skb)) &&
2106 - skb_linearize(skb, GFP_ATOMIC) != 0)
2107 + if (unlikely(skb_linearize(skb)))
2108 goto free_out;
2111 diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
2112 index 3407f19..a0a25e0 100644
2113 --- a/net/decnet/dn_route.c
2114 +++ b/net/decnet/dn_route.c
2115 @@ -629,8 +629,7 @@ int dn_route_rcv(struct sk_buff *skb, st
2116 padlen);
2118 if (flags & DN_RT_PKT_CNTL) {
2119 - if (unlikely(skb_is_nonlinear(skb)) &&
2120 - skb_linearize(skb, GFP_ATOMIC) != 0)
2121 + if (unlikely(skb_linearize(skb)))
2122 goto dump_it;
2124 switch(flags & DN_RT_CNTL_MSK) {
2125 diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
2126 index 97c276f..0a8c559 100644
2127 --- a/net/ipv4/af_inet.c
2128 +++ b/net/ipv4/af_inet.c
2129 @@ -68,6 +68,7 @@
2130 */
2132 #include <linux/config.h>
2133 +#include <linux/err.h>
2134 #include <linux/errno.h>
2135 #include <linux/types.h>
2136 #include <linux/socket.h>
2137 @@ -1084,6 +1085,88 @@ int inet_sk_rebuild_header(struct sock *
2139 EXPORT_SYMBOL(inet_sk_rebuild_header);
2141 +static int inet_gso_send_check(struct sk_buff *skb)
2142 +{
2143 + struct iphdr *iph;
2144 + struct net_protocol *ops;
2145 + int proto;
2146 + int ihl;
2147 + int err = -EINVAL;
2149 + if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
2150 + goto out;
2152 + iph = skb->nh.iph;
2153 + ihl = iph->ihl * 4;
2154 + if (ihl < sizeof(*iph))
2155 + goto out;
2157 + if (unlikely(!pskb_may_pull(skb, ihl)))
2158 + goto out;
2160 + skb->h.raw = __skb_pull(skb, ihl);
2161 + iph = skb->nh.iph;
2162 + proto = iph->protocol & (MAX_INET_PROTOS - 1);
2163 + err = -EPROTONOSUPPORT;
2165 + rcu_read_lock();
2166 + ops = rcu_dereference(inet_protos[proto]);
2167 + if (likely(ops && ops->gso_send_check))
2168 + err = ops->gso_send_check(skb);
2169 + rcu_read_unlock();
2171 +out:
2172 + return err;
2173 +}
2175 +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
2176 +{
2177 + struct sk_buff *segs = ERR_PTR(-EINVAL);
2178 + struct iphdr *iph;
2179 + struct net_protocol *ops;
2180 + int proto;
2181 + int ihl;
2182 + int id;
2184 + if (!pskb_may_pull(skb, sizeof(*iph)))
2185 + goto out;
2187 + iph = skb->nh.iph;
2188 + ihl = iph->ihl * 4;
2189 + if (ihl < sizeof(*iph))
2190 + goto out;
2192 + if (!pskb_may_pull(skb, ihl))
2193 + goto out;
2195 + skb->h.raw = __skb_pull(skb, ihl);
2196 + iph = skb->nh.iph;
2197 + id = ntohs(iph->id);
2198 + proto = iph->protocol & (MAX_INET_PROTOS - 1);
2199 + segs = ERR_PTR(-EPROTONOSUPPORT);
2201 + rcu_read_lock();
2202 + ops = rcu_dereference(inet_protos[proto]);
2203 + if (ops && ops->gso_segment)
2204 + segs = ops->gso_segment(skb, features);
2205 + rcu_read_unlock();
2207 + if (!segs || unlikely(IS_ERR(segs)))
2208 + goto out;
2210 + skb = segs;
2211 + do {
2212 + iph = skb->nh.iph;
2213 + iph->id = htons(id++);
2214 + iph->tot_len = htons(skb->len - skb->mac_len);
2215 + iph->check = 0;
2216 + iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
2217 + } while ((skb = skb->next));
2219 +out:
2220 + return segs;
2221 +}
2223 #ifdef CONFIG_IP_MULTICAST
2224 static struct net_protocol igmp_protocol = {
2225 .handler = igmp_rcv,
2226 @@ -1093,6 +1176,8 @@ #endif
2227 static struct net_protocol tcp_protocol = {
2228 .handler = tcp_v4_rcv,
2229 .err_handler = tcp_v4_err,
2230 + .gso_send_check = tcp_v4_gso_send_check,
2231 + .gso_segment = tcp_tso_segment,
2232 .no_policy = 1,
2233 };
2235 @@ -1138,6 +1223,8 @@ static int ipv4_proc_init(void);
2236 static struct packet_type ip_packet_type = {
2237 .type = __constant_htons(ETH_P_IP),
2238 .func = ip_rcv,
2239 + .gso_send_check = inet_gso_send_check,
2240 + .gso_segment = inet_gso_segment,
2241 };
2243 static int __init inet_init(void)
2244 diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
2245 index 8dcba38..2de887c 100644
2246 --- a/net/ipv4/ip_output.c
2247 +++ b/net/ipv4/ip_output.c
2248 @@ -210,8 +210,7 @@ #if defined(CONFIG_NETFILTER) && defined
2249 return dst_output(skb);
2251 #endif
2252 - if (skb->len > dst_mtu(skb->dst) &&
2253 - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
2254 + if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
2255 return ip_fragment(skb, ip_finish_output2);
2256 else
2257 return ip_finish_output2(skb);
2258 @@ -362,7 +361,7 @@ packet_routed:
2261 ip_select_ident_more(iph, &rt->u.dst, sk,
2262 - (skb_shinfo(skb)->tso_segs ?: 1) - 1);
2263 + (skb_shinfo(skb)->gso_segs ?: 1) - 1);
2265 /* Add an IP checksum. */
2266 ip_send_check(iph);
2267 @@ -743,7 +742,8 @@ static inline int ip_ufo_append_data(str
2268 (length - transhdrlen));
2269 if (!err) {
2270 /* specify the length of each IP datagram fragment*/
2271 - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
2272 + skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
2273 + skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
2274 __skb_queue_tail(&sk->sk_write_queue, skb);
2276 return 0;
2277 @@ -839,7 +839,7 @@ int ip_append_data(struct sock *sk,
2278 */
2279 if (transhdrlen &&
2280 length + fragheaderlen <= mtu &&
2281 - rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
2282 + rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
2283 !exthdrlen)
2284 csummode = CHECKSUM_HW;
2286 @@ -1086,14 +1086,16 @@ ssize_t ip_append_page(struct sock *sk,
2288 inet->cork.length += size;
2289 if ((sk->sk_protocol == IPPROTO_UDP) &&
2290 - (rt->u.dst.dev->features & NETIF_F_UFO))
2291 - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
2292 + (rt->u.dst.dev->features & NETIF_F_UFO)) {
2293 + skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
2294 + skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
2295 + }
2298 while (size > 0) {
2299 int i;
2301 - if (skb_shinfo(skb)->ufo_size)
2302 + if (skb_is_gso(skb))
2303 len = size;
2304 else {
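The ufo_size/tso_size tests in ip_output.c are replaced by the new skb_is_gso() helper, added to include/linux/skbuff.h by an earlier hunk of this patch (not shown in this excerpt). It is assumed to be nothing more than a test of the new gso_size field, roughly:

static inline int skb_is_gso(const struct sk_buff *skb)
{
        return skb_shinfo(skb)->gso_size;
}

so any skb carrying a non-zero gso_size -- TSO, UFO or software GSO alike -- bypasses IP fragmentation and is left for the GSO path to split.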
2306 diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
2307 index d64e2ec..7494823 100644
2308 --- a/net/ipv4/ipcomp.c
2309 +++ b/net/ipv4/ipcomp.c
2310 @@ -84,7 +84,7 @@ static int ipcomp_input(struct xfrm_stat
2311 struct xfrm_decap_state *decap, struct sk_buff *skb)
2313 u8 nexthdr;
2314 - int err = 0;
2315 + int err = -ENOMEM;
2316 struct iphdr *iph;
2317 union {
2318 struct iphdr iph;
2319 @@ -92,11 +92,8 @@ static int ipcomp_input(struct xfrm_stat
2320 } tmp_iph;
2323 - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
2324 - skb_linearize(skb, GFP_ATOMIC) != 0) {
2325 - err = -ENOMEM;
2326 + if (skb_linearize_cow(skb))
2327 goto out;
2328 - }
2330 skb->ip_summed = CHECKSUM_NONE;
2332 @@ -171,10 +168,8 @@ static int ipcomp_output(struct xfrm_sta
2333 goto out_ok;
2336 - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
2337 - skb_linearize(skb, GFP_ATOMIC) != 0) {
2338 + if (skb_linearize_cow(skb))
2339 goto out_ok;
2340 - }
2342 err = ipcomp_compress(x, skb);
2343 iph = skb->nh.iph;
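ipcomp_input()/ipcomp_output() (and their IPv6 counterparts below) now use skb_linearize_cow() instead of open-coding the nonlinear-or-cloned test. The helper comes from an include/linux/skbuff.h hunk earlier in this patch (not shown); a sketch of the assumed definition:

static inline int skb_linearize_cow(struct sk_buff *skb)
{
        return skb_is_nonlinear(skb) || skb_cloned(skb) ?
               __skb_linearize(skb) : 0;
}

i.e. linearize only when the data is fragmented or shared, returning 0 on success and a negative errno otherwise, which is why the callers can simply branch on the return value (with err now pre-set to -ENOMEM).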
2344 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
2345 index 00aa80e..30c81a8 100644
2346 --- a/net/ipv4/tcp.c
2347 +++ b/net/ipv4/tcp.c
2348 @@ -257,6 +257,7 @@ #include <linux/smp_lock.h>
2349 #include <linux/fs.h>
2350 #include <linux/random.h>
2351 #include <linux/bootmem.h>
2352 +#include <linux/err.h>
2354 #include <net/icmp.h>
2355 #include <net/tcp.h>
2356 @@ -570,7 +571,7 @@ new_segment:
2357 skb->ip_summed = CHECKSUM_HW;
2358 tp->write_seq += copy;
2359 TCP_SKB_CB(skb)->end_seq += copy;
2360 - skb_shinfo(skb)->tso_segs = 0;
2361 + skb_shinfo(skb)->gso_segs = 0;
2363 if (!copied)
2364 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
2365 @@ -621,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock
2366 ssize_t res;
2367 struct sock *sk = sock->sk;
2369 -#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
2371 if (!(sk->sk_route_caps & NETIF_F_SG) ||
2372 - !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
2373 + !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
2374 return sock_no_sendpage(sock, page, offset, size, flags);
2376 -#undef TCP_ZC_CSUM_FLAGS
2378 lock_sock(sk);
2379 TCP_CHECK_TIMER(sk);
2380 res = do_tcp_sendpages(sk, &page, offset, size, flags);
2381 @@ -725,9 +722,7 @@ new_segment:
2382 /*
2383 * Check whether we can use HW checksum.
2384 */
2385 - if (sk->sk_route_caps &
2386 - (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
2387 - NETIF_F_HW_CSUM))
2388 + if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
2389 skb->ip_summed = CHECKSUM_HW;
2391 skb_entail(sk, tp, skb);
2392 @@ -823,7 +818,7 @@ new_segment:
2394 tp->write_seq += copy;
2395 TCP_SKB_CB(skb)->end_seq += copy;
2396 - skb_shinfo(skb)->tso_segs = 0;
2397 + skb_shinfo(skb)->gso_segs = 0;
2399 from += copy;
2400 copied += copy;
2401 @@ -2026,6 +2021,77 @@ int tcp_getsockopt(struct sock *sk, int
2405 +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
2406 +{
2407 + struct sk_buff *segs = ERR_PTR(-EINVAL);
2408 + struct tcphdr *th;
2409 + unsigned thlen;
2410 + unsigned int seq;
2411 + unsigned int delta;
2412 + unsigned int oldlen;
2413 + unsigned int len;
2415 + if (!pskb_may_pull(skb, sizeof(*th)))
2416 + goto out;
2418 + th = skb->h.th;
2419 + thlen = th->doff * 4;
2420 + if (thlen < sizeof(*th))
2421 + goto out;
2423 + if (!pskb_may_pull(skb, thlen))
2424 + goto out;
2426 + oldlen = (u16)~skb->len;
2427 + __skb_pull(skb, thlen);
2429 + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
2430 + /* Packet is from an untrusted source, reset gso_segs. */
2431 + int mss = skb_shinfo(skb)->gso_size;
2433 + skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
2435 + segs = NULL;
2436 + goto out;
2437 + }
2439 + segs = skb_segment(skb, features);
2440 + if (IS_ERR(segs))
2441 + goto out;
2443 + len = skb_shinfo(skb)->gso_size;
2444 + delta = htonl(oldlen + (thlen + len));
2446 + skb = segs;
2447 + th = skb->h.th;
2448 + seq = ntohl(th->seq);
2450 + do {
2451 + th->fin = th->psh = 0;
2453 + th->check = ~csum_fold(th->check + delta);
2454 + if (skb->ip_summed != CHECKSUM_HW)
2455 + th->check = csum_fold(csum_partial(skb->h.raw, thlen,
2456 + skb->csum));
2458 + seq += len;
2459 + skb = skb->next;
2460 + th = skb->h.th;
2462 + th->seq = htonl(seq);
2463 + th->cwr = 0;
2464 + } while (skb->next);
2466 + delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
2467 + th->check = ~csum_fold(th->check + delta);
2468 + if (skb->ip_summed != CHECKSUM_HW)
2469 + th->check = csum_fold(csum_partial(skb->h.raw, thlen,
2470 + skb->csum));
2472 +out:
2473 + return segs;
2474 +}
2476 extern void __skb_cb_too_small_for_tcp(int, int);
2477 extern struct tcp_congestion_ops tcp_reno;
2479 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
2480 index e9a54ae..defe77a 100644
2481 --- a/net/ipv4/tcp_input.c
2482 +++ b/net/ipv4/tcp_input.c
2483 @@ -1072,7 +1072,7 @@ tcp_sacktag_write_queue(struct sock *sk,
2484 else
2485 pkt_len = (end_seq -
2486 TCP_SKB_CB(skb)->seq);
2487 - if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
2488 + if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size))
2489 break;
2490 pcount = tcp_skb_pcount(skb);
2492 diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
2493 index 233bdf2..b4240b4 100644
2494 --- a/net/ipv4/tcp_ipv4.c
2495 +++ b/net/ipv4/tcp_ipv4.c
2496 @@ -495,6 +495,24 @@ void tcp_v4_send_check(struct sock *sk,
2500 +int tcp_v4_gso_send_check(struct sk_buff *skb)
2501 +{
2502 + struct iphdr *iph;
2503 + struct tcphdr *th;
2505 + if (!pskb_may_pull(skb, sizeof(*th)))
2506 + return -EINVAL;
2508 + iph = skb->nh.iph;
2509 + th = skb->h.th;
2511 + th->check = 0;
2512 + th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
2513 + skb->csum = offsetof(struct tcphdr, check);
2514 + skb->ip_summed = CHECKSUM_HW;
2515 + return 0;
2516 +}
2518 /*
2519 * This routine will send an RST to the other tcp.
2521 diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
2522 index 310f2e6..ee01f69 100644
2523 --- a/net/ipv4/tcp_output.c
2524 +++ b/net/ipv4/tcp_output.c
2525 @@ -497,15 +497,17 @@ static void tcp_set_skb_tso_segs(struct
2526 /* Avoid the costly divide in the normal
2527 * non-TSO case.
2528 */
2529 - skb_shinfo(skb)->tso_segs = 1;
2530 - skb_shinfo(skb)->tso_size = 0;
2531 + skb_shinfo(skb)->gso_segs = 1;
2532 + skb_shinfo(skb)->gso_size = 0;
2533 + skb_shinfo(skb)->gso_type = 0;
2534 } else {
2535 unsigned int factor;
2537 factor = skb->len + (mss_now - 1);
2538 factor /= mss_now;
2539 - skb_shinfo(skb)->tso_segs = factor;
2540 - skb_shinfo(skb)->tso_size = mss_now;
2541 + skb_shinfo(skb)->gso_segs = factor;
2542 + skb_shinfo(skb)->gso_size = mss_now;
2543 + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2547 @@ -850,7 +852,7 @@ static int tcp_init_tso_segs(struct sock
2549 if (!tso_segs ||
2550 (tso_segs > 1 &&
2551 - skb_shinfo(skb)->tso_size != mss_now)) {
2552 + tcp_skb_mss(skb) != mss_now)) {
2553 tcp_set_skb_tso_segs(sk, skb, mss_now);
2554 tso_segs = tcp_skb_pcount(skb);
2556 @@ -1510,8 +1512,9 @@ int tcp_retransmit_skb(struct sock *sk,
2557 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2558 if (!pskb_trim(skb, 0)) {
2559 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
2560 - skb_shinfo(skb)->tso_segs = 1;
2561 - skb_shinfo(skb)->tso_size = 0;
2562 + skb_shinfo(skb)->gso_segs = 1;
2563 + skb_shinfo(skb)->gso_size = 0;
2564 + skb_shinfo(skb)->gso_type = 0;
2565 skb->ip_summed = CHECKSUM_NONE;
2566 skb->csum = 0;
2568 @@ -1716,8 +1719,9 @@ void tcp_send_fin(struct sock *sk)
2569 skb->csum = 0;
2570 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
2571 TCP_SKB_CB(skb)->sacked = 0;
2572 - skb_shinfo(skb)->tso_segs = 1;
2573 - skb_shinfo(skb)->tso_size = 0;
2574 + skb_shinfo(skb)->gso_segs = 1;
2575 + skb_shinfo(skb)->gso_size = 0;
2576 + skb_shinfo(skb)->gso_type = 0;
2578 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2579 TCP_SKB_CB(skb)->seq = tp->write_seq;
2580 @@ -1749,8 +1753,9 @@ void tcp_send_active_reset(struct sock *
2581 skb->csum = 0;
2582 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
2583 TCP_SKB_CB(skb)->sacked = 0;
2584 - skb_shinfo(skb)->tso_segs = 1;
2585 - skb_shinfo(skb)->tso_size = 0;
2586 + skb_shinfo(skb)->gso_segs = 1;
2587 + skb_shinfo(skb)->gso_size = 0;
2588 + skb_shinfo(skb)->gso_type = 0;
2590 /* Send it off. */
2591 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
2592 @@ -1833,8 +1838,9 @@ struct sk_buff * tcp_make_synack(struct
2593 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
2594 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
2595 TCP_SKB_CB(skb)->sacked = 0;
2596 - skb_shinfo(skb)->tso_segs = 1;
2597 - skb_shinfo(skb)->tso_size = 0;
2598 + skb_shinfo(skb)->gso_segs = 1;
2599 + skb_shinfo(skb)->gso_size = 0;
2600 + skb_shinfo(skb)->gso_type = 0;
2601 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2602 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2603 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
2604 @@ -1937,8 +1943,9 @@ int tcp_connect(struct sock *sk)
2605 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
2606 TCP_ECN_send_syn(sk, tp, buff);
2607 TCP_SKB_CB(buff)->sacked = 0;
2608 - skb_shinfo(buff)->tso_segs = 1;
2609 - skb_shinfo(buff)->tso_size = 0;
2610 + skb_shinfo(buff)->gso_segs = 1;
2611 + skb_shinfo(buff)->gso_size = 0;
2612 + skb_shinfo(buff)->gso_type = 0;
2613 buff->csum = 0;
2614 TCP_SKB_CB(buff)->seq = tp->write_seq++;
2615 TCP_SKB_CB(buff)->end_seq = tp->write_seq;
2616 @@ -2042,8 +2049,9 @@ void tcp_send_ack(struct sock *sk)
2617 buff->csum = 0;
2618 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
2619 TCP_SKB_CB(buff)->sacked = 0;
2620 - skb_shinfo(buff)->tso_segs = 1;
2621 - skb_shinfo(buff)->tso_size = 0;
2622 + skb_shinfo(buff)->gso_segs = 1;
2623 + skb_shinfo(buff)->gso_size = 0;
2624 + skb_shinfo(buff)->gso_type = 0;
2626 /* Send it off, this clears delayed acks for us. */
2627 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
2628 @@ -2078,8 +2086,9 @@ static int tcp_xmit_probe_skb(struct soc
2629 skb->csum = 0;
2630 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
2631 TCP_SKB_CB(skb)->sacked = urgent;
2632 - skb_shinfo(skb)->tso_segs = 1;
2633 - skb_shinfo(skb)->tso_size = 0;
2634 + skb_shinfo(skb)->gso_segs = 1;
2635 + skb_shinfo(skb)->gso_size = 0;
2636 + skb_shinfo(skb)->gso_type = 0;
2638 /* Use a previous sequence. This should cause the other
2639 * end to send an ack. Don't queue or clone SKB, just
2640 diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
2641 index 32ad229..62ead52 100644
2642 --- a/net/ipv4/xfrm4_output.c
2643 +++ b/net/ipv4/xfrm4_output.c
2644 @@ -9,6 +9,8 @@
2645 */
2647 #include <linux/compiler.h>
2648 +#include <linux/if_ether.h>
2649 +#include <linux/kernel.h>
2650 #include <linux/skbuff.h>
2651 #include <linux/spinlock.h>
2652 #include <linux/netfilter_ipv4.h>
2653 @@ -152,16 +154,10 @@ error_nolock:
2654 goto out_exit;
2657 -static int xfrm4_output_finish(struct sk_buff *skb)
2658 +static int xfrm4_output_finish2(struct sk_buff *skb)
2660 int err;
2662 -#ifdef CONFIG_NETFILTER
2663 - if (!skb->dst->xfrm) {
2664 - IPCB(skb)->flags |= IPSKB_REROUTED;
2665 - return dst_output(skb);
2666 - }
2667 -#endif
2668 while (likely((err = xfrm4_output_one(skb)) == 0)) {
2669 nf_reset(skb);
2671 @@ -174,7 +170,7 @@ #endif
2672 return dst_output(skb);
2674 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
2675 - skb->dst->dev, xfrm4_output_finish);
2676 + skb->dst->dev, xfrm4_output_finish2);
2677 if (unlikely(err != 1))
2678 break;
2680 @@ -182,6 +178,48 @@ #endif
2681 return err;
2684 +static int xfrm4_output_finish(struct sk_buff *skb)
2685 +{
2686 + struct sk_buff *segs;
2688 +#ifdef CONFIG_NETFILTER
2689 + if (!skb->dst->xfrm) {
2690 + IPCB(skb)->flags |= IPSKB_REROUTED;
2691 + return dst_output(skb);
2692 + }
2693 +#endif
2695 + if (!skb_is_gso(skb))
2696 + return xfrm4_output_finish2(skb);
2698 + skb->protocol = htons(ETH_P_IP);
2699 + segs = skb_gso_segment(skb, 0);
2700 + kfree_skb(skb);
2701 + if (unlikely(IS_ERR(segs)))
2702 + return PTR_ERR(segs);
2704 + do {
2705 + struct sk_buff *nskb = segs->next;
2706 + int err;
2708 + segs->next = NULL;
2709 + err = xfrm4_output_finish2(segs);
2711 + if (unlikely(err)) {
2712 + while ((segs = nskb)) {
2713 + nskb = segs->next;
2714 + segs->next = NULL;
2715 + kfree_skb(segs);
2716 + }
2717 + return err;
2718 + }
2720 + segs = nskb;
2721 + } while (segs);
2723 + return 0;
2724 +}
2726 int xfrm4_output(struct sk_buff *skb)
2728 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
2729 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
2730 index 5bf70b1..33a5850 100644
2731 --- a/net/ipv6/ip6_output.c
2732 +++ b/net/ipv6/ip6_output.c
2733 @@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *s
2735 int ip6_output(struct sk_buff *skb)
2737 - if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
2738 + if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
2739 dst_allfrag(skb->dst))
2740 return ip6_fragment(skb, ip6_output2);
2741 else
2742 @@ -829,8 +829,9 @@ static inline int ip6_ufo_append_data(st
2743 struct frag_hdr fhdr;
2745 /* specify the length of each IP datagram fragment*/
2746 - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) -
2747 - sizeof(struct frag_hdr);
2748 + skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
2749 + sizeof(struct frag_hdr);
2750 + skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
2751 ipv6_select_ident(skb, &fhdr);
2752 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
2753 __skb_queue_tail(&sk->sk_write_queue, skb);
2754 diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
2755 index d511a88..ef56d5d 100644
2756 --- a/net/ipv6/ipcomp6.c
2757 +++ b/net/ipv6/ipcomp6.c
2758 @@ -64,7 +64,7 @@ static LIST_HEAD(ipcomp6_tfms_list);
2760 static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
2762 - int err = 0;
2763 + int err = -ENOMEM;
2764 u8 nexthdr = 0;
2765 int hdr_len = skb->h.raw - skb->nh.raw;
2766 unsigned char *tmp_hdr = NULL;
2767 @@ -75,11 +75,8 @@ static int ipcomp6_input(struct xfrm_sta
2768 struct crypto_tfm *tfm;
2769 int cpu;
2771 - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
2772 - skb_linearize(skb, GFP_ATOMIC) != 0) {
2773 - err = -ENOMEM;
2774 + if (skb_linearize_cow(skb))
2775 goto out;
2776 - }
2778 skb->ip_summed = CHECKSUM_NONE;
2780 @@ -158,10 +155,8 @@ static int ipcomp6_output(struct xfrm_st
2781 goto out_ok;
2784 - if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
2785 - skb_linearize(skb, GFP_ATOMIC) != 0) {
2786 + if (skb_linearize_cow(skb))
2787 goto out_ok;
2788 - }
2790 /* compression */
2791 plen = skb->len - hdr_len;
2792 diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
2793 index 8024217..e9ea338 100644
2794 --- a/net/ipv6/xfrm6_output.c
2795 +++ b/net/ipv6/xfrm6_output.c
2796 @@ -151,7 +151,7 @@ error_nolock:
2797 goto out_exit;
2800 -static int xfrm6_output_finish(struct sk_buff *skb)
2801 +static int xfrm6_output_finish2(struct sk_buff *skb)
2803 int err;
2805 @@ -167,7 +167,7 @@ static int xfrm6_output_finish(struct sk
2806 return dst_output(skb);
2808 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
2809 - skb->dst->dev, xfrm6_output_finish);
2810 + skb->dst->dev, xfrm6_output_finish2);
2811 if (unlikely(err != 1))
2812 break;
2814 @@ -175,6 +175,41 @@ static int xfrm6_output_finish(struct sk
2815 return err;
2818 +static int xfrm6_output_finish(struct sk_buff *skb)
2819 +{
2820 + struct sk_buff *segs;
2822 + if (!skb_is_gso(skb))
2823 + return xfrm6_output_finish2(skb);
2825 + skb->protocol = htons(ETH_P_IP);
2826 + segs = skb_gso_segment(skb, 0);
2827 + kfree_skb(skb);
2828 + if (unlikely(IS_ERR(segs)))
2829 + return PTR_ERR(segs);
2831 + do {
2832 + struct sk_buff *nskb = segs->next;
2833 + int err;
2835 + segs->next = NULL;
2836 + err = xfrm6_output_finish2(segs);
2838 + if (unlikely(err)) {
2839 + while ((segs = nskb)) {
2840 + nskb = segs->next;
2841 + segs->next = NULL;
2842 + kfree_skb(segs);
2843 + }
2844 + return err;
2845 + }
2847 + segs = nskb;
2848 + } while (segs);
2850 + return 0;
2851 +}
2853 int xfrm6_output(struct sk_buff *skb)
2855 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
2856 diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
2857 index 99ceb91..28c9efd 100644
2858 --- a/net/sched/sch_generic.c
2859 +++ b/net/sched/sch_generic.c
2860 @@ -72,9 +72,9 @@ void qdisc_unlock_tree(struct net_device
2861 dev->queue_lock serializes queue accesses for this device
2862 AND dev->qdisc pointer itself.
2864 - dev->xmit_lock serializes accesses to device driver.
2865 + netif_tx_lock serializes accesses to device driver.
2867 - dev->queue_lock and dev->xmit_lock are mutually exclusive,
2868 + dev->queue_lock and netif_tx_lock are mutually exclusive,
2869 if one is grabbed, another must be free.
2870 */
2872 @@ -90,14 +90,17 @@ void qdisc_unlock_tree(struct net_device
2873 NOTE: Called under dev->queue_lock with locally disabled BH.
2874 */
2876 -int qdisc_restart(struct net_device *dev)
2877 +static inline int qdisc_restart(struct net_device *dev)
2879 struct Qdisc *q = dev->qdisc;
2880 struct sk_buff *skb;
2882 /* Dequeue packet */
2883 - if ((skb = q->dequeue(q)) != NULL) {
2884 + if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
2885 unsigned nolock = (dev->features & NETIF_F_LLTX);
2887 + dev->gso_skb = NULL;
2889 /*
2890 * When the driver has LLTX set it does its own locking
2891 * in start_xmit. No need to add additional overhead by
2892 @@ -108,7 +111,7 @@ int qdisc_restart(struct net_device *dev
2893 * will be requeued.
2894 */
2895 if (!nolock) {
2896 - if (!spin_trylock(&dev->xmit_lock)) {
2897 + if (!netif_tx_trylock(dev)) {
2898 collision:
2899 /* So, someone grabbed the driver. */
2901 @@ -126,8 +129,6 @@ int qdisc_restart(struct net_device *dev
2902 __get_cpu_var(netdev_rx_stat).cpu_collision++;
2903 goto requeue;
2905 - /* Remember that the driver is grabbed by us. */
2906 - dev->xmit_lock_owner = smp_processor_id();
2910 @@ -136,14 +137,11 @@ int qdisc_restart(struct net_device *dev
2912 if (!netif_queue_stopped(dev)) {
2913 int ret;
2914 - if (netdev_nit)
2915 - dev_queue_xmit_nit(skb, dev);
2917 - ret = dev->hard_start_xmit(skb, dev);
2918 + ret = dev_hard_start_xmit(skb, dev);
2919 if (ret == NETDEV_TX_OK) {
2920 if (!nolock) {
2921 - dev->xmit_lock_owner = -1;
2922 - spin_unlock(&dev->xmit_lock);
2923 + netif_tx_unlock(dev);
2925 spin_lock(&dev->queue_lock);
2926 return -1;
2927 @@ -157,8 +155,7 @@ int qdisc_restart(struct net_device *dev
2928 /* NETDEV_TX_BUSY - we need to requeue */
2929 /* Release the driver */
2930 if (!nolock) {
2931 - dev->xmit_lock_owner = -1;
2932 - spin_unlock(&dev->xmit_lock);
2933 + netif_tx_unlock(dev);
2935 spin_lock(&dev->queue_lock);
2936 q = dev->qdisc;
2937 @@ -175,7 +172,10 @@ int qdisc_restart(struct net_device *dev
2938 */
2940 requeue:
2941 - q->ops->requeue(skb, q);
2942 + if (skb->next)
2943 + dev->gso_skb = skb;
2944 + else
2945 + q->ops->requeue(skb, q);
2946 netif_schedule(dev);
2947 return 1;
2949 @@ -183,11 +183,23 @@ requeue:
2950 return q->q.qlen;
2953 +void __qdisc_run(struct net_device *dev)
2954 +{
2955 + if (unlikely(dev->qdisc == &noop_qdisc))
2956 + goto out;
2958 + while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
2959 + /* NOTHING */;
2961 +out:
2962 + clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
2963 +}
2965 static void dev_watchdog(unsigned long arg)
2967 struct net_device *dev = (struct net_device *)arg;
2969 - spin_lock(&dev->xmit_lock);
2970 + netif_tx_lock(dev);
2971 if (dev->qdisc != &noop_qdisc) {
2972 if (netif_device_present(dev) &&
2973 netif_running(dev) &&
2974 @@ -201,7 +213,7 @@ static void dev_watchdog(unsigned long a
2975 dev_hold(dev);
2978 - spin_unlock(&dev->xmit_lock);
2979 + netif_tx_unlock(dev);
2981 dev_put(dev);
2983 @@ -225,17 +237,17 @@ void __netdev_watchdog_up(struct net_dev
2985 static void dev_watchdog_up(struct net_device *dev)
2987 - spin_lock_bh(&dev->xmit_lock);
2988 + netif_tx_lock_bh(dev);
2989 __netdev_watchdog_up(dev);
2990 - spin_unlock_bh(&dev->xmit_lock);
2991 + netif_tx_unlock_bh(dev);
2994 static void dev_watchdog_down(struct net_device *dev)
2996 - spin_lock_bh(&dev->xmit_lock);
2997 + netif_tx_lock_bh(dev);
2998 if (del_timer(&dev->watchdog_timer))
2999 __dev_put(dev);
3000 - spin_unlock_bh(&dev->xmit_lock);
3001 + netif_tx_unlock_bh(dev);
3004 void netif_carrier_on(struct net_device *dev)
3005 @@ -577,10 +589,17 @@ void dev_deactivate(struct net_device *d
3007 dev_watchdog_down(dev);
3009 - while (test_bit(__LINK_STATE_SCHED, &dev->state))
3010 + /* Wait for outstanding dev_queue_xmit calls. */
3011 + synchronize_rcu();
3013 + /* Wait for outstanding qdisc_run calls. */
3014 + while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
3015 yield();
3017 - spin_unlock_wait(&dev->xmit_lock);
3018 + if (dev->gso_skb) {
3019 + kfree_skb(dev->gso_skb);
3020 + dev->gso_skb = NULL;
3021 + }
3024 void dev_init_scheduler(struct net_device *dev)
3025 @@ -622,6 +641,5 @@ EXPORT_SYMBOL(qdisc_create_dflt);
3026 EXPORT_SYMBOL(qdisc_alloc);
3027 EXPORT_SYMBOL(qdisc_destroy);
3028 EXPORT_SYMBOL(qdisc_reset);
3029 -EXPORT_SYMBOL(qdisc_restart);
3030 EXPORT_SYMBOL(qdisc_lock_tree);
3031 EXPORT_SYMBOL(qdisc_unlock_tree);
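qdisc_restart() becomes static and is no longer exported: callers go through __qdisc_run() and the new __LINK_STATE_QDISC_RUNNING bit, which guarantees that only one CPU runs a device's qdisc at a time and gives dev_deactivate() something concrete to wait on. The inline that pairs with __qdisc_run() lives in an include/net/pkt_sched.h hunk earlier in this patch (not shown); it presumably looks roughly like:

static inline void qdisc_run(struct net_device *dev)
{
        if (!test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
                __qdisc_run(dev);
}

dev->gso_skb holds a partially transmitted GSO segment list across a NETDEV_TX_BUSY requeue (note the skb->next test in the requeue path above), so already-segmented packets are retried ahead of the qdisc on the next restart.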
3032 diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
3033 index 79b8ef3..4c16ad5 100644
3034 --- a/net/sched/sch_teql.c
3035 +++ b/net/sched/sch_teql.c
3036 @@ -302,20 +302,17 @@ restart:
3038 switch (teql_resolve(skb, skb_res, slave)) {
3039 case 0:
3040 - if (spin_trylock(&slave->xmit_lock)) {
3041 - slave->xmit_lock_owner = smp_processor_id();
3042 + if (netif_tx_trylock(slave)) {
3043 if (!netif_queue_stopped(slave) &&
3044 slave->hard_start_xmit(skb, slave) == 0) {
3045 - slave->xmit_lock_owner = -1;
3046 - spin_unlock(&slave->xmit_lock);
3047 + netif_tx_unlock(slave);
3048 master->slaves = NEXT_SLAVE(q);
3049 netif_wake_queue(dev);
3050 master->stats.tx_packets++;
3051 master->stats.tx_bytes += len;
3052 return 0;
3054 - slave->xmit_lock_owner = -1;
3055 - spin_unlock(&slave->xmit_lock);
3056 + netif_tx_unlock(slave);
3058 if (netif_queue_stopped(dev))
3059 busy = 1;