ia64/linux-2.6.18-xen.hg

view drivers/net/loopback.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Pseudo-driver for the loopback interface.
7 *
8 * Version: @(#)loopback.c 1.0.4b 08/16/93
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Donald Becker, <becker@scyld.com>
13 *
14 * Alan Cox : Fixed oddments for NET3.014
15 * Alan Cox : Rejig for NET3.029 snap #3
16 * Alan Cox : Fixed NET3.029 bugs and sped up
17 * Larry McVoy : Tiny tweak to double performance
18 * Alan Cox : Backed out LMV's tweak - the linux mm
19 * can't take it...
20 * Michael Griffith: Don't bother computing the checksums
21 * on packets received on the loopback
22 * interface.
23 * Alexey Kuznetsov: Potential hang under some extreme
24 * cases removed.
25 *
26 * This program is free software; you can redistribute it and/or
27 * modify it under the terms of the GNU General Public License
28 * as published by the Free Software Foundation; either version
29 * 2 of the License, or (at your option) any later version.
30 */
31 #include <linux/kernel.h>
32 #include <linux/jiffies.h>
33 #include <linux/module.h>
34 #include <linux/interrupt.h>
35 #include <linux/fs.h>
36 #include <linux/types.h>
37 #include <linux/string.h>
38 #include <linux/socket.h>
39 #include <linux/errno.h>
40 #include <linux/fcntl.h>
41 #include <linux/in.h>
42 #include <linux/init.h>
44 #include <asm/system.h>
45 #include <asm/uaccess.h>
46 #include <asm/io.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/skbuff.h>
52 #include <linux/ethtool.h>
53 #include <net/sock.h>
54 #include <net/checksum.h>
55 #include <linux/if_ether.h> /* For the statistics structure. */
56 #include <linux/if_arp.h> /* For ARPHRD_ETHER */
57 #include <linux/ip.h>
58 #include <linux/tcp.h>
59 #include <linux/percpu.h>
61 static DEFINE_PER_CPU(struct net_device_stats, loopback_stats);
63 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
65 /* KISS: just allocate small chunks and copy bits.
66 *
67 * So, in fact, this is documentation, explaining what we expect
68 * of largesending device modulo TCP checksum, which is ignored for loopback.
69 */
71 #ifdef LOOPBACK_TSO
72 static void emulate_large_send_offload(struct sk_buff *skb)
73 {
74 struct iphdr *iph = skb->nh.iph;
75 struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
76 unsigned int doffset = (iph->ihl + th->doff) * 4;
77 unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
78 unsigned int offset = 0;
79 u32 seq = ntohl(th->seq);
80 u16 id = ntohs(iph->id);
82 while (offset + doffset < skb->len) {
83 unsigned int frag_size = min(mtu, skb->len - offset) - doffset;
84 struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC);
86 if (!nskb)
87 break;
88 skb_reserve(nskb, 32);
89 nskb->mac.raw = nskb->data - 14;
90 nskb->nh.raw = nskb->data;
91 iph = nskb->nh.iph;
92 memcpy(nskb->data, skb->nh.raw, doffset);
93 if (skb_copy_bits(skb,
94 doffset + offset,
95 nskb->data + doffset,
96 frag_size))
97 BUG();
98 skb_put(nskb, doffset + frag_size);
99 nskb->ip_summed = CHECKSUM_UNNECESSARY;
100 nskb->dev = skb->dev;
101 nskb->priority = skb->priority;
102 nskb->protocol = skb->protocol;
103 nskb->dst = dst_clone(skb->dst);
104 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
105 nskb->pkt_type = skb->pkt_type;
107 th = (struct tcphdr*)(nskb->nh.raw + iph->ihl*4);
108 iph->tot_len = htons(frag_size + doffset);
109 iph->id = htons(id);
110 iph->check = 0;
111 iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl);
112 th->seq = htonl(seq);
113 if (offset + doffset + frag_size < skb->len)
114 th->fin = th->psh = 0;
115 netif_rx(nskb);
116 offset += frag_size;
117 seq += frag_size;
118 id++;
119 }
121 dev_kfree_skb(skb);
122 }
123 #endif /* LOOPBACK_TSO */
125 /*
126 * The higher levels take care of making this non-reentrant (it's
127 * called with bh's disabled).
128 */
129 static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
130 {
131 struct net_device_stats *lb_stats;
133 skb_orphan(skb);
135 skb->protocol = eth_type_trans(skb,dev);
136 skb->dev = dev;
137 #ifndef LOOPBACK_MUST_CHECKSUM
138 skb->ip_summed = CHECKSUM_UNNECESSARY;
139 #endif
141 #ifdef LOOPBACK_TSO
142 if (skb_is_gso(skb)) {
143 BUG_ON(skb->protocol != htons(ETH_P_IP));
144 BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
146 emulate_large_send_offload(skb);
147 return 0;
148 }
149 #endif
150 dev->last_rx = jiffies;
152 lb_stats = &per_cpu(loopback_stats, get_cpu());
153 lb_stats->rx_bytes += skb->len;
154 lb_stats->tx_bytes = lb_stats->rx_bytes;
155 lb_stats->rx_packets++;
156 lb_stats->tx_packets = lb_stats->rx_packets;
157 put_cpu();
159 netif_rx(skb);
161 return(0);
162 }
164 static struct net_device_stats *get_stats(struct net_device *dev)
165 {
166 struct net_device_stats *stats = dev->priv;
167 int i;
169 if (!stats) {
170 return NULL;
171 }
173 memset(stats, 0, sizeof(struct net_device_stats));
175 for_each_possible_cpu(i) {
176 struct net_device_stats *lb_stats;
178 lb_stats = &per_cpu(loopback_stats, i);
179 stats->rx_bytes += lb_stats->rx_bytes;
180 stats->tx_bytes += lb_stats->tx_bytes;
181 stats->rx_packets += lb_stats->rx_packets;
182 stats->tx_packets += lb_stats->tx_packets;
183 }
185 return stats;
186 }
188 static u32 loopback_get_link(struct net_device *dev)
189 {
190 return 1;
191 }
193 static struct ethtool_ops loopback_ethtool_ops = {
194 .get_link = loopback_get_link,
195 .get_tso = ethtool_op_get_tso,
196 .set_tso = ethtool_op_set_tso,
197 };
199 struct net_device loopback_dev = {
200 .name = "lo",
201 .mtu = (16 * 1024) + 20 + 20 + 12,
202 .hard_start_xmit = loopback_xmit,
203 .hard_header = eth_header,
204 .hard_header_cache = eth_header_cache,
205 .header_cache_update = eth_header_cache_update,
206 .hard_header_len = ETH_HLEN, /* 14 */
207 .addr_len = ETH_ALEN, /* 6 */
208 .tx_queue_len = 0,
209 .type = ARPHRD_LOOPBACK, /* 0x0001*/
210 .rebuild_header = eth_rebuild_header,
211 .flags = IFF_LOOPBACK,
212 .features = NETIF_F_SG | NETIF_F_FRAGLIST
213 #ifdef LOOPBACK_TSO
214 | NETIF_F_TSO
215 #endif
216 | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA
217 | NETIF_F_LLTX,
218 .ethtool_ops = &loopback_ethtool_ops,
219 };
221 /* Setup and register the loopback device. */
222 int __init loopback_init(void)
223 {
224 struct net_device_stats *stats;
226 /* Can survive without statistics */
227 stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
228 if (stats) {
229 memset(stats, 0, sizeof(struct net_device_stats));
230 loopback_dev.priv = stats;
231 loopback_dev.get_stats = &get_stats;
232 }
234 return register_netdev(&loopback_dev);
235 };
237 EXPORT_SYMBOL(loopback_dev);