ia64/linux-2.6.18-xen.hg
drivers/net/ixp2000/ixpdev.c @ 897:329ea0ccb344
balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, in which case it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon beyond its allocation, nor to deliberately
over-commit memory by setting balloon targets which exceed the total
host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author   Keir Fraser <keir.fraser@citrix.com>
date     Fri Jun 05 14:01:20 2009 +0100
parents  831230e53067
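The patch itself is not shown on this page. As a rough sketch of the
retry behaviour the commit message describes (all identifiers below,
such as balloon_process(), increase_reservation(), current_pages and
target_pages, are illustrative assumptions rather than the driver's
actual names):

/* Sketch only: retry ballooning up on a timer instead of recording a
 * hard limit. increase_reservation() is assumed to return the number
 * of pages actually granted by Xen (possibly fewer than requested). */
static unsigned long current_pages, target_pages;
static struct timer_list balloon_timer;   /* assumed to re-run
                                           * balloon_process() on expiry */

static void balloon_process(void *unused)
{
        if (current_pages < target_pages) {
                /* Keep whatever pages we did get, even on a partial
                 * success, rather than handing them back to Xen. */
                current_pages += increase_reservation(target_pages -
                                                      current_pages);

                /* The host may just be under temporary pressure while
                 * other guests balloon down, so retry in a second
                 * rather than giving up until a new target is set. */
                if (current_pages < target_pages)
                        mod_timer(&balloon_timer, jiffies + HZ);
        }
}

The content of drivers/net/ixp2000/ixpdev.c at this revision follows.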
/*
 * IXP2000 MSF network device driver
 * Copyright (C) 2004, 2005 Lennert Buytenhek <buytenh@wantstofly.org>
 * Dedicated to Marija Kulikova.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <asm/hardware/uengine.h>
#include <asm/mach-types.h>
#include <asm/io.h>
#include "ixp2400_rx.ucode"
#include "ixp2400_tx.ucode"
#include "ixpdev_priv.h"
#include "ixpdev.h"

#define DRV_MODULE_VERSION      "0.2"

static int nds_count;
static struct net_device **nds;
static int nds_open;
static void (*set_port_admin_status)(int port, int up);

static struct ixpdev_rx_desc * const rx_desc =
        (struct ixpdev_rx_desc *)(IXP2000_SRAM0_VIRT_BASE + RX_BUF_DESC_BASE);
static struct ixpdev_tx_desc * const tx_desc =
        (struct ixpdev_tx_desc *)(IXP2000_SRAM0_VIRT_BASE + TX_BUF_DESC_BASE);
static int tx_pointer;
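/*
 * TX path. The packet is copied (folding in the checksum via
 * skb_copy_and_csum_dev()) into the page buffer preallocated for the
 * chosen descriptor, the skb is freed immediately, and the descriptor
 * is handed to the transmit microengine through the TX pending ring.
 */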
static int ixpdev_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ixpdev_priv *ip = netdev_priv(dev);
        struct ixpdev_tx_desc *desc;
        int entry;

        if (unlikely(skb->len > PAGE_SIZE)) {
                /* @@@ Count drops. */
                dev_kfree_skb(skb);
                return 0;
        }

        entry = tx_pointer;
        tx_pointer = (tx_pointer + 1) % TX_BUF_COUNT;

        desc = tx_desc + entry;
        desc->pkt_length = skb->len;
        desc->channel = ip->channel;

        skb_copy_and_csum_dev(skb, phys_to_virt(desc->buf_addr));
        dev_kfree_skb(skb);

        ixp2000_reg_write(RING_TX_PENDING,
                TX_BUF_DESC_BASE + (entry * sizeof(struct ixpdev_tx_desc)));

        dev->trans_start = jiffies;

        local_irq_disable();
        ip->tx_queue_entries++;
        if (ip->tx_queue_entries == TX_BUF_COUNT_PER_CHAN)
                netif_stop_queue(dev);
        local_irq_enable();

        return 0;
}
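/*
 * RX path, called from ixpdev_poll(). Drains the RX done ring until it
 * is empty (return 0) or the budget is spent (return 1). Each frame is
 * copied into a freshly allocated skb, and the hardware buffer is then
 * recycled by putting its descriptor back on the RX pending ring.
 */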
static int ixpdev_rx(struct net_device *dev, int *budget)
{
        while (*budget > 0) {
                struct ixpdev_rx_desc *desc;
                struct sk_buff *skb;
                void *buf;
                u32 _desc;

                _desc = ixp2000_reg_read(RING_RX_DONE);
                if (_desc == 0)
                        return 0;

                desc = rx_desc +
                        ((_desc - RX_BUF_DESC_BASE) / sizeof(struct ixpdev_rx_desc));
                buf = phys_to_virt(desc->buf_addr);

                if (desc->pkt_length < 4 || desc->pkt_length > PAGE_SIZE) {
                        printk(KERN_ERR "ixp2000: rx err, length %d\n",
                                        desc->pkt_length);
                        goto err;
                }

                if (desc->channel < 0 || desc->channel >= nds_count) {
                        printk(KERN_ERR "ixp2000: rx err, channel %d\n",
                                        desc->channel);
                        goto err;
                }

                /* @@@ Make FCS stripping configurable. */
                desc->pkt_length -= 4;

                if (unlikely(!netif_running(nds[desc->channel])))
                        goto err;

                skb = dev_alloc_skb(desc->pkt_length + 2);
                if (likely(skb != NULL)) {
                        skb->dev = nds[desc->channel];
                        skb_reserve(skb, 2);
                        eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
                        skb_put(skb, desc->pkt_length);
                        skb->protocol = eth_type_trans(skb, skb->dev);

                        skb->dev->last_rx = jiffies;

                        netif_receive_skb(skb);
                }

err:
                ixp2000_reg_write(RING_RX_PENDING, _desc);
                dev->quota--;
                (*budget)--;
        }

        return 1;
}
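/*
 * NAPI ->poll() callback in the 2.6.18-era style (per-device quota plus
 * a shared *budget). The re-read of the raw status register closes the
 * race between draining the done ring and re-enabling RX interrupts.
 */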
/* dev always points to nds[0]. */
static int ixpdev_poll(struct net_device *dev, int *budget)
{
        /* @@@ Have to stop polling when nds[0] is administratively
         * downed while we are polling. */
        do {
                ixp2000_reg_write(IXP2000_IRQ_THD_RAW_STATUS_A_0, 0x00ff);

                if (ixpdev_rx(dev, budget))
                        return 1;
        } while (ixp2000_reg_read(IXP2000_IRQ_THD_RAW_STATUS_A_0) & 0x00ff);

        netif_rx_complete(dev);
        ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_SET_A_0, 0x00ff);

        return 0;
}
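/*
 * Reclaim completed descriptors from the TX done ring. Any queue that
 * was stopped because its channel had all TX_BUF_COUNT_PER_CHAN entries
 * in flight is woken again once a completion comes back for it.
 */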
static void ixpdev_tx_complete(void)
{
        int channel;
        u32 wake;

        wake = 0;
        while (1) {
                struct ixpdev_priv *ip;
                u32 desc;
                int entry;

                desc = ixp2000_reg_read(RING_TX_DONE);
                if (desc == 0)
                        break;

                /* @@@ Check whether entries come back in order. */
                entry = (desc - TX_BUF_DESC_BASE) / sizeof(struct ixpdev_tx_desc);
                channel = tx_desc[entry].channel;

                if (channel < 0 || channel >= nds_count) {
                        printk(KERN_ERR "ixp2000: txcomp channel index "
                                        "out of bounds (%d, %.8i, %d)\n",
                                        channel, (unsigned int)desc, entry);
                        continue;
                }

                ip = netdev_priv(nds[channel]);
                if (ip->tx_queue_entries == TX_BUF_COUNT_PER_CHAN)
                        wake |= 1 << channel;
                ip->tx_queue_entries--;
        }

        for (channel = 0; wake != 0; channel++) {
                if (wake & (1 << channel)) {
                        netif_wake_queue(nds[channel]);
                        wake &= ~(1 << channel);
                }
        }
}
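/*
 * Shared interrupt handler. The low eight status bits signal RX, which
 * is deferred to NAPI polling on nds[0]; the high eight bits signal TX
 * completion, which is handled directly in interrupt context.
 */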
static irqreturn_t ixpdev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        u32 status;

        status = ixp2000_reg_read(IXP2000_IRQ_THD_STATUS_A_0);
        if (status == 0)
                return IRQ_NONE;

        /*
         * Any of the eight receive units signaled RX?
         */
        if (status & 0x00ff) {
                ixp2000_reg_wrb(IXP2000_IRQ_THD_ENABLE_CLEAR_A_0, 0x00ff);
                if (likely(__netif_rx_schedule_prep(nds[0]))) {
                        __netif_rx_schedule(nds[0]);
                } else {
                        printk(KERN_CRIT "ixp2000: irq while polling!!\n");
                }
        }

        /*
         * Any of the eight transmit units signaled TXdone?
         */
        if (status & 0xff00) {
                ixp2000_reg_wrb(IXP2000_IRQ_THD_RAW_STATUS_A_0, 0xff00);
                ixpdev_tx_complete();
        }

        return IRQ_HANDLED;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void ixpdev_poll_controller(struct net_device *dev)
{
        disable_irq(IRQ_IXP2000_THDA0);
        ixpdev_interrupt(IRQ_IXP2000_THDA0, dev, NULL);
        enable_irq(IRQ_IXP2000_THDA0);
}
#endif
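/*
 * The interrupt line is shared by all ports: the first open requests it
 * and enables the thread interrupts, and the last close disables them
 * and frees the line again (refcounted through nds_open).
 */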
static int ixpdev_open(struct net_device *dev)
{
        struct ixpdev_priv *ip = netdev_priv(dev);
        int err;

        if (!nds_open++) {
                err = request_irq(IRQ_IXP2000_THDA0, ixpdev_interrupt,
                                        IRQF_SHARED, "ixp2000_eth", nds);
                if (err) {
                        nds_open--;
                        return err;
                }

                ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_SET_A_0, 0xffff);
        }

        set_port_admin_status(ip->channel, 1);
        netif_start_queue(dev);

        return 0;
}

static int ixpdev_close(struct net_device *dev)
{
        struct ixpdev_priv *ip = netdev_priv(dev);

        netif_stop_queue(dev);
        set_port_admin_status(ip->channel, 0);

        if (!--nds_open) {
                ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_CLEAR_A_0, 0xffff);
                free_irq(IRQ_IXP2000_THDA0, nds);
        }

        return 0;
}
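/*
 * Allocate and preinitialise a net_device for one MSF port. The
 * board-specific caller is expected to fill in the MAC address and then
 * register the devices via ixpdev_init().
 */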
struct net_device *ixpdev_alloc(int channel, int sizeof_priv)
{
        struct net_device *dev;
        struct ixpdev_priv *ip;

        dev = alloc_etherdev(sizeof_priv);
        if (dev == NULL)
                return NULL;

        dev->hard_start_xmit = ixpdev_xmit;
        dev->poll = ixpdev_poll;
        dev->open = ixpdev_open;
        dev->stop = ixpdev_close;
#ifdef CONFIG_NET_POLL_CONTROLLER
        dev->poll_controller = ixpdev_poll_controller;
#endif

        dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
        dev->weight = 64;

        ip = netdev_priv(dev);
        ip->channel = channel;
        ip->tx_queue_entries = 0;

        return dev;
}
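/*
 * Bring up the datapath: allocate one page per RX/TX descriptor,
 * program the four hardware rings (RX/TX pending and done), pre-post
 * every RX descriptor, load and start the RX and TX microcode on
 * microengines 0 and 1, and finally register the net devices.
 */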
int ixpdev_init(int __nds_count, struct net_device **__nds,
                void (*__set_port_admin_status)(int port, int up))
{
        int i;
        int err;

        BUILD_BUG_ON(RX_BUF_COUNT > 192 || TX_BUF_COUNT > 192);

        printk(KERN_INFO "IXP2000 MSF ethernet driver %s\n", DRV_MODULE_VERSION);

        nds_count = __nds_count;
        nds = __nds;
        set_port_admin_status = __set_port_admin_status;

        for (i = 0; i < RX_BUF_COUNT; i++) {
                void *buf;

                buf = (void *)get_zeroed_page(GFP_KERNEL);
                if (buf == NULL) {
                        err = -ENOMEM;
                        while (--i >= 0)
                                free_page((unsigned long)phys_to_virt(rx_desc[i].buf_addr));
                        goto err_out;
                }
                rx_desc[i].buf_addr = virt_to_phys(buf);
                rx_desc[i].buf_length = PAGE_SIZE;
        }

        /* @@@ Maybe we shouldn't be preallocating TX buffers. */
        for (i = 0; i < TX_BUF_COUNT; i++) {
                void *buf;

                buf = (void *)get_zeroed_page(GFP_KERNEL);
                if (buf == NULL) {
                        err = -ENOMEM;
                        while (--i >= 0)
                                free_page((unsigned long)phys_to_virt(tx_desc[i].buf_addr));
                        goto err_free_rx;
                }
                tx_desc[i].buf_addr = virt_to_phys(buf);
        }

        /* 256 entries, ring status set means 'empty', base address 0x0000. */
        ixp2000_reg_write(RING_RX_PENDING_BASE, 0x44000000);
        ixp2000_reg_write(RING_RX_PENDING_HEAD, 0x00000000);
        ixp2000_reg_write(RING_RX_PENDING_TAIL, 0x00000000);

        /* 256 entries, ring status set means 'full', base address 0x0400. */
        ixp2000_reg_write(RING_RX_DONE_BASE, 0x40000400);
        ixp2000_reg_write(RING_RX_DONE_HEAD, 0x00000000);
        ixp2000_reg_write(RING_RX_DONE_TAIL, 0x00000000);

        for (i = 0; i < RX_BUF_COUNT; i++) {
                ixp2000_reg_write(RING_RX_PENDING,
                        RX_BUF_DESC_BASE + (i * sizeof(struct ixpdev_rx_desc)));
        }

        ixp2000_uengine_load(0, &ixp2400_rx);
        ixp2000_uengine_start_contexts(0, 0xff);

        /* 256 entries, ring status set means 'empty', base address 0x0800. */
        ixp2000_reg_write(RING_TX_PENDING_BASE, 0x44000800);
        ixp2000_reg_write(RING_TX_PENDING_HEAD, 0x00000000);
        ixp2000_reg_write(RING_TX_PENDING_TAIL, 0x00000000);

        /* 256 entries, ring status set means 'full', base address 0x0c00. */
        ixp2000_reg_write(RING_TX_DONE_BASE, 0x40000c00);
        ixp2000_reg_write(RING_TX_DONE_HEAD, 0x00000000);
        ixp2000_reg_write(RING_TX_DONE_TAIL, 0x00000000);

        ixp2000_uengine_load(1, &ixp2400_tx);
        ixp2000_uengine_start_contexts(1, 0xff);

        for (i = 0; i < nds_count; i++) {
                err = register_netdev(nds[i]);
                if (err) {
                        while (--i >= 0)
                                unregister_netdev(nds[i]);
                        goto err_free_tx;
                }
        }

        for (i = 0; i < nds_count; i++) {
                printk(KERN_INFO "%s: IXP2000 MSF ethernet (port %d), "
                        "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x.\n", nds[i]->name, i,
                        nds[i]->dev_addr[0], nds[i]->dev_addr[1],
                        nds[i]->dev_addr[2], nds[i]->dev_addr[3],
                        nds[i]->dev_addr[4], nds[i]->dev_addr[5]);
        }

        return 0;

err_free_tx:
        for (i = 0; i < TX_BUF_COUNT; i++)
                free_page((unsigned long)phys_to_virt(tx_desc[i].buf_addr));

err_free_rx:
        for (i = 0; i < RX_BUF_COUNT; i++)
                free_page((unsigned long)phys_to_virt(rx_desc[i].buf_addr));

err_out:
        return err;
}
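/*
 * Tear down in reverse order: unregister the devices, stop and reset
 * the microengines, then free the descriptor buffers.
 */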
void ixpdev_deinit(void)
{
        int i;

        /* @@@ Flush out pending packets. */

        for (i = 0; i < nds_count; i++)
                unregister_netdev(nds[i]);

        ixp2000_uengine_stop_contexts(1, 0xff);
        ixp2000_uengine_stop_contexts(0, 0xff);
        ixp2000_uengine_reset(0x3);

        for (i = 0; i < TX_BUF_COUNT; i++)
                free_page((unsigned long)phys_to_virt(tx_desc[i].buf_addr));

        for (i = 0; i < RX_BUF_COUNT; i++)
                free_page((unsigned long)phys_to_virt(rx_desc[i].buf_addr));
}