ia64/linux-2.6.18-xen.hg

view drivers/net/isa-skeleton.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* isa-skeleton.c: A network driver outline for linux.
2 *
3 * Written 1993-94 by Donald Becker.
4 *
5 * Copyright 1993 United States Government as represented by the
6 * Director, National Security Agency.
7 *
8 * This software may be used and distributed according to the terms
9 * of the GNU General Public License, incorporated herein by reference.
10 *
11 * The author may be reached as becker@scyld.com, or C/O
12 * Scyld Computing Corporation
13 * 410 Severn Ave., Suite 210
14 * Annapolis MD 21403
15 *
16 * This file is an outline for writing a network device driver for
17 * the Linux operating system.
18 *
19 * To write (or understand) a driver, have a look at the "loopback.c" file to
20 * get a feel of what is going on, and then use the code below as a skeleton
21 * for the new driver.
22 *
23 */
/* Banner printed once by the probe routine when net_debug is non-zero. */
static const char *version =
	"isa-skeleton.c:v1.51 9/24/94 Donald Becker (becker@cesdis.gsfc.nasa.gov)\n";
28 /*
29 * Sources:
30 * List your sources of programming information to document that
31 * the driver is your own creation, and give due credit to others
32 * that contributed to the work. Remember that GNU project code
33 * cannot use proprietary or trade secret information. Interface
34 * definitions are generally considered non-copyrightable to the
35 * extent that the same names and structures must be used to be
36 * compatible.
37 *
38 * Finally, keep in mind that the Linux kernel has an API, not an
39 * ABI. Proprietary object-code-only distributions are not permitted
40 * under the GPL.
41 */
43 #include <linux/module.h>
44 #include <linux/kernel.h>
45 #include <linux/types.h>
46 #include <linux/fcntl.h>
47 #include <linux/interrupt.h>
48 #include <linux/ioport.h>
49 #include <linux/in.h>
50 #include <linux/slab.h>
51 #include <linux/string.h>
52 #include <linux/spinlock.h>
53 #include <linux/errno.h>
54 #include <linux/init.h>
55 #include <linux/netdevice.h>
56 #include <linux/etherdevice.h>
57 #include <linux/skbuff.h>
58 #include <linux/bitops.h>
60 #include <asm/system.h>
61 #include <asm/io.h>
62 #include <asm/dma.h>
64 /*
65 * The name of the card. Is used for messages and in the requests for
66 * io regions, irqs and dma channels
67 */
68 static const char* cardname = "netcard";
70 /* First, a few definitions that the brave might change. */
72 /* A zero-terminated list of I/O addresses to be probed. */
73 static unsigned int netcard_portlist[] __initdata =
74 { 0x200, 0x240, 0x280, 0x2C0, 0x300, 0x320, 0x340, 0};
76 /* use 0 for production, 1 for verification, >2 for debug */
77 #ifndef NET_DEBUG
78 #define NET_DEBUG 2
79 #endif
80 static unsigned int net_debug = NET_DEBUG;
82 /* The number of low I/O ports used by the ethercard. */
83 #define NETCARD_IO_EXTENT 32
85 #define MY_TX_TIMEOUT ((400*HZ)/1000)
87 /* Information that need to be kept for each board. */
88 struct net_local {
89 struct net_device_stats stats;
90 long open_time; /* Useless example local info. */
92 /* Tx control lock. This protects the transmit buffer ring
93 * state along with the "tx full" state of the driver. This
94 * means all netif_queue flow control actions are protected
95 * by this lock as well.
96 */
97 spinlock_t lock;
98 };
/*
 * Expected first three bytes of the station (ethernet) address; the
 * probe routine compares them with what it reads from the board.
 */
#define SA_ADDR0 0x00
#define SA_ADDR1 0x42
#define SA_ADDR2 0x65
105 /* Index to functions, as function prototypes. */
107 static int netcard_probe1(struct net_device *dev, int ioaddr);
108 static int net_open(struct net_device *dev);
109 static int net_send_packet(struct sk_buff *skb, struct net_device *dev);
110 static irqreturn_t net_interrupt(int irq, void *dev_id, struct pt_regs *regs);
111 static void net_rx(struct net_device *dev);
112 static int net_close(struct net_device *dev);
113 static struct net_device_stats *net_get_stats(struct net_device *dev);
114 static void set_multicast_list(struct net_device *dev);
115 static void net_tx_timeout(struct net_device *dev);
118 /* Example routines you must write ;->. */
119 #define tx_done(dev) 1
120 static void hardware_send_packet(short ioaddr, char *buf, int length);
121 static void chipset_init(struct net_device *dev, int startp);
123 /*
124 * Check for a network adaptor of this type, and return '0' iff one exists.
125 * If dev->base_addr == 0, probe all likely locations.
126 * If dev->base_addr == 1, always return failure.
127 * If dev->base_addr == 2, allocate space for the device and return success
128 * (detachable devices only).
129 */
130 static int __init do_netcard_probe(struct net_device *dev)
131 {
132 int i;
133 int base_addr = dev->base_addr;
134 int irq = dev->irq;
136 SET_MODULE_OWNER(dev);
138 if (base_addr > 0x1ff) /* Check a single specified location. */
139 return netcard_probe1(dev, base_addr);
140 else if (base_addr != 0) /* Don't probe at all. */
141 return -ENXIO;
143 for (i = 0; netcard_portlist[i]; i++) {
144 int ioaddr = netcard_portlist[i];
145 if (netcard_probe1(dev, ioaddr) == 0)
146 return 0;
147 dev->irq = irq;
148 }
150 return -ENODEV;
151 }
153 static void cleanup_card(struct net_device *dev)
154 {
155 #ifdef jumpered_dma
156 free_dma(dev->dma);
157 #endif
158 #ifdef jumpered_interrupts
159 free_irq(dev->irq, dev);
160 #endif
161 release_region(dev->base_addr, NETCARD_IO_EXTENT);
162 }
164 #ifndef MODULE
165 struct net_device * __init netcard_probe(int unit)
166 {
167 struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
168 int err;
170 if (!dev)
171 return ERR_PTR(-ENOMEM);
173 sprintf(dev->name, "eth%d", unit);
174 netdev_boot_setup_check(dev);
176 err = do_netcard_probe(dev);
177 if (err)
178 goto out;
179 return dev;
180 out:
181 free_netdev(dev);
182 return ERR_PTR(err);
183 }
184 #endif
186 /*
187 * This is the real probe routine. Linux has a history of friendly device
188 * probes on the ISA bus. A good device probes avoids doing writes, and
189 * verifies that the correct device exists and functions.
190 */
191 static int __init netcard_probe1(struct net_device *dev, int ioaddr)
192 {
193 struct net_local *np;
194 static unsigned version_printed;
195 int i;
196 int err = -ENODEV;
198 /* Grab the region so that no one else tries to probe our ioports. */
199 if (!request_region(ioaddr, NETCARD_IO_EXTENT, cardname))
200 return -EBUSY;
202 /*
203 * For ethernet adaptors the first three octets of the station address
204 * contains the manufacturer's unique code. That might be a good probe
205 * method. Ideally you would add additional checks.
206 */
207 if (inb(ioaddr + 0) != SA_ADDR0
208 || inb(ioaddr + 1) != SA_ADDR1
209 || inb(ioaddr + 2) != SA_ADDR2)
210 goto out;
212 if (net_debug && version_printed++ == 0)
213 printk(KERN_DEBUG "%s", version);
215 printk(KERN_INFO "%s: %s found at %#3x, ", dev->name, cardname, ioaddr);
217 /* Fill in the 'dev' fields. */
218 dev->base_addr = ioaddr;
220 /* Retrieve and print the ethernet address. */
221 for (i = 0; i < 6; i++)
222 printk(" %2.2x", dev->dev_addr[i] = inb(ioaddr + i));
224 err = -EAGAIN;
225 #ifdef jumpered_interrupts
226 /*
227 * If this board has jumpered interrupts, allocate the interrupt
228 * vector now. There is no point in waiting since no other device
229 * can use the interrupt, and this marks the irq as busy. Jumpered
230 * interrupts are typically not reported by the boards, and we must
231 * used autoIRQ to find them.
232 */
234 if (dev->irq == -1)
235 ; /* Do nothing: a user-level program will set it. */
236 else if (dev->irq < 2) { /* "Auto-IRQ" */
237 unsigned long irq_mask = probe_irq_on();
238 /* Trigger an interrupt here. */
240 dev->irq = probe_irq_off(irq_mask);
241 if (net_debug >= 2)
242 printk(" autoirq is %d", dev->irq);
243 } else if (dev->irq == 2)
244 /*
245 * Fixup for users that don't know that IRQ 2 is really
246 * IRQ9, or don't know which one to set.
247 */
248 dev->irq = 9;
250 {
251 int irqval = request_irq(dev->irq, &net_interrupt, 0, cardname, dev);
252 if (irqval) {
253 printk("%s: unable to get IRQ %d (irqval=%d).\n",
254 dev->name, dev->irq, irqval);
255 goto out;
256 }
257 }
258 #endif /* jumpered interrupt */
259 #ifdef jumpered_dma
260 /*
261 * If we use a jumpered DMA channel, that should be probed for and
262 * allocated here as well. See lance.c for an example.
263 */
264 if (dev->dma == 0) {
265 if (request_dma(dev->dma, cardname)) {
266 printk("DMA %d allocation failed.\n", dev->dma);
267 goto out1;
268 } else
269 printk(", assigned DMA %d.\n", dev->dma);
270 } else {
271 short dma_status, new_dma_status;
273 /* Read the DMA channel status registers. */
274 dma_status = ((inb(DMA1_STAT_REG) >> 4) & 0x0f) |
275 (inb(DMA2_STAT_REG) & 0xf0);
276 /* Trigger a DMA request, perhaps pause a bit. */
277 outw(0x1234, ioaddr + 8);
278 /* Re-read the DMA status registers. */
279 new_dma_status = ((inb(DMA1_STAT_REG) >> 4) & 0x0f) |
280 (inb(DMA2_STAT_REG) & 0xf0);
281 /*
282 * Eliminate the old and floating requests,
283 * and DMA4 the cascade.
284 */
285 new_dma_status ^= dma_status;
286 new_dma_status &= ~0x10;
287 for (i = 7; i > 0; i--)
288 if (test_bit(i, &new_dma_status)) {
289 dev->dma = i;
290 break;
291 }
292 if (i <= 0) {
293 printk("DMA probe failed.\n");
294 goto out1;
295 }
296 if (request_dma(dev->dma, cardname)) {
297 printk("probed DMA %d allocation failed.\n", dev->dma);
298 goto out1;
299 }
300 }
301 #endif /* jumpered DMA */
303 np = netdev_priv(dev);
304 spin_lock_init(&np->lock);
306 dev->open = net_open;
307 dev->stop = net_close;
308 dev->hard_start_xmit = net_send_packet;
309 dev->get_stats = net_get_stats;
310 dev->set_multicast_list = &set_multicast_list;
312 dev->tx_timeout = &net_tx_timeout;
313 dev->watchdog_timeo = MY_TX_TIMEOUT;
315 err = register_netdev(dev);
316 if (err)
317 goto out2;
318 return 0;
319 out2:
320 #ifdef jumpered_dma
321 free_dma(dev->dma);
322 #endif
323 out1:
324 #ifdef jumpered_interrupts
325 free_irq(dev->irq, dev);
326 #endif
327 out:
328 release_region(base_addr, NETCARD_IO_EXTENT);
329 return err;
330 }
332 static void net_tx_timeout(struct net_device *dev)
333 {
334 struct net_local *np = netdev_priv(dev);
336 printk(KERN_WARNING "%s: transmit timed out, %s?\n", dev->name,
337 tx_done(dev) ? "IRQ conflict" : "network cable problem");
339 /* Try to restart the adaptor. */
340 chipset_init(dev, 1);
342 np->stats.tx_errors++;
344 /* If we have space available to accept new transmit
345 * requests, wake up the queueing layer. This would
346 * be the case if the chipset_init() call above just
347 * flushes out the tx queue and empties it.
348 *
349 * If instead, the tx queue is retained then the
350 * netif_wake_queue() call should be placed in the
351 * TX completion interrupt handler of the driver instead
352 * of here.
353 */
354 if (!tx_full(dev))
355 netif_wake_queue(dev);
356 }
358 /*
359 * Open/initialize the board. This is called (in the current kernel)
360 * sometime after booting when the 'ifconfig' program is run.
361 *
362 * This routine should set everything up anew at each open, even
363 * registers that "should" only need to be set once at boot, so that
364 * there is non-reboot way to recover if something goes wrong.
365 */
366 static int
367 net_open(struct net_device *dev)
368 {
369 struct net_local *np = netdev_priv(dev);
370 int ioaddr = dev->base_addr;
371 /*
372 * This is used if the interrupt line can turned off (shared).
373 * See 3c503.c for an example of selecting the IRQ at config-time.
374 */
375 if (request_irq(dev->irq, &net_interrupt, 0, cardname, dev)) {
376 return -EAGAIN;
377 }
378 /*
379 * Always allocate the DMA channel after the IRQ,
380 * and clean up on failure.
381 */
382 if (request_dma(dev->dma, cardname)) {
383 free_irq(dev->irq, dev);
384 return -EAGAIN;
385 }
387 /* Reset the hardware here. Don't forget to set the station address. */
388 chipset_init(dev, 1);
389 outb(0x00, ioaddr);
390 np->open_time = jiffies;
392 /* We are now ready to accept transmit requeusts from
393 * the queueing layer of the networking.
394 */
395 netif_start_queue(dev);
397 return 0;
398 }
400 /* This will only be invoked if your driver is _not_ in XOFF state.
401 * What this means is that you need not check it, and that this
402 * invariant will hold if you make sure that the netif_*_queue()
403 * calls are done at the proper times.
404 */
405 static int net_send_packet(struct sk_buff *skb, struct net_device *dev)
406 {
407 struct net_local *np = netdev_priv(dev);
408 int ioaddr = dev->base_addr;
409 short length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN;
410 unsigned char *buf = skb->data;
412 /* If some error occurs while trying to transmit this
413 * packet, you should return '1' from this function.
414 * In such a case you _may not_ do anything to the
415 * SKB, it is still owned by the network queueing
416 * layer when an error is returned. This means you
417 * may not modify any SKB fields, you may not free
418 * the SKB, etc.
419 */
421 #if TX_RING
422 /* This is the most common case for modern hardware.
423 * The spinlock protects this code from the TX complete
424 * hardware interrupt handler. Queue flow control is
425 * thus managed under this lock as well.
426 */
427 spin_lock_irq(&np->lock);
429 add_to_tx_ring(np, skb, length);
430 dev->trans_start = jiffies;
432 /* If we just used up the very last entry in the
433 * TX ring on this device, tell the queueing
434 * layer to send no more.
435 */
436 if (tx_full(dev))
437 netif_stop_queue(dev);
439 /* When the TX completion hw interrupt arrives, this
440 * is when the transmit statistics are updated.
441 */
443 spin_unlock_irq(&np->lock);
444 #else
445 /* This is the case for older hardware which takes
446 * a single transmit buffer at a time, and it is
447 * just written to the device via PIO.
448 *
449 * No spin locking is needed since there is no TX complete
450 * event. If by chance your card does have a TX complete
451 * hardware IRQ then you may need to utilize np->lock here.
452 */
453 hardware_send_packet(ioaddr, buf, length);
454 np->stats.tx_bytes += skb->len;
456 dev->trans_start = jiffies;
458 /* You might need to clean up and record Tx statistics here. */
459 if (inw(ioaddr) == /*RU*/81)
460 np->stats.tx_aborted_errors++;
461 dev_kfree_skb (skb);
462 #endif
464 return 0;
465 }
#if TX_RING
/*
 * TX-complete processing, called from the interrupt handler.
 * Walks the ring from tx_old, accounts and frees every buffer the
 * hardware has finished with, then restarts a stopped queue if the
 * ring is no longer full.
 */
void net_tx(struct net_device *dev)
{
	struct net_local *lp = netdev_priv(dev);
	int slot;

	/* Serialise against dev->hard_start_xmit (net_send_packet). */
	spin_lock(&lp->lock);

	for (slot = lp->tx_old;
	     tx_entry_is_sent(lp, slot);
	     slot = next_tx_entry(lp, slot)) {
		struct sk_buff *done = lp->skbs[slot];

		lp->stats.tx_bytes += done->len;
		dev_kfree_skb_irq (done);
	}
	lp->tx_old = slot;

	/*
	 * The queue may have been stopped on a "tx full" condition;
	 * now that entries were reclaimed, let it run again.
	 */
	if (netif_queue_stopped(dev) && !tx_full(dev))
		netif_wake_queue(dev);

	spin_unlock(&lp->lock);
}
#endif
503 /*
504 * The typical workload of the driver:
505 * Handle the network interface interrupts.
506 */
507 static irqreturn_t net_interrupt(int irq, void *dev_id, struct pt_regs * regs)
508 {
509 struct net_device *dev = dev_id;
510 struct net_local *np;
511 int ioaddr, status;
512 int handled = 0;
514 ioaddr = dev->base_addr;
516 np = netdev_priv(dev);
517 status = inw(ioaddr + 0);
519 if (status == 0)
520 goto out;
521 handled = 1;
523 if (status & RX_INTR) {
524 /* Got a packet(s). */
525 net_rx(dev);
526 }
527 #if TX_RING
528 if (status & TX_INTR) {
529 /* Transmit complete. */
530 net_tx(dev);
531 np->stats.tx_packets++;
532 netif_wake_queue(dev);
533 }
534 #endif
535 if (status & COUNTERS_INTR) {
536 /* Increment the appropriate 'localstats' field. */
537 np->stats.tx_window_errors++;
538 }
539 out:
540 return IRQ_RETVAL(handled);
541 }
543 /* We have a good packet(s), get it/them out of the buffers. */
544 static void
545 net_rx(struct net_device *dev)
546 {
547 struct net_local *lp = netdev_priv(dev);
548 int ioaddr = dev->base_addr;
549 int boguscount = 10;
551 do {
552 int status = inw(ioaddr);
553 int pkt_len = inw(ioaddr);
555 if (pkt_len == 0) /* Read all the frames? */
556 break; /* Done for now */
558 if (status & 0x40) { /* There was an error. */
559 lp->stats.rx_errors++;
560 if (status & 0x20) lp->stats.rx_frame_errors++;
561 if (status & 0x10) lp->stats.rx_over_errors++;
562 if (status & 0x08) lp->stats.rx_crc_errors++;
563 if (status & 0x04) lp->stats.rx_fifo_errors++;
564 } else {
565 /* Malloc up new buffer. */
566 struct sk_buff *skb;
568 lp->stats.rx_bytes+=pkt_len;
570 skb = dev_alloc_skb(pkt_len);
571 if (skb == NULL) {
572 printk(KERN_NOTICE "%s: Memory squeeze, dropping packet.\n",
573 dev->name);
574 lp->stats.rx_dropped++;
575 break;
576 }
577 skb->dev = dev;
579 /* 'skb->data' points to the start of sk_buff data area. */
580 memcpy(skb_put(skb,pkt_len), (void*)dev->rmem_start,
581 pkt_len);
582 /* or */
583 insw(ioaddr, skb->data, (pkt_len + 1) >> 1);
585 netif_rx(skb);
586 dev->last_rx = jiffies;
587 lp->stats.rx_packets++;
588 lp->stats.rx_bytes += pkt_len;
589 }
590 } while (--boguscount);
592 return;
593 }
595 /* The inverse routine to net_open(). */
596 static int
597 net_close(struct net_device *dev)
598 {
599 struct net_local *lp = netdev_priv(dev);
600 int ioaddr = dev->base_addr;
602 lp->open_time = 0;
604 netif_stop_queue(dev);
606 /* Flush the Tx and disable Rx here. */
608 disable_dma(dev->dma);
610 /* If not IRQ or DMA jumpered, free up the line. */
611 outw(0x00, ioaddr+0); /* Release the physical interrupt line. */
613 free_irq(dev->irq, dev);
614 free_dma(dev->dma);
616 /* Update the statistics here. */
618 return 0;
620 }
622 /*
623 * Get the current statistics.
624 * This may be called with the card open or closed.
625 */
626 static struct net_device_stats *net_get_stats(struct net_device *dev)
627 {
628 struct net_local *lp = netdev_priv(dev);
629 short ioaddr = dev->base_addr;
631 /* Update the statistics from the device registers. */
632 lp->stats.rx_missed_errors = inw(ioaddr+1);
633 return &lp->stats;
634 }
636 /*
637 * Set or clear the multicast filter for this adaptor.
638 * num_addrs == -1 Promiscuous mode, receive all packets
639 * num_addrs == 0 Normal mode, clear multicast list
640 * num_addrs > 0 Multicast mode, receive normal and MC packets,
641 * and do best-effort filtering.
642 */
643 static void
644 set_multicast_list(struct net_device *dev)
645 {
646 short ioaddr = dev->base_addr;
647 if (dev->flags&IFF_PROMISC)
648 {
649 /* Enable promiscuous mode */
650 outw(MULTICAST|PROMISC, ioaddr);
651 }
652 else if((dev->flags&IFF_ALLMULTI) || dev->mc_count > HW_MAX_ADDRS)
653 {
654 /* Disable promiscuous mode, use normal mode. */
655 hardware_set_filter(NULL);
657 outw(MULTICAST, ioaddr);
658 }
659 else if(dev->mc_count)
660 {
661 /* Walk the address list, and load the filter */
662 hardware_set_filter(dev->mc_list);
664 outw(MULTICAST, ioaddr);
665 }
666 else
667 outw(0, ioaddr);
668 }
670 #ifdef MODULE
672 static struct net_device *this_device;
673 static int io = 0x300;
674 static int irq;
675 static int dma;
676 static int mem;
677 MODULE_LICENSE("GPL");
679 int init_module(void)
680 {
681 struct net_device *dev;
682 int result;
684 if (io == 0)
685 printk(KERN_WARNING "%s: You shouldn't use auto-probing with insmod!\n",
686 cardname);
687 dev = alloc_etherdev(sizeof(struct net_local));
688 if (!dev)
689 return -ENOMEM;
691 /* Copy the parameters from insmod into the device structure. */
692 dev->base_addr = io;
693 dev->irq = irq;
694 dev->dma = dma;
695 dev->mem_start = mem;
696 if (do_netcard_probe(dev) == 0) {
697 this_device = dev;
698 return 0;
699 }
700 free_netdev(dev);
701 return -ENXIO;
702 }
704 void
705 cleanup_module(void)
706 {
707 unregister_netdev(this_device);
708 cleanup_card(this_device);
709 free_netdev(this_device);
710 }
712 #endif /* MODULE */
714 /*
715 * Local variables:
716 * compile-command:
717 * gcc -D__KERNEL__ -Wall -Wstrict-prototypes -Wwrite-strings
718 * -Wredundant-decls -O2 -m486 -c skeleton.c
719 * version-control: t
720 * kept-new-versions: 5
721 * tab-width: 4
722 * c-indent-level: 4
723 * End:
724 */