ia64/linux-2.6.18-xen.hg

annotate arch/alpha/kernel/pci_impl.h @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, so there is temporary memory pressure
while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation, nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
rev   line source
ian@0 1 /*
ian@0 2 * linux/arch/alpha/kernel/pci_impl.h
ian@0 3 *
ian@0 4 * This file contains declarations and inline functions for interfacing
ian@0 5 * with the PCI initialization routines.
ian@0 6 */
ian@0 7
ian@0 8 struct pci_dev;
ian@0 9 struct pci_controller;
ian@0 10 struct pci_iommu_arena;
ian@0 11
ian@0 12 /*
ian@0 13 * We can't just blindly use 64K for machines with EISA busses; they
ian@0 14 * may also have PCI-PCI bridges present, and then we'd configure the
ian@0 15 * bridge incorrectly.
ian@0 16 *
ian@0 17 * Also, we start at 0x8000 or 0x9000, in hopes to get all devices'
ian@0 18 * IO space areas allocated *before* 0xC000; this is because certain
ian@0 19 * BIOSes (Millennium for one) use PCI Config space "mechanism #2"
ian@0 20 * accesses to probe the bus. If a device's registers appear at 0xC000,
ian@0 21 * it may see an INx/OUTx at that address during BIOS emulation of the
ian@0 22 * VGA BIOS, and some cards, notably Adaptec 2940UW, take mortal offense.
ian@0 23 */
ian@0 24
ian@0 25 #define EISA_DEFAULT_IO_BASE 0x9000 /* start above 8th slot */
ian@0 26 #define DEFAULT_IO_BASE 0x8000 /* start at 8th slot */
ian@0 27
ian@0 28 /*
ian@0 29 * We try to make the DEFAULT_MEM_BASE addresses *always* have more than
ian@0 30 * a single bit set. This is so that devices like the broken Myrinet card
ian@0 31 * will always have a PCI memory address that will never match an IDSEL
ian@0 32 * address in PCI Config space, which can cause problems with early rev cards.
ian@0 33 */
ian@0 34
ian@0 35 /*
ian@0 36 * An XL is AVANTI (APECS) family, *but* it has only 27 bits of ISA address
ian@0 37 * that get passed through the PCI<->ISA bridge chip. Although this causes
ian@0 38 * us to set the PCI->Mem window bases lower than normal, we still allocate
ian@0 39 * PCI bus devices' memory addresses *below* the low DMA mapping window,
ian@0 40 * and hope they fit below 64Mb (to avoid conflicts), and so that they can
ian@0 41 * be accessed via SPARSE space.
ian@0 42 *
ian@0 43 * We accept the risk that a broken Myrinet card will be put into a true XL
ian@0 44 * and thus can more easily run into the problem described above.
ian@0 45 */
ian@0 46 #define XL_DEFAULT_MEM_BASE ((16+2)*1024*1024) /* 16M to 64M-1 is avail */
ian@0 47
ian@0 48 /*
ian@0 49 * APECS and LCA have only 34 bits for physical addresses, thus limiting PCI
ian@0 50 * bus memory addresses for SPARSE access to be less than 128Mb.
ian@0 51 */
ian@0 52 #define APECS_AND_LCA_DEFAULT_MEM_BASE ((16+2)*1024*1024) /* = 18 MiB */
ian@0 53
ian@0 54 /*
ian@0 55 * Because MCPCIA and T2 core logic support more bits for
ian@0 56 * physical addresses, they should allow an expanded range of SPARSE
ian@0 57 * memory addresses. However, we do not use them all, in order to
ian@0 58 * avoid the HAE manipulation that would be needed.
ian@0 59 */
ian@0 60 #define MCPCIA_DEFAULT_MEM_BASE ((32+2)*1024*1024) /* = 34 MiB */
ian@0 61 #define T2_DEFAULT_MEM_BASE ((16+1)*1024*1024) /* = 17 MiB */
ian@0 62
ian@0 63 /*
ian@0 64 * Because CIA and PYXIS have more bits for physical addresses,
ian@0 65 * they support an expanded range of SPARSE memory addresses.
ian@0 66 */
ian@0 67 #define DEFAULT_MEM_BASE ((128+16)*1024*1024) /* = 144 MiB */
ian@0 68
ian@0 69 /* ??? Experimenting with no HAE for CIA. */
ian@0 70 #define CIA_DEFAULT_MEM_BASE ((32+2)*1024*1024) /* = 34 MiB */
ian@0 71
ian@0 72 #define IRONGATE_DEFAULT_MEM_BASE ((256*8-16)*1024*1024) /* = 2032 MiB; the product stays below 2^31, so it does not overflow a 32-bit int */
ian@0 73
ian@0 74 #define DEFAULT_AGP_APER_SIZE (64*1024*1024) /* = 64 MiB */
ian@0 75
ian@0 76 /*
ian@0 77 * A small note about bridges and interrupts. The DECchip 21050 (and
ian@0 78 * later) adheres to the PCI-PCI bridge specification. This says that
ian@0 79 * the interrupts on the other side of a bridge are swizzled in the
ian@0 80 * following manner:
ian@0 81 *
ian@0 82 * Dev Interrupt Interrupt
ian@0 83 * Pin on Pin on
ian@0 84 * Device Connector
ian@0 85 *
ian@0 86 * 4 A A
ian@0 87 * B B
ian@0 88 * C C
ian@0 89 * D D
ian@0 90 *
ian@0 91 * 5 A B
ian@0 92 * B C
ian@0 93 * C D
ian@0 94 * D A
ian@0 95 *
ian@0 96 * 6 A C
ian@0 97 * B D
ian@0 98 * C A
ian@0 99 * D B
ian@0 100 *
ian@0 101 * 7 A D
ian@0 102 * B A
ian@0 103 * C B
ian@0 104 * D C
ian@0 105 *
ian@0 106 * Where A = pin 1, B = pin 2 and so on and pin=0 = default = A.
ian@0 107 * Thus, each swizzle is ((pin-1) + (device#-4)) % 4
ian@0 108 *
ian@0 109 * The following code swizzles for exactly one bridge. The routine
ian@0 110 * common_swizzle below handles multiple bridges. But there are a
ian@0 111 * couple of boards that do strange things, so we define this here.
ian@0 112 */
ian@0 113
ian@0 114 static inline u8 bridge_swizzle(u8 pin, u8 slot) /* pin is 1-based (1 = A .. 4 = D); returns the 1-based pin seen on the other side of one bridge */
ian@0 115 {
ian@0 116 return (((pin-1) + slot) % 4) + 1; /* make pin 0-based, rotate by slot, wrap to 0..3, convert back to 1-based. NOTE(review): for pin == 0 the (pin-1) term is -1 after integer promotion, so the result varies with slot (e.g. 0 when slot is 0); callers presumably pass 1..4 -- confirm */
ian@0 117 }
ian@0 118
ian@0 119
ian@0 120 /* The following macro is used to implement the table-based irq mapping
ian@0 121 function for all single-bus Alphas. */
ian@0 122
ian@0 123 #define COMMON_TABLE_LOOKUP /* takes no arguments: reads slot, pin, min_idsel, max_idsel, irqs_per_slot and irq_tab from the expansion site */ \
ian@0 124 ({ long _ctl_ = -1; /* -1 is the value yielded when slot or pin falls outside the table */ \
ian@0 125 if (slot >= min_idsel && slot <= max_idsel && pin < irqs_per_slot) \
ian@0 126 _ctl_ = irq_tab[slot - min_idsel][pin]; /* row 0 of irq_tab corresponds to min_idsel */ \
ian@0 127 _ctl_; }) /* GNU statement expression: the whole macro evaluates to _ctl_ */
ian@0 128
ian@0 129
ian@0 130 /* A PCI IOMMU allocation arena. There are typically two of these
ian@0 131 regions per bus. */
ian@0 132 /* ??? The 8400 has a 32-byte pte entry, and the entire table apparently
ian@0 133 lives directly on the host bridge (no tlb?). We don't support this
ian@0 134 machine, but if we ever did, we'd need to parameterize all this quite
ian@0 135 a bit further. Probably with per-bus operation tables. */
ian@0 136
ian@0 137 struct pci_iommu_arena
ian@0 138 {
ian@0 139 spinlock_t lock; /* presumably serializes updates to the fields below -- confirm against users */
ian@0 140 struct pci_controller *hose; /* presumably the controller (hose) this arena belongs to -- confirm */
ian@0 141 #define IOMMU_INVALID_PTE 0x2 /* 32:63 bits MBZ */
ian@0 142 #define IOMMU_RESERVED_PTE 0xface /* NOTE(review): looks like a marker value for reserved entries in ptes -- confirm */
ian@0 143 unsigned long *ptes; /* pointer to the arena's pte storage; one unsigned long per entry as declared here */
ian@0 144 dma_addr_t dma_base; /* presumably the bus address at which the arena's window starts -- confirm */
ian@0 145 unsigned int size; /* NOTE(review): units (bytes vs. entries) are not visible in this header */
ian@0 146 unsigned int next_entry; /* presumably the index where the next allocation search starts -- confirm */
ian@0 147 unsigned int align_entry; /* presumably an allocation alignment expressed in entries -- confirm */
ian@0 148 };
ian@0 149
ian@0 150 #if defined(CONFIG_ALPHA_SRM) && \
ian@0 151 (defined(CONFIG_ALPHA_CIA) || defined(CONFIG_ALPHA_LCA))
ian@0 152 # define NEED_SRM_SAVE_RESTORE
ian@0 153 #else /* !(CONFIG_ALPHA_SRM && (CONFIG_ALPHA_CIA || CONFIG_ALPHA_LCA)) */
ian@0 154 # undef NEED_SRM_SAVE_RESTORE
ian@0 155 #endif /* CONFIG_ALPHA_SRM && (CONFIG_ALPHA_CIA || CONFIG_ALPHA_LCA) */
ian@0 156
ian@0 157 #if defined(CONFIG_ALPHA_GENERIC) || defined(NEED_SRM_SAVE_RESTORE)
ian@0 158 # define ALPHA_RESTORE_SRM_SETUP
ian@0 159 #else /* !(CONFIG_ALPHA_GENERIC || NEED_SRM_SAVE_RESTORE) */
ian@0 160 # undef ALPHA_RESTORE_SRM_SETUP
ian@0 161 #endif /* CONFIG_ALPHA_GENERIC || NEED_SRM_SAVE_RESTORE */
ian@0 162
ian@0 163 #ifdef ALPHA_RESTORE_SRM_SETUP
ian@0 164 /* Store PCI device configuration left by SRM here. */
ian@0 165 struct pdev_srm_saved_conf
ian@0 166 {
ian@0 167 struct pdev_srm_saved_conf *next; /* link to another saved record; list discipline (head, termination) is not visible here */
ian@0 168 struct pci_dev *dev; /* the PCI device this record refers to */
ian@0 169 };
ian@0 170
ian@0 171 extern void pci_restore_srm_config(void); /* NOTE(review): pdev_save_srm_config() has no declaration in this branch; presumably it is declared or defined where it is used -- confirm */
ian@0 172 #else /* !ALPHA_RESTORE_SRM_SETUP: both hooks expand to empty statements */
ian@0 173 #define pdev_save_srm_config(dev) do {} while (0)
ian@0 174 #define pci_restore_srm_config() do {} while (0)
ian@0 175 #endif /* ALPHA_RESTORE_SRM_SETUP */
ian@0 176
ian@0 177 /* The hose list. */
ian@0 178 extern struct pci_controller *hose_head, **hose_tail;
ian@0 179 extern struct pci_controller *pci_isa_hose;
ian@0 180
ian@0 181 /* Indicate that we trust the console to configure things properly. */
ian@0 182 extern int pci_probe_only;
ian@0 183
ian@0 184 extern unsigned long alpha_agpgart_size;
ian@0 185
ian@0 186 extern void common_init_pci(void);
ian@0 187 extern u8 common_swizzle(struct pci_dev *, u8 *);
ian@0 188 extern struct pci_controller *alloc_pci_controller(void);
ian@0 189 extern struct resource *alloc_resource(void);
ian@0 190
ian@0 191 extern struct pci_iommu_arena *iommu_arena_new_node(int, /* NOTE(review): unnamed int; presumably a node id, going by the _node suffix -- confirm */
ian@0 192 struct pci_controller *,
ian@0 193 dma_addr_t, unsigned long, /* NOTE(review): unnamed; presumably the arena base and a size -- confirm against the definition */
ian@0 194 unsigned long);
ian@0 195 extern struct pci_iommu_arena *iommu_arena_new(struct pci_controller *, /* same parameter list as iommu_arena_new_node minus the leading int */
ian@0 196 dma_addr_t, unsigned long,
ian@0 197 unsigned long);
ian@0 198 extern const char *const pci_io_names[];
ian@0 199 extern const char *const pci_mem_names[];
ian@0 200 extern const char pci_hae0_name[];
ian@0 201
ian@0 202 extern unsigned long size_for_memory(unsigned long max);
ian@0 203
ian@0 204 extern int iommu_reserve(struct pci_iommu_arena *, long, long); /* NOTE(review): the long parameters of these four prototypes are unnamed; their meaning and the int return convention must be read from the definitions */
ian@0 205 extern int iommu_release(struct pci_iommu_arena *, long, long);
ian@0 206 extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *);
ian@0 207 extern int iommu_unbind(struct pci_iommu_arena *, long, long);
ian@0 208
ian@0 209