xenbits.xensource.com Git - people/pauldu/xen.git/commitdiff
bitkeeper revision 1.1159.203.1 (41af4018fV44p80Jw5YxFLlk5sh8jg)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Thu, 2 Dec 2004 16:17:28 +0000 (16:17 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Thu, 2 Dec 2004 16:17:28 +0000 (16:17 +0000)
Cleaned balloon driver internals. Better integration with device
drivers that also inc/dec memory reservation. Should be more
informative (cat /proc/xen/balloon) and more stable. Needs testing
with Linux 2.4...

.rootkeys
linux-2.4.28-xen-sparse/mkbuildtree
linux-2.6.9-xen-sparse/arch/xen/i386/kernel/pci-dma.c
linux-2.6.9-xen-sparse/arch/xen/i386/mm/hypervisor.c
linux-2.6.9-xen-sparse/drivers/xen/Makefile
linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c
linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c
linux-2.6.9-xen-sparse/drivers/xen/netfront/netfront.c
linux-2.6.9-xen-sparse/drivers/xen/privcmd/privcmd.c
linux-2.6.9-xen-sparse/include/asm-xen/balloon.h [new file with mode: 0644]
linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h

index 31497b58ac63388125e6d421654f7de88b8ce15c..44aaa3ca194ace48efe462991738b01168643aaf 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
 41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/vga.h
 40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/xor.h
+41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.9-xen-sparse/include/asm-xen/balloon.h
 40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.9-xen-sparse/include/asm-xen/ctrl_if.h
 40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.9-xen-sparse/include/asm-xen/evtchn.h
 419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.9-xen-sparse/include/asm-xen/foreign_page.h
index 382d90588bbb698e0ad6d8bf7ef7a9ff76a74dcb..a0e52c2c5ce9fdb4a5a6c7e628303bc8cc2fa952 100755 (executable)
@@ -204,6 +204,7 @@ ln -sf ../asm-i386/unaligned.h
 ln -sf ../asm-i386/unistd.h 
 ln -sf ../asm-i386/user.h 
 ln -sf ../asm-i386/vm86.h 
+ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h
 ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h
 ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h
 ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h
index b7939cff41ca5fbda39e1d9437f339cf4672e784..c3462814051a0fe8bdf74d8d55ca5af5d76e3a7d 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/pci.h>
 #include <linux/version.h>
 #include <asm/io.h>
+#include <asm-xen/balloon.h>
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
 #define pte_offset_kernel pte_offset
@@ -37,9 +38,12 @@ xen_contig_memory(unsigned long vstart, unsigned int order)
        pgd_t         *pgd; 
        pmd_t         *pmd;
        pte_t         *pte;
-       unsigned long  pfn, i;
+       unsigned long  pfn, i, flags;
 
        scrub_pages(vstart, 1 << order);
+
+        balloon_lock(flags);
+
        /* 1. Zap current PTEs, giving away the underlying pages. */
        for (i = 0; i < (1<<order); i++) {
                pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
@@ -70,6 +74,8 @@ xen_contig_memory(unsigned long vstart, unsigned int order)
        }
        /* Flush updates through and flush the TLB. */
        xen_tlb_flush();
+
+        balloon_unlock(flags);
 }
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
index d950b8a8e2d178283086f704194846d7f8b49bf5..81d0b8450c6a42e37f283408a01bb5ce8db00907 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/multicall.h>
+#include <asm-xen/balloon.h>
 
 /*
  * This suffices to protect us if we ever move to SMP domains.
@@ -352,7 +353,6 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages)
     unsigned long *pfn_array;
     unsigned long  vstart;
     unsigned long  i;
-    int            ret;
     unsigned int   order = get_order(pages*PAGE_SIZE);
 
     vstart = __get_free_pages(GFP_KERNEL, order);
@@ -378,57 +378,11 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages)
     /* Flush updates through and flush the TLB. */
     xen_tlb_flush();
 
-    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
-                                pfn_array, 1<<order, 0);
-    if ( unlikely(ret != (1<<order)) )
-    {
-        printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
-        BUG();
-    }
+    balloon_put_pages(pfn_array, 1 << order);
 
     vfree(pfn_array);
 
     return vstart;
 }
 
-void deallocate_lowmem_region(unsigned long vstart, unsigned long pages)
-{
-    pgd_t         *pgd; 
-    pmd_t         *pmd;
-    pte_t         *pte;
-    unsigned long *pfn_array;
-    unsigned long  i;
-    int            ret;
-    unsigned int   order = get_order(pages*PAGE_SIZE);
-
-    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
-    if ( pfn_array == NULL )
-        BUG();
-
-    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
-                                pfn_array, 1<<order, 0);
-    if ( unlikely(ret != (1<<order)) )
-    {
-        printk(KERN_WARNING "Unable to increase memory reservation (%d)\n",
-               ret);
-        BUG();
-    }
-
-    for ( i = 0; i < (1<<order); i++ )
-    {
-        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
-        pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
-        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
-        queue_l1_entry_update(pte, (pfn_array[i]<<PAGE_SHIFT)|__PAGE_KERNEL);
-        queue_machphys_update(pfn_array[i], __pa(vstart)>>PAGE_SHIFT);
-        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i];
-    }
-
-    flush_page_update_queue();
-
-    vfree(pfn_array);
-
-    free_pages(vstart, order);
-}
-
 #endif /* CONFIG_XEN_PHYSDEV_ACCESS */
index e181171a61a50bd713b0b7afcdeadd98b133a908..0bfb5a50c39ca7ae74d133adb535ac24ad1aef63 100644 (file)
@@ -2,9 +2,9 @@
 
 obj-y  += console/
 obj-y  += evtchn/
-obj-y  += privcmd/
-obj-y   += balloon/
+obj-y  += balloon/
 
+obj-$(CONFIG_XEN_PRIVILEGED_GUEST)     += privcmd/
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
index 94658f1e54b37a6bf92ce4fb39d5d87c2e546b78..772d52b1ec6dd7b34cea442b643d052f716c0328 100644 (file)
@@ -29,8 +29,8 @@
  */
 
 #include <linux/config.h>
-#include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <asm-xen/xen_proc.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/ctrl_if.h>
+#include <asm-xen/balloon.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <linux/list.h>
 
+#define MIN_TARGET ((16 << 20) >> PAGE_SHIFT) /* 16MB */
+
 static struct proc_dir_entry *balloon_pde;
 
-unsigned long credit;
+static DECLARE_MUTEX(balloon_mutex);
+spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
+
+/* We aim for 'current allocation' == 'target allocation'. */
 static unsigned long current_pages;
+static unsigned long target_pages;
+
+/* We may hit the hard limit in Xen. If we do then we remember it. */
+static unsigned long hard_limit;
+
+/*
+ * Drivers may alter the memory reservation independently, but they must
+ * inform the balloon driver so that we can avoid hitting the hard limit.
+ */
+static unsigned long driver_pages;
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static LIST_HEAD(ballooned_pages);
+static unsigned long balloon_low, balloon_high;
+
+/* Main work function, always executed in process context. */
+static void balloon_process(void *unused);
+static DECLARE_WORK(balloon_worker, balloon_process, NULL);
+static struct timer_list balloon_timer;
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 /* Use the private and mapping fields of struct page as a list. */
@@ -75,17 +100,24 @@ static unsigned long current_pages;
 #define WPRINTK(fmt, args...) \
     printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
-/* List of ballooned pages, threaded through the mem_map array. */
-LIST_HEAD(ballooned_pages);
-
 /* balloon_append: add the given page to the balloon. */
-void balloon_append(struct page *page)
+static void balloon_append(struct page *page)
 {
-    list_add(PAGE_TO_LIST(page), &ballooned_pages);
+    /* Low memory is re-populated first, so highmem pages go at list tail. */
+    if ( PageHighMem(page) )
+    {
+        list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+        balloon_high++;
+    }
+    else
+    {
+        list_add(PAGE_TO_LIST(page), &ballooned_pages);
+        balloon_low++;
+    }
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-struct page *balloon_retrieve(void)
+static struct page *balloon_retrieve(void)
 {
     struct page *page;
 
@@ -94,6 +126,12 @@ struct page *balloon_retrieve(void)
 
     page = LIST_TO_PAGE(ballooned_pages.next);
     UNLIST_PAGE(page);
+
+    if ( PageHighMem(page) )
+        balloon_high--;
+    else
+        balloon_low--;
+
     return page;
 }
 
@@ -111,194 +149,148 @@ static inline pte_t *get_ptep(unsigned long addr)
     return pte_offset_kernel(pmd, addr);
 }
 
-/* Main function for relinquishing memory. */
-static unsigned long inflate_balloon(unsigned long num_pages)
+static void balloon_alarm(unsigned long unused)
 {
-    unsigned long *parray, *currp, curraddr, ret = 0, i, j, mfn, pfn;
-    struct page *page;
-
-    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
-    if ( parray == NULL )
-    {
-        WPRINTK("inflate_balloon: Unable to vmalloc parray\n");
-        return -ENOMEM;
-    }
+    schedule_work(&balloon_worker);
+}
 
-    currp = parray;
+static unsigned long current_target(void)
+{
+    unsigned long target = min(target_pages, hard_limit);
+    if ( target > (current_pages + balloon_low + balloon_high) )
+        target = current_pages + balloon_low + balloon_high;
+    if ( target < MIN_TARGET )
+        target = MIN_TARGET;
+    return target;
+}
 
-    for ( i = 0; i < num_pages; i++, currp++ )
-    {
-        page = alloc_page(GFP_HIGHUSER);
-        pfn  = page - mem_map;
+static void balloon_process(void *unused)
+{
+    unsigned long *mfn_list, pfn, i, flags;
+    struct page   *page;
+    long           credit, debt, rc;
+    void          *v;
 
-        /* If allocation fails then free all reserved pages. */
-        if ( page == NULL )
-        {
-            printk(KERN_ERR "Unable to inflate balloon by %ld, only"
-                   " %ld pages free.", num_pages, i);
-            currp = parray;
-            for ( j = 0; j < i; j++, currp++ )
-                __free_page((struct page *) (mem_map + *currp));
-
-            ret = -EFAULT;
-            goto cleanup;
-        }
+    down(&balloon_mutex);
 
-        *currp = pfn;
-    }
+ retry:
+    mfn_list = NULL;
 
-    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
+    if ( (credit = current_target() - current_pages) > 0 )
     {
-        mfn      = phys_to_machine_mapping[*currp];
-        curraddr = (unsigned long)page_address(mem_map + *currp);
-        /* Blow away page contents for security, and also p.t. ref if any. */
-        if ( curraddr != 0 )
+        mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
+        if ( mfn_list == NULL )
+            goto out;
+
+        balloon_lock(flags);
+        rc = HYPERVISOR_dom_mem_op(
+            MEMOP_increase_reservation, mfn_list, credit, 0);
+        balloon_unlock(flags);
+        if ( rc < credit )
         {
-            scrub_pages(curraddr, 1);
-            queue_l1_entry_update(get_ptep(curraddr), 0);
+            /* We hit the Xen hard limit: reprobe. */
+            if ( HYPERVISOR_dom_mem_op(
+                MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
+                BUG();
+            hard_limit = current_pages + rc - driver_pages;
+            vfree(mfn_list);
+            goto retry;
         }
-#ifdef CONFIG_XEN_SCRUB_PAGES
-        else
+
+        for ( i = 0; i < credit; i++ )
         {
-            void *p = kmap(&mem_map[*currp]);
-            scrub_pages(p, 1);
-            kunmap(&mem_map[*currp]);
+            if ( (page = balloon_retrieve()) == NULL )
+                BUG();
+
+            pfn = page - mem_map;
+            if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
+                BUG();
+
+            /* Update P->M and M->P tables. */
+            phys_to_machine_mapping[pfn] = mfn_list[i];
+            queue_machphys_update(mfn_list[i], pfn);
+            
+            /* Link back into the page tables if it's not a highmem page. */
+            if ( pfn < max_low_pfn )
+                queue_l1_entry_update(
+                    get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
+                    (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
+            
+            /* Finally, relinquish the memory back to the system allocator. */
+            ClearPageReserved(page);
+            set_page_count(page, 1);
+            __free_page(page);
         }
-#endif
 
-        balloon_append(&mem_map[*currp]);
-
-        phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY;
-        *currp = mfn;
+        current_pages += credit;
     }
-
-    /* Flush updates through and flush the TLB. */
-    xen_tlb_flush();
-
-    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
-                                parray, num_pages, 0);
-    if ( unlikely(ret != num_pages) )
+    else if ( credit < 0 )
     {
-        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
-        goto cleanup;
-    }
+        debt = -credit;
 
-    credit += num_pages;
-    ret = num_pages;
+        mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
+        if ( mfn_list == NULL )
+            goto out;
 
- cleanup:
-    vfree(parray);
-
-    return ret;
-}
+        for ( i = 0; i < debt; i++ )
+        {
+            if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
+            {
+                debt = i;
+                break;
+            }
+
+            pfn = page - mem_map;
+            mfn_list[i] = phys_to_machine_mapping[pfn];
+            phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+
+            if ( !PageHighMem(page) )
+            {
+                v = phys_to_virt((page - mem_map) << PAGE_SHIFT);
+                scrub_pages(v, 1);
+                queue_l1_entry_update(get_ptep((unsigned long)v), 0);
+            }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+            else
+            {
+                v = kmap(page);
+                scrub_pages(v, 1);
+                kunmap(page);
+            }
+#endif            
 
-/* Install a set of new pages (@mfn_list, @nr_mfns) into the memory map. */
-static unsigned long process_returned_pages(
-    unsigned long *mfn_list, unsigned long nr_mfns)
-{
-    unsigned long pfn, i;
-    struct page *page;
+            balloon_append(page);
+        }
 
-    for ( i = 0; i < nr_mfns; i++ )
-    {
-        if ( (page = balloon_retrieve()) != NULL )
-            break;
+        /* Flush updates through and flush the TLB. */
+        xen_tlb_flush();
 
-        pfn = page - mem_map;
-        if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
+        if ( HYPERVISOR_dom_mem_op(
+            MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
             BUG();
 
-        /* Update P->M and M->P tables. */
-        phys_to_machine_mapping[pfn] = mfn_list[i];
-        queue_machphys_update(mfn_list[i], pfn);
-
-        /* Link back into the page tables if it's not a highmem page. */
-        if ( pfn < max_low_pfn )
-            queue_l1_entry_update(
-                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
-                (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-
-        /* Finally, relinquish the memory back to the system allocator. */
-        ClearPageReserved(page);
-        set_page_count(page, 1);
-        __free_page(page);
-    }
-
-    return i;
-}
-
-unsigned long deflate_balloon(unsigned long num_pages)
-{
-    unsigned long ret;
-    unsigned long *parray;
-
-    if ( num_pages > credit )
-    {
-        printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
-               num_pages, credit);
-        return -EAGAIN;
-    }
-
-    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
-    if ( parray == NULL )
-    {
-        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
-        return 0;
-    }
-
-    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
-                                parray, num_pages, 0);
-    if ( unlikely(ret != num_pages) )
-    {
-        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
-               ret);
-        goto cleanup;
-    }
-
-    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
-    {
-        printk(KERN_WARNING
-               "deflate_balloon: restored only %lx of %lx pages.\n",
-           ret, num_pages);
-        goto cleanup;
+        current_pages -= debt;
     }
 
-    ret = num_pages;
-    credit -= num_pages;
+ out:
+    if ( mfn_list != NULL )
+        vfree(mfn_list);
 
- cleanup:
-    vfree(parray);
+    /* Schedule more work if there is some still to be done. */
+    if ( current_target() != current_pages )
+        mod_timer(&balloon_timer, jiffies + HZ);
 
-    return ret;
+    up(&balloon_mutex);
 }
 
-#define PAGE_TO_MB_SHIFT 8
-
-static int balloon_try_target(int target)
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+static void set_new_target(unsigned long target)
 {
-    int change, reclaim;
-
-    if ( target < current_pages )
-    {
-        if ( (change = inflate_balloon(current_pages-target)) <= 0 )
-            return change;
-        current_pages -= change;
-        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
-            change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
-    }
-    else if ( (reclaim = target - current_pages) > 0 )
-    {
-        if ( (change = deflate_balloon(reclaim)) <= 0 )
-            return change;
-        current_pages += change;
-        printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
-               change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
-    }
-
-    return 1;
+    hard_limit   = ~0UL;
+    target_pages = target;
+    balloon_process(NULL);
 }
 
-
 static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
 {
     switch ( msg->subtype )
@@ -308,7 +300,8 @@ static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
             goto parse_error;
         {
             mem_request_t *req = (mem_request_t *)&msg->msg[0];
-            req->status = balloon_try_target(req->target);
+            set_new_target(req->target);
+            req->status = 0;
         }
         break;        
     default:
@@ -323,20 +316,13 @@ static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
     ctrl_if_send_response(msg);
 }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-typedef size_t count_t;
-#else
-typedef u_long count_t;
-#endif
-
-static int do_balloon_write(const char *buffer, count_t count)
+static int balloon_write(struct file *file, const char *buffer,
+                         size_t count, loff_t *offp)
 {
     char memstring[64], *endchar;
     int len, i;
-    unsigned long target;
-    unsigned long long targetbytes;
+    unsigned long long target_bytes;
 
-    /* Only admin can play with the balloon :) */
     if ( !capable(CAP_SYS_ADMIN) )
         return -EPERM;
 
@@ -344,8 +330,10 @@ static int do_balloon_write(const char *buffer, count_t count)
         return -EFBIG;
 
     len = strnlen_user(buffer, count);
-    if ( len == 0 ) return -EBADMSG;
-    if ( len == 1 ) return 1; /* input starts with a NUL char */
+    if ( len == 0 )
+        return -EBADMSG;
+    if ( len == 1 )
+        goto out; /* input starts with a NUL char */
     if ( strncpy_from_user(memstring, buffer, len) < 0 )
         return -EFAULT;
 
@@ -356,24 +344,10 @@ static int do_balloon_write(const char *buffer, count_t count)
     if ( i == 0 )
         return -EBADMSG;
 
-    targetbytes = memparse(memstring,&endchar);
-    target = targetbytes >> PAGE_SHIFT;
-
-    i = balloon_try_target(target);
-
-    if ( i <= 0 ) return i;
-
-    return len;
-}
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-static int balloon_write(struct file *file, const char *buffer,
-                         size_t count, loff_t *offp)
-{
-    int len = do_balloon_write(buffer, count);
-    
-    if ( len <= 0 ) return len;
+    target_bytes = memparse(memstring,&endchar);
+    set_new_target(target_bytes >> PAGE_SHIFT);
 
+ out:
     *offp += len;
     return len;
 }
@@ -381,18 +355,44 @@ static int balloon_write(struct file *file, const char *buffer,
 static int balloon_read(struct file *filp, char *buffer,
                         size_t count, loff_t *offp)
 {
-    static char priv_buf[32];
-    char *priv_bufp = priv_buf;
+    char *priv_buf;
     int len;
-    len = sprintf(priv_buf,"%lu\n",current_pages<<PAGE_SHIFT);
+
+    priv_buf = (char *)__get_free_page(GFP_KERNEL);
+    if ( priv_buf == NULL )
+        return -ENOMEM;
+
+#define K(_p) ((_p)<<(PAGE_SHIFT-10))
+    len = sprintf(
+        priv_buf,
+        "Current allocation: %8lu kB\n"
+        "Target allocation:  %8lu kB / %8lu kB (actual / requested)\n"
+        "Unused heap space:  %8lu kB / %8lu kB (low-mem / high-mem)\n"
+        "Xen hard limit:     ",
+        K(current_pages),
+        K(current_target()), K(target_pages),
+        K(balloon_low), K(balloon_high));
+
+    if ( hard_limit != ~0UL )
+        len += sprintf(
+            priv_buf + len, 
+            "%8lu kB (inc. %8lu kB driver headroom)\n",
+            K(hard_limit), K(driver_pages));
+    else
+        len += sprintf(
+            priv_buf + len,
+            "     ??? kB\n");
 
     len -= *offp;
-    priv_bufp += *offp;
-    if (len>count) len = count;
-    if (len<0) len = 0;
+    if ( len > count)
+        len = count;
+    if ( len < 0 )
+        len = 0;
 
-    if ( copy_to_user(buffer, priv_bufp, len) != 0 )
-        return -EFAULT;
+    if ( len != 0 )
+        (void)copy_to_user(buffer, &priv_buf[*offp], len);
+
+    free_page((unsigned long)priv_buf);
 
     *offp += len;
     return len;
@@ -403,30 +403,6 @@ static struct file_operations balloon_fops = {
     .write = balloon_write
 };
 
-#else
-
-static int balloon_write(struct file *file, const char *buffer,
-                         u_long count, void *data)
-{
-    return do_balloon_write(buffer, count);
-}
-
-static int balloon_read(char *page, char **start, off_t off,
-                       int count, int *eof, void *data)
-{
-  int len;
-  len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
-  
-  if (len <= off+count) *eof = 1;
-  *start = page + off;
-  len -= off;
-  if (len>count) len = count;
-  if (len<0) len = 0;
-  return len;
-}
-
-#endif
-
 static int __init balloon_init(void)
 {
     unsigned long pfn;
@@ -435,20 +411,23 @@ static int __init balloon_init(void)
     IPRINTK("Initialising balloon driver.\n");
 
     current_pages = min(xen_start_info.nr_pages, max_pfn);
-    if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
+    target_pages  = current_pages;
+    balloon_low   = 0;
+    balloon_high  = 0;
+    driver_pages  = 0UL;
+    hard_limit    = ~0UL;
+
+    init_timer(&balloon_timer);
+    balloon_timer.data = 0;
+    balloon_timer.function = balloon_alarm;
+    
+    if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
     {
-        WPRINTK("Unable to create balloon driver proc entry!");
+        WPRINTK("Unable to create /proc/xen/balloon.\n");
         return -1;
     }
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    balloon_pde->owner     = THIS_MODULE;
-    balloon_pde->nlink     = 1;
     balloon_pde->proc_fops = &balloon_fops;
-#else
-    balloon_pde->write_proc = balloon_write;
-    balloon_pde->read_proc  = balloon_read;
-#endif
 
     (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
                                     CALLBACK_IN_BLOCKING_CONTEXT);
@@ -464,14 +443,29 @@ static int __init balloon_init(void)
     return 0;
 }
 
-static void __exit balloon_cleanup(void)
+__initcall(balloon_init);
+
+void balloon_update_driver_allowance(long delta)
 {
-    if ( balloon_pde != NULL )
-    {
-        remove_xen_proc_entry("memory_target");
-        balloon_pde = NULL;
-    }
+    unsigned long flags;
+    balloon_lock(flags);
+    driver_pages += delta;
+    balloon_unlock(flags);
+}
+
+void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
+{
+    unsigned long flags;
+
+    balloon_lock(flags);
+    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
+                               mfn_list, nr_mfns, 0) != nr_mfns )
+        BUG();
+    current_pages -= nr_mfns;
+    balloon_unlock(flags);
+
+    schedule_work(&balloon_worker);
 }
 
-module_init(balloon_init);
-module_exit(balloon_cleanup);
+EXPORT_SYMBOL(balloon_update_driver_allowance);
+EXPORT_SYMBOL(balloon_put_pages);
index 1e114bd556f928ff427676c2d8d0f349c9436160..41d947bacfbc1204e575fccfd0901276365f3a58 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include "common.h"
+#include <asm-xen/balloon.h>
 
 static void netif_page_release(struct page *page);
 static void netif_skb_release(struct sk_buff *skb);
@@ -29,6 +30,8 @@ static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
 static void net_rx_action(unsigned long unused);
 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
 
+static struct timer_list net_timer;
+
 static struct sk_buff_head rx_queue;
 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
@@ -69,27 +72,20 @@ static unsigned long mfn_list[MAX_MFN_ALLOC];
 static unsigned int alloc_index = 0;
 static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
 
-static void __refresh_mfn_list(void)
-{
-    int ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
-                                    mfn_list, MAX_MFN_ALLOC, 0);
-    if ( unlikely(ret != MAX_MFN_ALLOC) )
-        BUG();
-    alloc_index = MAX_MFN_ALLOC;
-}
-
-static unsigned long get_new_mfn(void)
+static unsigned long alloc_mfn(void)
 {
-    unsigned long mfn, flags;
+    unsigned long mfn = 0, flags;
     spin_lock_irqsave(&mfn_lock, flags);
-    if ( alloc_index == 0 )
-        __refresh_mfn_list();
-    mfn = mfn_list[--alloc_index];
+    if ( unlikely(alloc_index == 0) )
+        alloc_index = HYPERVISOR_dom_mem_op(
+            MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
+    if ( alloc_index != 0 )
+        mfn = mfn_list[--alloc_index];
     spin_unlock_irqrestore(&mfn_lock, flags);
     return mfn;
 }
 
-static void dealloc_mfn(unsigned long mfn)
+static void free_mfn(unsigned long mfn)
 {
     unsigned long flags;
     spin_lock_irqsave(&mfn_lock, flags);
@@ -210,8 +206,16 @@ static void net_rx_action(unsigned long unused)
         netif   = (netif_t *)skb->dev->priv;
         vdata   = (unsigned long)skb->data;
         mdata   = virt_to_machine(vdata);
-        new_mfn = get_new_mfn();
-        
+
+        /* Memory squeeze? Back off for an arbitrary while. */
+        if ( (new_mfn = alloc_mfn()) == 0 )
+        {
+            if ( net_ratelimit() )
+                printk(KERN_WARNING "Memory squeeze in netback driver.\n");
+            mod_timer(&net_timer, jiffies + HZ);
+            break;
+        }
+
         /*
          * Set the new P2M table entry before reassigning the old data page.
          * Heed the comment in pgtable-2level.h:pte_page(). :-)
@@ -280,7 +284,7 @@ static void net_rx_action(unsigned long unused)
         if ( unlikely(mcl[1].args[5] != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
-            dealloc_mfn(mdata >> PAGE_SHIFT);
+            free_mfn(mdata >> PAGE_SHIFT);
             status = NETIF_RSP_ERROR;
         }
 
@@ -307,7 +311,7 @@ static void net_rx_action(unsigned long unused)
     }
 
     /* More work to do? */
-    if ( !skb_queue_empty(&rx_queue) )
+    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
         tasklet_schedule(&net_rx_tasklet);
 #if 0
     else
@@ -315,6 +319,11 @@ static void net_rx_action(unsigned long unused)
 #endif
 }
 
+static void net_alarm(unsigned long unused)
+{
+    tasklet_schedule(&net_rx_tasklet);
+}
+
 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
 {
     netif_t *netif = dev->priv;
@@ -781,9 +790,16 @@ static int __init netback_init(void)
 
     printk("Initialising Xen netif backend\n");
 
+    /* We can increase reservation by this much in net_rx_action(). */
+    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
+
     skb_queue_head_init(&rx_queue);
     skb_queue_head_init(&tx_queue);
 
+    init_timer(&net_timer);
+    net_timer.data = 0;
+    net_timer.function = net_alarm;
+    
     netif_interface_init();
 
     if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
index dd1b2492fff9e608ef626d98a9e5c2401297badd..d2b96cbe0f1cae2b5d483ff736772eebcdf17b3e 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm-xen/evtchn.h>
 #include <asm-xen/ctrl_if.h>
 #include <asm-xen/xen-public/io/netif.h>
+#include <asm-xen/balloon.h>
 #include <asm/page.h>
 
 #include <net/arp.h>
@@ -409,6 +410,9 @@ static void network_alloc_rx_buffers(struct net_device *dev)
     rx_mcl[i].args[3] = 0;
     rx_mcl[i].args[4] = DOMID_SELF;
 
+    /* Tell the balloon driver what is going on. */
+    balloon_update_driver_allowance(i);
+
     /* Zap PTEs and give away pages in one big multicall. */
     (void)HYPERVISOR_multicall(rx_mcl, i+1);
 
@@ -557,14 +561,15 @@ static int netif_poll(struct net_device *dev, int *pbudget)
         /*
          * An error here is very odd. Usually indicates a backend bug,
          * low-memory condition, or that we didn't have reservation headroom.
-         * Whatever - print an error and queue the id again straight away.
          */
         if ( unlikely(rx->status <= 0) )
         {
-           printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
+            if ( net_ratelimit() )
+                printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
             np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
             wmb();
             np->rx->req_prod++;
+            work_done--;
             continue;
         }
 
@@ -595,6 +600,9 @@ static int netif_poll(struct net_device *dev, int *pbudget)
         __skb_queue_tail(&rxq, skb);
     }
 
+    /* Some pages are no longer absent... */
+    balloon_update_driver_allowance(-work_done);
+
     /* Do all the remapping work, and M->P updates, in one big hypercall. */
     if ( likely((mcl - rx_mcl) != 0) )
     {
index 4316d8e2f97ee1cd5228f15f2b383ed5c241a8c9..98e7e92ff47cc2def937caa7671c9b3eba8780bc 100644 (file)
@@ -7,7 +7,6 @@
  */
 
 #include <linux/config.h>
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -213,23 +212,9 @@ static int __init privcmd_init(void)
 
     privcmd_intf = create_xen_proc_entry("privcmd", 0400);
     if ( privcmd_intf != NULL )
-    {
-        privcmd_intf->owner      = THIS_MODULE;
-        privcmd_intf->nlink      = 1;
-        privcmd_intf->proc_fops  = &privcmd_file_ops;
-    }
+        privcmd_intf->proc_fops = &privcmd_file_ops;
 
     return 0;
 }
 
-
-static void __exit privcmd_cleanup(void)
-{
-    if ( privcmd_intf == NULL ) return;
-    remove_xen_proc_entry("privcmd");
-    privcmd_intf = NULL;
-}
-
-
-module_init(privcmd_init);
-module_exit(privcmd_cleanup);
+__initcall(privcmd_init);
diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/balloon.h b/linux-2.6.9-xen-sparse/include/asm-xen/balloon.h
new file mode 100644 (file)
index 0000000..80ef4ac
--- /dev/null
@@ -0,0 +1,51 @@
+/******************************************************************************
+ * balloon.h
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __ASM_BALLOON_H__
+#define __ASM_BALLOON_H__
+
+/*
+ * Inform the balloon driver that it should allow some slop for device-driver
+ * memory activities.
+ */
+extern void balloon_update_driver_allowance(long delta);
+
+/* Give up unmapped pages to the balloon driver. */
+extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns);
+
+/*
+ * Prevent the balloon driver from changing the memory reservation during
+ * a driver critical region.
+ */
+extern spinlock_t balloon_lock;
+#define balloon_lock(__flags)   spin_lock_irqsave(&balloon_lock, __flags)
+#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
+
+#endif /* __ASM_BALLOON_H__ */
index c82469bc0071ed115c07f1d62b77baa77d6d03ee..ef755a4e7d16e31ec89d8dbf9c0d653429544895 100644 (file)
@@ -102,8 +102,6 @@ void MULTICALL_flush_page_update_queue(void);
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
 /* Allocate a contiguous empty region of low memory. Return virtual start. */
 unsigned long allocate_empty_lowmem_region(unsigned long pages);
-/* Deallocate a contiguous region of low memory. Return it to the allocator. */
-void deallocate_lowmem_region(unsigned long vstart, unsigned long pages);
 #endif
 
 /*