ia64/linux-2.6.18-xen.hg

view mm/page_io.c @ 907:cad6f60f0506

Transcendent memory ("tmem") for Linux

Tmem, when called from a tmem-capable (paravirtualized) guest, makes
use of otherwise unutilized ("fallow") memory to create and manage
pools of pages that can be accessed from the guest either as
"ephemeral" pages or as "persistent" pages. In either case, the pages
are not directly addressable by the guest, only copied to and fro via
the tmem interface. Ephemeral pages are a nice place for a guest to
put recently evicted clean pages that it might need again; these pages
can be reclaimed synchronously by Xen for other guests or other uses.
Persistent pages are a nice place for a guest to put "swap" pages to
avoid sending them to disk. These pages retain data as long as the
guest lives, but count against the guest memory allocation.

This patch contains the Linux paravirtualization changes to
complement the tmem Xen patch (xen-unstable c/s 19646). It
implements "precache" (ext3 only as of now), "preswap",
and limited "shared precache" (ocfs2 only as of now) support.
CONFIG options are required to turn on
the support (but in this patch they default to "y"). If
the underlying Xen does not have tmem support or has it
turned off, this is sensed early to avoid nearly all
hypercalls.

Lots of useful prose about tmem can be found at
http://oss.oracle.com/projects/tmem

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:24:18 2009 +0100 (2009-06-18)
parents 831230e53067
children
line source
1 /*
2 * linux/mm/page_io.c
3 *
4 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
5 *
6 * Swap reorganised 29.12.95,
7 * Asynchronous swapping added 30.12.95. Stephen Tweedie
8 * Removed race in async swapping. 14.4.1996. Bruno Haible
9 * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
10 * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
11 */
13 #include <linux/mm.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/pagemap.h>
16 #include <linux/swap.h>
17 #include <linux/bio.h>
18 #include <linux/swapops.h>
19 #include <linux/writeback.h>
20 #include <asm/pgtable.h>
22 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
23 struct page *page, bio_end_io_t end_io)
24 {
25 struct bio *bio;
27 bio = bio_alloc(gfp_flags, 1);
28 if (bio) {
29 struct swap_info_struct *sis;
30 swp_entry_t entry = { .val = index, };
32 sis = get_swap_info_struct(swp_type(entry));
33 bio->bi_sector = map_swap_page(sis, swp_offset(entry)) *
34 (PAGE_SIZE >> 9);
35 bio->bi_bdev = sis->bdev;
36 bio->bi_io_vec[0].bv_page = page;
37 bio->bi_io_vec[0].bv_len = PAGE_SIZE;
38 bio->bi_io_vec[0].bv_offset = 0;
39 bio->bi_vcnt = 1;
40 bio->bi_idx = 0;
41 bio->bi_size = PAGE_SIZE;
42 bio->bi_end_io = end_io;
43 }
44 return bio;
45 }
47 static int end_swap_bio_write(struct bio *bio, unsigned int bytes_done, int err)
48 {
49 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
50 struct page *page = bio->bi_io_vec[0].bv_page;
52 if (bio->bi_size)
53 return 1;
55 if (!uptodate)
56 SetPageError(page);
57 end_page_writeback(page);
58 bio_put(bio);
59 return 0;
60 }
62 static int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err)
63 {
64 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
65 struct page *page = bio->bi_io_vec[0].bv_page;
67 if (bio->bi_size)
68 return 1;
70 if (!uptodate) {
71 SetPageError(page);
72 ClearPageUptodate(page);
73 } else {
74 SetPageUptodate(page);
75 }
76 unlock_page(page);
77 bio_put(bio);
78 return 0;
79 }
81 /*
82 * We may have stale swap cache pages in memory: notice
83 * them here and get rid of the unnecessary final write.
84 */
85 int swap_writepage(struct page *page, struct writeback_control *wbc)
86 {
87 struct bio *bio;
88 int ret = 0, rw = WRITE;
90 if (remove_exclusive_swap_page(page)) {
91 unlock_page(page);
92 goto out;
93 }
94 bio = get_swap_bio(GFP_NOIO, page_private(page), page,
95 end_swap_bio_write);
96 if (bio == NULL) {
97 set_page_dirty(page);
98 unlock_page(page);
99 ret = -ENOMEM;
100 goto out;
101 }
103 set_page_writeback(page);
104 if (preswap_put(page) == 1) {
105 unlock_page(page);
106 end_page_writeback(page);
107 bio_put(bio);
108 goto out;
109 }
111 if (wbc->sync_mode == WB_SYNC_ALL)
112 rw |= (1 << BIO_RW_SYNC);
113 count_vm_event(PSWPOUT);
114 unlock_page(page);
115 submit_bio(rw, bio);
116 out:
117 return ret;
118 }
120 int swap_readpage(struct file *file, struct page *page)
121 {
122 struct bio *bio;
123 int ret = 0;
125 BUG_ON(!PageLocked(page));
126 ClearPageUptodate(page);
128 if (preswap_get(page) == 1) {
129 SetPageUptodate(page);
130 unlock_page(page);
131 goto out;
132 }
134 bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
135 end_swap_bio_read);
136 if (bio == NULL) {
137 unlock_page(page);
138 ret = -ENOMEM;
139 goto out;
140 }
141 count_vm_event(PSWPIN);
142 submit_bio(READ, bio);
143 out:
144 return ret;
145 }
#ifdef CONFIG_SOFTWARE_SUSPEND
/*
 * A scruffy utility function to read or write an arbitrary swap page
 * and wait on the I/O. The caller must have a ref on the page.
 *
 * end_swap_bio_read() is used as the completion handler even for writes,
 * because it happens to do what we want: it updates the page's
 * uptodate/error state and unlocks the page, which is what
 * wait_on_page_locked() below waits for.
 *
 * Returns 0 on success, -ENOMEM if no bio could be allocated, or -EIO if
 * the page ends up not up to date or flagged with an error.
 */
int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
{
	struct bio *bio;

	lock_page(page);

	bio = get_swap_bio(GFP_KERNEL, entry.val, page, end_swap_bio_read);
	if (!bio) {
		unlock_page(page);
		return -ENOMEM;
	}

	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
	wait_on_page_locked(page);

	return (!PageUptodate(page) || PageError(page)) ? -EIO : 0;
}
#endif