ia64/xen-unstable

view tools/blktap/drivers/block-aio.c @ 15783:c93e2a822d6f

[xen, xencomm] xencomm multiple page support
The current implementation doesn't allow the struct xencomm_desc::address
array to span more than a single page. On IA64 this causes creation of
64GB+ domains to fail. This patch generalizes xencomm to allow a multipage
address array.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author kfraser@localhost.localdomain
date Tue Aug 28 15:32:27 2007 +0100 (2007-08-28)
parents eeeb77195ac2
children b6cc74f275fd
line source
1 /* block-aio.c
2 *
3 * libaio-based raw disk implementation.
4 *
5 * (c) 2006 Andrew Warfield and Julian Chesterfield
6 *
7 * NB: This code is not thread-safe.
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License version 2
11 * as published by the Free Software Foundation; or, when distributed
12 * separately from the Linux kernel or incorporated into other
13 * software packages, subject to the following license:
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining a copy
16 * of this source file (the "Software"), to deal in the Software without
17 * restriction, including without limitation the rights to use, copy, modify,
18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
19 * and to permit persons to whom the Software is furnished to do so, subject to
20 * the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included in
23 * all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
31 * IN THE SOFTWARE.
32 */
35 #include <errno.h>
36 #include <libaio.h>
37 #include <fcntl.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <sys/statvfs.h>
42 #include <sys/stat.h>
43 #include <sys/ioctl.h>
44 #include <linux/fs.h>
45 #include "tapdisk.h"
46 #include "tapaio.h"
48 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
50 struct pending_aio {
51 td_callback_t cb;
52 int id;
53 void *private;
54 uint64_t lsec;
55 };
57 struct tdaio_state {
58 int fd;
60 /* libaio state */
61 tap_aio_context_t aio_ctx;
62 struct iocb iocb_list [MAX_AIO_REQS];
63 struct iocb *iocb_free [MAX_AIO_REQS];
64 struct pending_aio pending_aio[MAX_AIO_REQS];
65 int iocb_free_count;
66 struct iocb *iocb_queue[MAX_AIO_REQS];
67 int iocb_queued;
68 struct io_event aio_events[MAX_AIO_REQS];
69 };
71 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
73 /*Get Image size, secsize*/
74 static int get_image_info(struct td_state *s, int fd)
75 {
76 int ret;
77 long size;
78 unsigned long total_size;
79 struct statvfs statBuf;
80 struct stat stat;
82 ret = fstat(fd, &stat);
83 if (ret != 0) {
84 DPRINTF("ERROR: fstat failed, Couldn't stat image");
85 return -EINVAL;
86 }
88 if (S_ISBLK(stat.st_mode)) {
89 /*Accessing block device directly*/
90 s->size = 0;
91 if (ioctl(fd,BLKGETSIZE,&s->size)!=0) {
92 DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image");
93 return -EINVAL;
94 }
96 DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
97 "sector_shift [%llu]\n",
98 (long long unsigned)(s->size << SECTOR_SHIFT),
99 (long long unsigned)s->size);
101 /*Get the sector size*/
102 #if defined(BLKSSZGET)
103 {
104 int arg;
105 s->sector_size = DEFAULT_SECTOR_SIZE;
106 ioctl(fd, BLKSSZGET, &s->sector_size);
108 if (s->sector_size != DEFAULT_SECTOR_SIZE)
109 DPRINTF("Note: sector size is %ld (not %d)\n",
110 s->sector_size, DEFAULT_SECTOR_SIZE);
111 }
112 #else
113 s->sector_size = DEFAULT_SECTOR_SIZE;
114 #endif
116 } else {
117 /*Local file? try fstat instead*/
118 s->size = (stat.st_size >> SECTOR_SHIFT);
119 s->sector_size = DEFAULT_SECTOR_SIZE;
120 DPRINTF("Image size: \n\tpre sector_shift [%llu]\n\tpost "
121 "sector_shift [%llu]\n",
122 (long long unsigned)(s->size << SECTOR_SHIFT),
123 (long long unsigned)s->size);
124 }
126 if (s->size == 0) {
127 s->size =((uint64_t) 16836057);
128 s->sector_size = DEFAULT_SECTOR_SIZE;
129 }
130 s->info = 0;
132 return 0;
133 }
135 static inline void init_fds(struct disk_driver *dd)
136 {
137 int i;
138 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
140 for(i = 0; i < MAX_IOFD; i++)
141 dd->io_fd[i] = 0;
143 dd->io_fd[0] = prv->aio_ctx.pollfd;
144 }
146 /* Open the disk file and initialize aio state. */
147 int tdaio_open (struct disk_driver *dd, const char *name, td_flag_t flags)
148 {
149 int i, fd, ret = 0, o_flags;
150 struct td_state *s = dd->td_state;
151 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
153 DPRINTF("block-aio open('%s')", name);
154 /* Initialize AIO */
155 prv->iocb_free_count = MAX_AIO_REQS;
156 prv->iocb_queued = 0;
158 ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
159 if (ret < 0) {
160 if (ret == -EAGAIN) {
161 DPRINTF("Couldn't setup AIO context. If you are "
162 "trying to concurrently use a large number "
163 "of blktap-based disks, you may need to "
164 "increase the system-wide aio request limit. "
165 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
166 "aio-max-nr')\n");
167 } else {
168 DPRINTF("Couldn't setup AIO context.\n");
169 }
170 goto done;
171 }
173 for (i=0;i<MAX_AIO_REQS;i++)
174 prv->iocb_free[i] = &prv->iocb_list[i];
176 /* Open the file */
177 o_flags = O_DIRECT | O_LARGEFILE |
178 ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
179 fd = open(name, o_flags);
181 if ( (fd == -1) && (errno == EINVAL) ) {
183 /* Maybe O_DIRECT isn't supported. */
184 o_flags &= ~O_DIRECT;
185 fd = open(name, o_flags);
186 if (fd != -1) DPRINTF("WARNING: Accessing image without"
187 "O_DIRECT! (%s)\n", name);
189 } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);
191 if (fd == -1) {
192 DPRINTF("Unable to open [%s] (%d)!\n", name, 0 - errno);
193 ret = 0 - errno;
194 goto done;
195 }
197 prv->fd = fd;
199 init_fds(dd);
200 ret = get_image_info(s, fd);
202 done:
203 return ret;
204 }
206 int tdaio_queue_read(struct disk_driver *dd, uint64_t sector,
207 int nb_sectors, char *buf, td_callback_t cb,
208 int id, void *private)
209 {
210 struct iocb *io;
211 struct pending_aio *pio;
212 struct td_state *s = dd->td_state;
213 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
214 int size = nb_sectors * s->sector_size;
215 uint64_t offset = sector * (uint64_t)s->sector_size;
216 long ioidx;
218 if (prv->iocb_free_count == 0)
219 return -ENOMEM;
220 io = prv->iocb_free[--prv->iocb_free_count];
222 ioidx = IOCB_IDX(prv, io);
223 pio = &prv->pending_aio[ioidx];
224 pio->cb = cb;
225 pio->id = id;
226 pio->private = private;
227 pio->lsec = sector;
229 io_prep_pread(io, prv->fd, buf, size, offset);
230 io->data = (void *)ioidx;
232 prv->iocb_queue[prv->iocb_queued++] = io;
234 return 0;
235 }
237 int tdaio_queue_write(struct disk_driver *dd, uint64_t sector,
238 int nb_sectors, char *buf, td_callback_t cb,
239 int id, void *private)
240 {
241 struct iocb *io;
242 struct pending_aio *pio;
243 struct td_state *s = dd->td_state;
244 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
245 int size = nb_sectors * s->sector_size;
246 uint64_t offset = sector * (uint64_t)s->sector_size;
247 long ioidx;
249 if (prv->iocb_free_count == 0)
250 return -ENOMEM;
251 io = prv->iocb_free[--prv->iocb_free_count];
253 ioidx = IOCB_IDX(prv, io);
254 pio = &prv->pending_aio[ioidx];
255 pio->cb = cb;
256 pio->id = id;
257 pio->private = private;
258 pio->lsec = sector;
260 io_prep_pwrite(io, prv->fd, buf, size, offset);
261 io->data = (void *)ioidx;
263 prv->iocb_queue[prv->iocb_queued++] = io;
265 return 0;
266 }
268 int tdaio_submit(struct disk_driver *dd)
269 {
270 int ret;
271 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
273 if (!prv->iocb_queued)
274 return 0;
276 ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);
278 /* XXX: TODO: Handle error conditions here. */
280 /* Success case: */
281 prv->iocb_queued = 0;
283 return 0;
284 }
286 int tdaio_close(struct disk_driver *dd)
287 {
288 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
290 io_destroy(prv->aio_ctx.aio_ctx);
291 close(prv->fd);
293 return 0;
294 }
296 int tdaio_do_callbacks(struct disk_driver *dd, int sid)
297 {
298 int i, nr_events, rsp = 0;
299 struct io_event *ep;
300 struct tdaio_state *prv = (struct tdaio_state *)dd->private;
302 nr_events = tap_aio_get_events(&prv->aio_ctx);
303 repeat:
304 for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
305 struct iocb *io = ep->obj;
306 struct pending_aio *pio;
308 pio = &prv->pending_aio[(long)io->data];
309 rsp += pio->cb(dd, ep->res == io->u.c.nbytes ? 0 : 1,
310 pio->lsec, io->u.c.nbytes >> 9,
311 pio->id, pio->private);
313 prv->iocb_free[prv->iocb_free_count++] = io;
314 }
316 if (nr_events) {
317 nr_events = tap_aio_more_events(&prv->aio_ctx);
318 goto repeat;
319 }
321 tap_aio_continue(&prv->aio_ctx);
323 return rsp;
324 }
326 int tdaio_get_parent_id(struct disk_driver *dd, struct disk_id *id)
327 {
328 return TD_NO_PARENT;
329 }
331 int tdaio_validate_parent(struct disk_driver *dd,
332 struct disk_driver *parent, td_flag_t flags)
333 {
334 return -EINVAL;
335 }
337 struct tap_disk tapdisk_aio = {
338 .disk_type = "tapdisk_aio",
339 .private_data_size = sizeof(struct tdaio_state),
340 .td_open = tdaio_open,
341 .td_queue_read = tdaio_queue_read,
342 .td_queue_write = tdaio_queue_write,
343 .td_submit = tdaio_submit,
344 .td_close = tdaio_close,
345 .td_do_callbacks = tdaio_do_callbacks,
346 .td_get_parent_id = tdaio_get_parent_id,
347 .td_validate_parent = tdaio_validate_parent
348 };