ia64/xen-unstable

view tools/blktap2/drivers/log.h @ 19647:1c627434605e

blktap2: a completely rewritten blktap implementation

Benefits to blktap2 over the old version of blktap:

* Isolation from xenstore - Blktap devices are now created directly on
the linux dom0 command line, rather than being spawned in response
to XenStore events. This is handy for debugging, makes blktap
generally easier to work with, and is a step toward a generic
user-level block device implementation that is not Xen-specific.

* Improved tapdisk infrastructure: simpler request forwarding, new
request scheduler, request merging, more efficient use of AIO.

* Improved tapdisk error handling and memory management. No
allocations on the block data path, IO retry logic to protect
guests from transient block device failures. This has been tested
and is known to work on weird environments such as NFS soft mounts.

* Pause and snapshot of live virtual disks (see xmsnap script).

* VHD support. The VHD code in this release has been rigorously
tested, and represents a very mature implementation of the VHD
image
format.

* No more duplication of mechanism with blkback. The blktap kernel
module has changed dramatically from the original blktap. Blkback
is now always used to talk to Xen guests, blktap just presents a
Linux gendisk that blkback can export. This is done while
preserving the zero-copy data path from domU to physical device.

These patches deprecate the old blktap code, which can hopefully be
removed from the tree completely at some point in the future.

Signed-off-by: Jake Wires <jake.wires@citrix.com>
Signed-off-by: Dutch Meyer <dmeyer@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:52:31 2009 +0100 (2009-05-26)
parents
children
line source
1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
29 /* log.h: API for writelog communication */
31 #ifndef __LOG_H__
32 #define __LOG_H__ 1
34 #include <inttypes.h>
36 #include <xen/io/ring.h>
37 /* for wmb et al */
38 #include <xenctrl.h>
/* Command codes for the writelog control channel. Every code is exactly
 * four characters long, which is the size of log_ctlmsg.msg. */
#define LOGCMD_SHMP  "shmp"
#define LOGCMD_PEEK  "peek"
#define LOGCMD_CLEAR "clrw"
#define LOGCMD_GET   "getw"
#define LOGCMD_KICK  "kick"

/* One CTLRSPLEN_* value per command above.
 * NOTE(review): presumably the length in bytes of the response to each
 * command -- confirm against the log client/server code. */
#define CTLRSPLEN_SHMP  256
#define CTLRSPLEN_PEEK  4
#define CTLRSPLEN_CLEAR 4
#define CTLRSPLEN_GET   4
#define CTLRSPLEN_KICK  0
/* shmregion is arbitrarily capped at 8 megs for a minimum of
 * 64 MB of data per read (if there are no contiguous regions).
 * In the off-chance that there is more dirty data, multiple
 * reads must be done. */
#define SHMSIZE (8 * 1024 * 1024)
#define SRINGSIZE 4096

/* The shared memory region is split up into 3 subregions:
 * The first half is reserved for the dirty bitmap log.
 * The second half begins with 1 page for read request descriptors,
 * followed by a big area for supplying read data.
 *
 * As byte offsets from the start of the region:
 *   [0, SHMSIZE/2)                       dirty bitmap
 *   [SHMSIZE/2, SHMSIZE/2 + SRINGSIZE)   request descriptors
 *   [SHMSIZE/2 + SRINGSIZE, SHMSIZE)     read data
 *
 * The helpers below offset through char* because arithmetic on void*
 * is a GNU extension rather than ISO C.
 */

/* Start of the dirty bitmap, i.e. the start of the region itself. */
static inline void* bmstart(void* shm)
{
	return shm;
}

/* One past the end of the dirty bitmap: SHMSIZE/2 bytes into the region. */
static inline void* bmend(void* shm)
{
	return (char *)shm + SHMSIZE / 2;
}

/* Start of the request descriptor area, which begins where the bitmap ends. */
static inline void* sringstart(void* shm)
{
	return bmend(shm);
}

/* Start of the read data area: SRINGSIZE bytes past the descriptor area. */
static inline void* sdatastart(void* shm)
{
	return (char *)sringstart(shm) + SRINGSIZE;
}

/* One past the end of the read data area, i.e. the end of the region. */
static inline void* sdataend(void* shm)
{
	return (char *)shm + SHMSIZE;
}
/* format for messages between log client and server */
struct log_ctlmsg {
	/* NOTE(review): presumably one of the four-character LOGCMD_* codes;
	 * with only 4 bytes there is no room for a terminating NUL. */
	char msg[4];
	/* NOTE(review): per-command parameters; the encoding is not visible
	 * in this header -- confirm against the client/server code. */
	char params[16];
};
/* extent descriptor */
struct disk_range {
	uint64_t sector; /* NOTE(review): presumably the first sector of the extent -- confirm */
	uint32_t count;  /* NOTE(review): presumably the extent length in sectors -- confirm */
};
/* dirty write logging space. This is an extent ring at the front,
 * full of disk_ranges plus a pointer into the data area */
/* I think I'd rather have the header in front of each data section to
 * avoid having two separate spaces that can run out, but then I'd either
 * lose page alignment on the data blocks or spend an entire page on the
 * header */

/* One ring entry: the same sector/count pair as struct disk_range, plus
 * the location of the extent's data. */
struct log_extent {
	uint64_t sector;
	uint32_t count;
	uint32_t offset; /* offset from start of data area to start of extent */
};

/* struct above should be 16 bytes (8 + 4 + 4), or 256 extents per
 * 4096-byte page */
116 typedef struct log_extent log_request_t;
117 typedef struct log_extent log_response_t;
119 DEFINE_RING_TYPES(log, log_request_t, log_response_t);
121 #define LOG_HEADER_PAGES 4
123 #endif