ia64/xen-unstable

annotate tools/xenstore/xenstored_domain.c @ 6690:b7c7cb88f0ba

Create /dev/xen/evtchn if it doesn't exist.

Signed-off-by: Steven Hand <steven@xensource.com>
author shand@ubuntu.eng.hq.xensource.com
date Wed Sep 07 12:30:00 2005 -0800 (2005-09-07)
parents acde14d25398
children 7bc32f4c67fb
rev   line source
cl349@5357 1 /*
cl349@5357 2 Domain communications for Xen Store Daemon.
cl349@5357 3 Copyright (C) 2005 Rusty Russell IBM Corporation
cl349@5357 4
cl349@5357 5 This program is free software; you can redistribute it and/or modify
cl349@5357 6 it under the terms of the GNU General Public License as published by
cl349@5357 7 the Free Software Foundation; either version 2 of the License, or
cl349@5357 8 (at your option) any later version.
cl349@5357 9
cl349@5357 10 This program is distributed in the hope that it will be useful,
cl349@5357 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
cl349@5357 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cl349@5357 13 GNU General Public License for more details.
cl349@5357 14
cl349@5357 15 You should have received a copy of the GNU General Public License
cl349@5357 16 along with this program; if not, write to the Free Software
cl349@5357 17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
cl349@5357 18 */
cl349@5357 19
cl349@5357 20 #include <stdio.h>
cl349@5357 21 #include <linux/ioctl.h>
cl349@5357 22 #include <sys/ioctl.h>
cl349@5357 23 #include <sys/mman.h>
cl349@5357 24 #include <unistd.h>
cl349@5357 25 #include <stdlib.h>
cl349@5357 26 #include <stdarg.h>
cl349@5357 27 #include <sys/types.h>
cl349@5357 28 #include <sys/stat.h>
cl349@5357 29 #include <fcntl.h>
cl349@5357 30
cl349@5357 31 //#define DEBUG
cl349@5357 32 #include "utils.h"
cl349@5357 33 #include "talloc.h"
cl349@5357 34 #include "xenstored_core.h"
cl349@5357 35 #include "xenstored_domain.h"
cl349@6648 36 #include "xenstored_watch.h"
cl349@5357 37 #include "xenstored_test.h"
cl349@5357 38
/* Handle onto the hypervisor interface; talloc'd so it is closed on exit. */
static int *xc_handle;

/* File descriptor onto the event-channel driver. */
static int eventchn_fd;

/* Event-channel port bound to VIRQ_DOM_EXC (domain-exception notifications). */
static int virq_port;

/* Usable bytes in each ring: half a page minus the ringbuf_head header. */
static unsigned int ringbuf_datasize;
cl349@5357 43
cl349@5357 44 struct domain
cl349@5357 45 {
cl349@5357 46 struct list_head list;
cl349@5357 47
cl349@5357 48 /* The id of this domain */
cl349@5357 49 domid_t domid;
cl349@5357 50
cl349@5357 51 /* Event channel port */
cl349@5357 52 u16 port;
cl349@5357 53
cl349@5357 54 /* Domain path in store. */
cl349@5357 55 char *path;
cl349@5357 56
cl349@5357 57 /* Shared page. */
cl349@5357 58 void *page;
cl349@5357 59
cl349@5357 60 /* Input and output ringbuffer heads. */
cl349@5357 61 struct ringbuf_head *input, *output;
cl349@5357 62
cl349@5357 63 /* The connection associated with this. */
cl349@5357 64 struct connection *conn;
cl349@5357 65
cl349@5357 66 };
cl349@5357 67
cl349@5357 68 static LIST_HEAD(domains);
cl349@5357 69
cl349@5357 70 struct ringbuf_head
cl349@5357 71 {
cl349@5357 72 u32 write; /* Next place to write to */
cl349@5357 73 u32 read; /* Next place to read from */
cl349@5357 74 u8 flags;
cl349@5357 75 char buf[0];
cl349@5357 76 } __attribute__((packed));
cl349@5357 77
cl349@5357 78 #define EVENTCHN_BIND _IO('E', 2)
cl349@5357 79 #define EVENTCHN_UNBIND _IO('E', 3)
cl349@5357 80
cl349@5357 81 /* FIXME: Mark connection as broken (close it?) when this happens. */
cl349@5357 82 static bool check_buffer(const struct ringbuf_head *h)
cl349@5357 83 {
cl349@5357 84 return (h->write < ringbuf_datasize && h->read < ringbuf_datasize);
cl349@5357 85 }
cl349@5357 86
cl349@5357 87 /* We can't fill last byte: would look like empty buffer. */
cl349@5357 88 static void *get_output_chunk(const struct ringbuf_head *h,
cl349@5357 89 void *buf, u32 *len)
cl349@5357 90 {
cl349@5357 91 u32 read_mark;
cl349@5357 92
cl349@5357 93 if (h->read == 0)
cl349@5357 94 read_mark = ringbuf_datasize - 1;
cl349@5357 95 else
cl349@5357 96 read_mark = h->read - 1;
cl349@5357 97
cl349@5357 98 /* Here to the end of buffer, unless they haven't read some out. */
cl349@5357 99 *len = ringbuf_datasize - h->write;
cl349@5357 100 if (read_mark >= h->write)
cl349@5357 101 *len = read_mark - h->write;
cl349@5357 102 return buf + h->write;
cl349@5357 103 }
cl349@5357 104
cl349@5357 105 static const void *get_input_chunk(const struct ringbuf_head *h,
cl349@5357 106 const void *buf, u32 *len)
cl349@5357 107 {
cl349@5357 108 /* Here to the end of buffer, unless they haven't written some. */
cl349@5357 109 *len = ringbuf_datasize - h->read;
cl349@5357 110 if (h->write >= h->read)
cl349@5357 111 *len = h->write - h->read;
cl349@5357 112 return buf + h->read;
cl349@5357 113 }
cl349@5357 114
cl349@5357 115 static void update_output_chunk(struct ringbuf_head *h, u32 len)
cl349@5357 116 {
cl349@5357 117 h->write += len;
cl349@5357 118 if (h->write == ringbuf_datasize)
cl349@5357 119 h->write = 0;
cl349@5357 120 }
cl349@5357 121
cl349@5357 122 static void update_input_chunk(struct ringbuf_head *h, u32 len)
cl349@5357 123 {
cl349@5357 124 h->read += len;
cl349@5357 125 if (h->read == ringbuf_datasize)
cl349@5357 126 h->read = 0;
cl349@5357 127 }
cl349@5357 128
cl349@5357 129 static bool buffer_has_input(const struct ringbuf_head *h)
cl349@5357 130 {
cl349@5357 131 u32 len;
cl349@5357 132
cl349@5357 133 get_input_chunk(h, NULL, &len);
cl349@5357 134 return (len != 0);
cl349@5357 135 }
cl349@5357 136
cl349@5357 137 static bool buffer_has_output_room(const struct ringbuf_head *h)
cl349@5357 138 {
cl349@5357 139 u32 len;
cl349@5357 140
cl349@5357 141 get_output_chunk(h, NULL, &len);
cl349@5357 142 return (len != 0);
cl349@5357 143 }
cl349@5357 144
cl349@5357 145 static int writechn(struct connection *conn, const void *data, unsigned int len)
cl349@5357 146 {
cl349@5357 147 u32 avail;
cl349@5357 148 void *dest;
cl349@5357 149 struct ringbuf_head h;
cl349@5357 150
cl349@5357 151 /* Must read head once, and before anything else, and verified. */
cl349@5357 152 h = *conn->domain->output;
cl349@5357 153 mb();
cl349@5357 154 if (!check_buffer(&h)) {
cl349@5357 155 errno = EIO;
cl349@5357 156 return -1;
cl349@5357 157 }
cl349@5357 158
cl349@5357 159 dest = get_output_chunk(&h, conn->domain->output->buf, &avail);
cl349@5357 160 if (avail < len)
cl349@5357 161 len = avail;
cl349@5357 162
cl349@5357 163 memcpy(dest, data, len);
cl349@5357 164 mb();
cl349@5357 165 update_output_chunk(conn->domain->output, len);
cl349@5357 166 /* FIXME: Probably not neccessary. */
cl349@5357 167 mb();
cl349@5357 168 xc_evtchn_send(*xc_handle, conn->domain->port);
cl349@5357 169 return len;
cl349@5357 170 }
cl349@5357 171
cl349@5357 172 static int readchn(struct connection *conn, void *data, unsigned int len)
cl349@5357 173 {
cl349@5357 174 u32 avail;
cl349@5357 175 const void *src;
cl349@5357 176 struct ringbuf_head h;
cl349@5357 177 bool was_full;
cl349@5357 178
cl349@5357 179 /* Must read head once, and before anything else, and verified. */
cl349@5357 180 h = *conn->domain->input;
cl349@5357 181 mb();
cl349@5357 182
cl349@5357 183 if (!check_buffer(&h)) {
cl349@5357 184 errno = EIO;
cl349@5357 185 return -1;
cl349@5357 186 }
cl349@5357 187
cl349@5357 188 src = get_input_chunk(&h, conn->domain->input->buf, &avail);
cl349@5357 189 if (avail < len)
cl349@5357 190 len = avail;
cl349@5357 191
cl349@5357 192 was_full = !buffer_has_output_room(&h);
cl349@5357 193 memcpy(data, src, len);
cl349@5357 194 mb();
cl349@5357 195 update_input_chunk(conn->domain->input, len);
cl349@5357 196 /* FIXME: Probably not neccessary. */
cl349@5357 197 mb();
cl349@5357 198
cl349@5357 199 /* If it was full, tell them we've taken some. */
cl349@5357 200 if (was_full)
cl349@5357 201 xc_evtchn_send(*xc_handle, conn->domain->port);
cl349@5357 202 return len;
cl349@5357 203 }
cl349@5357 204
cl349@5357 205 static int destroy_domain(void *_domain)
cl349@5357 206 {
cl349@5357 207 struct domain *domain = _domain;
cl349@5357 208
cl349@5357 209 list_del(&domain->list);
cl349@5357 210
cl349@5357 211 if (domain->port &&
cl349@5357 212 (ioctl(eventchn_fd, EVENTCHN_UNBIND, domain->port) != 0))
cl349@5357 213 eprintf("> Unbinding port %i failed!\n", domain->port);
cl349@5357 214
cl349@5357 215 if(domain->page)
cl349@5357 216 munmap(domain->page, getpagesize());
cl349@5357 217
cl349@5357 218 return 0;
cl349@5357 219 }
cl349@5357 220
kaf24@6058 221 /* We scan all domains rather than use the information given here. */
cl349@5357 222 void handle_event(int event_fd)
cl349@5357 223 {
cl349@5357 224 u16 port;
cl349@5357 225
cl349@5357 226 if (read(event_fd, &port, sizeof(port)) != sizeof(port))
cl349@5357 227 barf_perror("Failed to read from event fd");
cl349@6679 228
cl349@6679 229 if (port == virq_port)
cl349@6679 230 domain_cleanup();
cl349@6679 231
cl349@5357 232 #ifndef TESTING
cl349@5357 233 if (write(event_fd, &port, sizeof(port)) != sizeof(port))
cl349@5357 234 barf_perror("Failed to write to event fd");
cl349@5357 235 #endif
cl349@5357 236 }
cl349@5357 237
kaf24@6058 238 bool domain_can_read(struct connection *conn)
kaf24@6058 239 {
kaf24@6058 240 return conn->state == OK && buffer_has_input(conn->domain->input);
kaf24@6058 241 }
kaf24@6058 242
kaf24@6058 243 bool domain_can_write(struct connection *conn)
kaf24@6058 244 {
kaf24@6058 245 return conn->out && buffer_has_output_room(conn->domain->output);
kaf24@6058 246 }
kaf24@6058 247
cl349@5868 248 static struct domain *new_domain(void *context, domid_t domid,
cl349@5868 249 unsigned long mfn, int port,
cl349@5868 250 const char *path)
cl349@5357 251 {
cl349@5357 252 struct domain *domain;
cl349@5868 253 domain = talloc(context, struct domain);
cl349@5920 254 domain->port = 0;
cl349@5868 255 domain->domid = domid;
cl349@5868 256 domain->path = talloc_strdup(domain, path);
cl349@5357 257 domain->page = xc_map_foreign_range(*xc_handle, domain->domid,
cl349@5357 258 getpagesize(),
cl349@5357 259 PROT_READ|PROT_WRITE,
cl349@5868 260 mfn);
cl349@5357 261 if (!domain->page)
cl349@5868 262 return NULL;
cl349@5357 263
cl349@5478 264 list_add(&domain->list, &domains);
cl349@5478 265 talloc_set_destructor(domain, destroy_domain);
cl349@5478 266
cl349@5357 267 /* One in each half of page. */
cl349@5357 268 domain->input = domain->page;
cl349@5357 269 domain->output = domain->page + getpagesize()/2;
cl349@5357 270
cl349@5357 271 /* Tell kernel we're interested in this event. */
cl349@5920 272 if (ioctl(eventchn_fd, EVENTCHN_BIND, port) != 0)
cl349@5868 273 return NULL;
cl349@5357 274
cl349@5920 275 domain->port = port;
cl349@5357 276 domain->conn = new_connection(writechn, readchn);
cl349@5357 277 domain->conn->domain = domain;
cl349@5868 278 return domain;
cl349@5868 279 }
cl349@5357 280
cl349@5357 281 /* domid, mfn, evtchn, path */
cl349@5871 282 void do_introduce(struct connection *conn, struct buffered_data *in)
cl349@5357 283 {
cl349@5357 284 struct domain *domain;
cl349@5357 285 char *vec[4];
cl349@5357 286
cl349@5871 287 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
cl349@5871 288 send_error(conn, EINVAL);
cl349@5871 289 return;
cl349@5871 290 }
cl349@5357 291
cl349@5871 292 if (conn->id != 0) {
cl349@5871 293 send_error(conn, EACCES);
cl349@5871 294 return;
cl349@5871 295 }
cl349@5478 296
cl349@5871 297 if (!conn->can_write) {
cl349@5871 298 send_error(conn, EROFS);
cl349@5871 299 return;
cl349@5871 300 }
kaf24@5426 301
cl349@5868 302 /* Sanity check args. */
cl349@5871 303 if ((atoi(vec[2]) <= 0) || !is_valid_nodename(vec[3])) {
cl349@5871 304 send_error(conn, EINVAL);
cl349@5871 305 return;
cl349@5871 306 }
cl349@5868 307 /* Hang domain off "in" until we're finished. */
cl349@5868 308 domain = new_domain(in, atoi(vec[0]), atol(vec[1]), atol(vec[2]),
cl349@5868 309 vec[3]);
cl349@5871 310 if (!domain) {
cl349@5871 311 send_error(conn, errno);
cl349@5871 312 return;
cl349@5871 313 }
cl349@5357 314
cl349@5868 315 /* Now domain belongs to its connection. */
cl349@5357 316 talloc_steal(domain->conn, domain);
cl349@6648 317
cl349@6648 318 fire_watches(conn, "@introduceDomain", false);
cl349@6648 319
cl349@5871 320 send_ack(conn, XS_INTRODUCE);
cl349@5357 321 }
cl349@5357 322
cl349@5357 323 static struct domain *find_domain_by_domid(domid_t domid)
cl349@5357 324 {
cl349@5357 325 struct domain *i;
cl349@5357 326
cl349@5357 327 list_for_each_entry(i, &domains, list) {
cl349@5357 328 if (i->domid == domid)
cl349@5357 329 return i;
cl349@5357 330 }
cl349@5357 331 return NULL;
cl349@5357 332 }
cl349@5357 333
cl349@5357 334 /* domid */
cl349@5871 335 void do_release(struct connection *conn, const char *domid_str)
cl349@5357 336 {
cl349@5357 337 struct domain *domain;
cl349@5357 338 domid_t domid;
cl349@5357 339
cl349@5871 340 if (!domid_str) {
cl349@5871 341 send_error(conn, EINVAL);
cl349@5871 342 return;
cl349@5871 343 }
cl349@5357 344
cl349@5357 345 domid = atoi(domid_str);
cl349@5871 346 if (!domid) {
cl349@5871 347 send_error(conn, EINVAL);
cl349@5871 348 return;
cl349@5871 349 }
cl349@5357 350
cl349@5871 351 if (conn->id != 0) {
cl349@5871 352 send_error(conn, EACCES);
cl349@5871 353 return;
cl349@5871 354 }
cl349@5478 355
cl349@5357 356 domain = find_domain_by_domid(domid);
cl349@5871 357 if (!domain) {
cl349@5871 358 send_error(conn, ENOENT);
cl349@5871 359 return;
cl349@5871 360 }
cl349@5357 361
cl349@5871 362 if (!domain->conn) {
cl349@5871 363 send_error(conn, EINVAL);
cl349@5871 364 return;
cl349@5871 365 }
cl349@5357 366
cl349@5357 367 talloc_free(domain->conn);
cl349@6664 368
cl349@6664 369 fire_watches(NULL, "@releaseDomain", false);
cl349@6664 370
cl349@5871 371 send_ack(conn, XS_RELEASE);
cl349@5357 372 }
cl349@5357 373
cl349@6656 374 void domain_cleanup(void)
cl349@6656 375 {
cl349@6656 376 xc_dominfo_t dominfo;
cl349@6656 377 struct domain *domain, *tmp;
cl349@6664 378 int released = 0;
cl349@6656 379
cl349@6656 380 list_for_each_entry_safe(domain, tmp, &domains, list) {
cl349@6656 381 if (xc_domain_getinfo(*xc_handle, domain->domid, 1,
cl349@6656 382 &dominfo) == 1 &&
cl349@6656 383 dominfo.domid == domain->domid &&
cl349@6656 384 !dominfo.dying && !dominfo.crashed && !dominfo.shutdown)
cl349@6656 385 continue;
cl349@6656 386 talloc_free(domain->conn);
cl349@6664 387 released++;
cl349@6656 388 }
cl349@6664 389
cl349@6664 390 if (released)
cl349@6664 391 fire_watches(NULL, "@releaseDomain", false);
cl349@6656 392 }
cl349@6656 393
cl349@5871 394 void do_get_domain_path(struct connection *conn, const char *domid_str)
cl349@5357 395 {
cl349@5357 396 struct domain *domain;
cl349@5357 397 domid_t domid;
cl349@5357 398
cl349@5871 399 if (!domid_str) {
cl349@5871 400 send_error(conn, EINVAL);
cl349@5871 401 return;
cl349@5871 402 }
cl349@5357 403
cl349@5357 404 domid = atoi(domid_str);
cl349@5751 405 if (domid == DOMID_SELF)
cl349@5357 406 domain = conn->domain;
cl349@5357 407 else
cl349@5357 408 domain = find_domain_by_domid(domid);
cl349@5357 409
cl349@5871 410 if (!domain)
cl349@5871 411 send_error(conn, ENOENT);
cl349@5871 412 else
cl349@6643 413 send_reply(conn, XS_GET_DOMAIN_PATH, domain->path,
cl349@5871 414 strlen(domain->path) + 1);
cl349@5357 415 }
cl349@5357 416
/* talloc destructor for the hypervisor handle. */
static int close_xc_handle(void *_handle)
{
	int *handle = _handle;

	xc_interface_close(*handle);
	return 0;
}
cl349@5357 422
cl349@5478 423 /* Returns the implicit path of a connection (only domains have this) */
cl349@5478 424 const char *get_implicit_path(const struct connection *conn)
cl349@5478 425 {
cl349@5478 426 if (!conn->domain)
cl349@5478 427 return NULL;
cl349@5478 428 return conn->domain->path;
cl349@5478 429 }
cl349@5478 430
/* Restore existing connections: nothing to do for domain connections. */
void restore_existing_connections(void)
{
}
cl349@5868 435
shand@6690 436 #define EVTCHN_DEV_NAME "/dev/xen/evtchn"
shand@6690 437 #define EVTCHN_DEV_MAJOR 10
shand@6690 438 #define EVTCHN_DEV_MINOR 201
shand@6690 439
cl349@5357 440 /* Returns the event channel handle. */
cl349@5357 441 int domain_init(void)
cl349@5357 442 {
shand@6690 443 /* The size of the ringbuffer: half a page minus head structure. */
shand@6690 444 ringbuf_datasize = getpagesize() / 2 - sizeof(struct ringbuf_head);
shand@6690 445
shand@6690 446 xc_handle = talloc(talloc_autofree_context(), int);
shand@6690 447 if (!xc_handle)
shand@6690 448 barf_perror("Failed to allocate domain handle");
cl349@5357 449
shand@6690 450 *xc_handle = xc_interface_open();
shand@6690 451 if (*xc_handle < 0)
shand@6690 452 barf_perror("Failed to open connection to hypervisor (privcmd)");
cl349@5357 453
shand@6690 454 talloc_set_destructor(xc_handle, close_xc_handle);
shand@6690 455
cl349@5357 456 #ifdef TESTING
shand@6690 457 eventchn_fd = fake_open_eventchn();
cl349@5357 458 #else
shand@6690 459 {
shand@6690 460 struct stat st;
shand@6690 461
shand@6690 462 /* Make sure any existing device file links to correct device. */
shand@6690 463 if ( (lstat(EVTCHN_DEV_NAME, &st) != 0) || !S_ISCHR(st.st_mode) ||
shand@6690 464 (st.st_rdev != makedev(EVTCHN_DEV_MAJOR, EVTCHN_DEV_MINOR)) )
shand@6690 465 (void)unlink(EVTCHN_DEV_NAME);
shand@6690 466
shand@6690 467 reopen:
shand@6690 468 eventchn_fd = open(EVTCHN_DEV_NAME, O_NONBLOCK|O_RDWR);
shand@6690 469 if (eventchn_fd == -1) {
shand@6690 470 if ((errno == ENOENT) && (
shand@6690 471 (mkdir("/dev/xen", 0755) == 0) || (errno == EEXIST))
shand@6690 472 && (mknod(EVTCHN_DEV_NAME, S_IFCHR|0600,
shand@6690 473 makedev(EVTCHN_DEV_MAJOR,EVTCHN_DEV_MINOR)) == 0))
shand@6690 474 goto reopen;
shand@6690 475 return -errno;
shand@6690 476 }
shand@6690 477 }
cl349@5357 478 #endif
shand@6690 479 if (eventchn_fd < 0)
shand@6690 480 barf_perror("Failed to open connection to hypervisor (evtchn)");
shand@6690 481
shand@6690 482 if (xc_evtchn_bind_virq(*xc_handle, VIRQ_DOM_EXC, &virq_port))
shand@6690 483 barf_perror("Failed to bind to domain exception virq");
shand@6690 484
shand@6690 485 if (ioctl(eventchn_fd, EVENTCHN_BIND, virq_port) != 0)
shand@6690 486 barf_perror("Failed to bind to domain exception virq port");
shand@6690 487
shand@6690 488 return eventchn_fd;
cl349@5357 489 }