direct-io.hg

view tools/ioemu/target-i386-dm/helper2.c @ 9560:b128f55ca05c

Add code to make handling domain poweroff/reboot symmetrical between
paravirtualized and fully virtualized. This approach uses the new
sched_op to handle other domains than the current domain. The new
code, SCHEDOP_remote_shutdown, is very much like SCHEDOP_shutdown, but
is called with the id of the domain which is to be shut down. This
allows fully virtualized shutdown and para-virtualized shutdown to be
identical from that point forward.

A paravirtualized domain uses sched_op to shut down and set the reason
code. This will send a VIRQ_DOM_EXC, which can be handled in dom0 by
control software. In some ways, this resembles SIGCHLD/waitpid, and
is a reasonable model.

The fully virtualized case has qemu invoke xm directly. This is a
different path than paravirtualized. It also removes decision and
policy making choices from the rest of the control software and places
it within qemu. When any dom0 logic eventually gets a VIRQ_DOM_EXC,
the information about the domain is gone, having been destroyed by xm.

A libxenctrl wrapper, xc_shutdown_domain has been added and qemu now
calls it.

As a freebie, #if 0 some very verbose logging code in qemu. Totally
unrelated, but as long as I was there...

Signed-off-by: Ben Thomas <ben@virtualiron.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 06 15:24:00 2006 +0100 (2006-04-06)
parents d6bab69e856f
children 7fba181c8531
line source
1 /*
2 * i386 helpers (without register variable usage)
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * Main cpu loop for handling I/O requests coming from a virtual machine
23 *
24 * Copyright 2004, Intel Corporation.
25 * Copyright 2005, International Business Machines Corporation.
26 *
27 * This program is free software; you can redistribute it and/or modify it
28 * under the terms and conditions of the GNU Lesser General Public License,
29 * version 2.1, as published by the Free Software Foundation.
30 *
31 * This program is distributed in the hope it will be useful, but WITHOUT
32 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
33 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
34 * more details.
35 *
36 * You should have received a copy of the GNU Lesser General Public License
37 * along with this program; if not, write to the Free Software Foundation,
38 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307 USA.
39 */
40 #include <stdarg.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <inttypes.h>
45 #include <signal.h>
46 #include <assert.h>
48 #include <limits.h>
49 #include <fcntl.h>
50 #include <sys/ioctl.h>
52 #include <xenctrl.h>
53 #include <xen/hvm/ioreq.h>
54 #include <xen/linux/evtchn.h>
56 #include "cpu.h"
57 #include "exec-all.h"
58 #include "vl.h"
60 extern int domid;
61 extern int vcpus;
63 void *shared_vram;
65 shared_iopage_t *shared_page = NULL;
66 extern int reset_requested;
68 CPUX86State *cpu_86_init(void)
69 {
70 CPUX86State *env;
71 static int inited;
73 cpu_exec_init();
75 env = malloc(sizeof(CPUX86State));
76 if (!env)
77 return NULL;
78 memset(env, 0, sizeof(CPUX86State));
79 /* init various static tables */
80 if (!inited) {
81 inited = 1;
82 }
83 cpu_single_env = env;
84 cpu_reset(env);
85 return env;
86 }
88 /* NOTE: must be called outside the CPU execute loop */
89 void cpu_reset(CPUX86State *env)
90 {
91 }
93 void cpu_x86_close(CPUX86State *env)
94 {
95 free(env);
96 }
99 void cpu_dump_state(CPUState *env, FILE *f,
100 int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
101 int flags)
102 {
103 }
105 /***********************************************************/
106 /* x86 mmu */
107 /* XXX: add PGE support */
109 void cpu_x86_set_a20(CPUX86State *env, int a20_state)
110 {
111 a20_state = (a20_state != 0);
112 if (a20_state != ((env->a20_mask >> 20) & 1)) {
113 #if defined(DEBUG_MMU)
114 printf("A20 update: a20=%d\n", a20_state);
115 #endif
116 env->a20_mask = 0xffefffff | (a20_state << 20);
117 }
118 }
120 target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
121 {
122 return addr;
123 }
125 //the evtchn fd for polling
126 int evtchn_fd = -1;
128 //which vcpu we are serving
129 int send_vcpu = 0;
131 //some functions to handle the io req packet
132 void sp_info()
133 {
134 ioreq_t *req;
135 int i;
137 for ( i = 0; i < vcpus; i++ ) {
138 req = &(shared_page->vcpu_iodata[i].vp_ioreq);
139 term_printf("vcpu %d: event port %d\n",
140 i, shared_page->vcpu_iodata[i].vp_eport);
141 term_printf(" req state: %x, pvalid: %x, addr: %"PRIx64", "
142 "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
143 req->state, req->pdata_valid, req->addr,
144 req->u.data, req->count, req->size);
145 term_printf(" IO totally occurred on this vcpu: %"PRIx64"\n",
146 req->io_count);
147 }
148 }
150 //get the ioreq packets from share mem
151 static ioreq_t* __cpu_get_ioreq(int vcpu)
152 {
153 ioreq_t *req;
155 req = &(shared_page->vcpu_iodata[vcpu].vp_ioreq);
157 if ( req->state == STATE_IOREQ_READY )
158 return req;
160 fprintf(logfile, "False I/O request ... in-service already: "
161 "%x, pvalid: %x, port: %"PRIx64", "
162 "data: %"PRIx64", count: %"PRIx64", size: %"PRIx64"\n",
163 req->state, req->pdata_valid, req->addr,
164 req->u.data, req->count, req->size);
165 return NULL;
166 }
168 //use poll to get the port notification
169 //ioreq_vec--out,the
170 //retval--the number of ioreq packet
171 static ioreq_t* cpu_get_ioreq(void)
172 {
173 int i, rc;
174 evtchn_port_t port;
176 rc = read(evtchn_fd, &port, sizeof(port));
177 if ( rc == sizeof(port) ) {
178 for ( i = 0; i < vcpus; i++ )
179 if ( shared_page->vcpu_iodata[i].dm_eport == port )
180 break;
182 if ( i == vcpus ) {
183 fprintf(logfile, "Fatal error while trying to get io event!\n");
184 exit(1);
185 }
187 // unmask the wanted port again
188 write(evtchn_fd, &port, sizeof(port));
190 //get the io packet from shared memory
191 send_vcpu = i;
192 return __cpu_get_ioreq(i);
193 }
195 //read error or read nothing
196 return NULL;
197 }
199 unsigned long do_inp(CPUState *env, unsigned long addr, unsigned long size)
200 {
201 switch(size) {
202 case 1:
203 return cpu_inb(env, addr);
204 case 2:
205 return cpu_inw(env, addr);
206 case 4:
207 return cpu_inl(env, addr);
208 default:
209 fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size);
210 exit(-1);
211 }
212 }
214 void do_outp(CPUState *env, unsigned long addr,
215 unsigned long size, unsigned long val)
216 {
217 switch(size) {
218 case 1:
219 return cpu_outb(env, addr, val);
220 case 2:
221 return cpu_outw(env, addr, val);
222 case 4:
223 return cpu_outl(env, addr, val);
224 default:
225 fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size);
226 exit(-1);
227 }
228 }
230 extern void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
231 int len, int is_write);
233 static inline void read_physical(uint64_t addr, unsigned long size, void *val)
234 {
235 return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 0);
236 }
238 static inline void write_physical(uint64_t addr, unsigned long size, void *val)
239 {
240 return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 1);
241 }
243 void cpu_ioreq_pio(CPUState *env, ioreq_t *req)
244 {
245 int i, sign;
247 sign = req->df ? -1 : 1;
249 if (req->dir == IOREQ_READ) {
250 if (!req->pdata_valid) {
251 req->u.data = do_inp(env, req->addr, req->size);
252 } else {
253 unsigned long tmp;
255 for (i = 0; i < req->count; i++) {
256 tmp = do_inp(env, req->addr, req->size);
257 write_physical((target_phys_addr_t) req->u.pdata
258 + (sign * i * req->size),
259 req->size, &tmp);
260 }
261 }
262 } else if (req->dir == IOREQ_WRITE) {
263 if (!req->pdata_valid) {
264 do_outp(env, req->addr, req->size, req->u.data);
265 } else {
266 for (i = 0; i < req->count; i++) {
267 unsigned long tmp;
269 read_physical((target_phys_addr_t) req->u.pdata
270 + (sign * i * req->size),
271 req->size, &tmp);
272 do_outp(env, req->addr, req->size, tmp);
273 }
274 }
275 }
276 }
278 void cpu_ioreq_move(CPUState *env, ioreq_t *req)
279 {
280 int i, sign;
282 sign = req->df ? -1 : 1;
284 if (!req->pdata_valid) {
285 if (req->dir == IOREQ_READ) {
286 for (i = 0; i < req->count; i++) {
287 read_physical(req->addr
288 + (sign * i * req->size),
289 req->size, &req->u.data);
290 }
291 } else if (req->dir == IOREQ_WRITE) {
292 for (i = 0; i < req->count; i++) {
293 write_physical(req->addr
294 + (sign * i * req->size),
295 req->size, &req->u.data);
296 }
297 }
298 } else {
299 unsigned long tmp;
301 if (req->dir == IOREQ_READ) {
302 for (i = 0; i < req->count; i++) {
303 read_physical(req->addr
304 + (sign * i * req->size),
305 req->size, &tmp);
306 write_physical((target_phys_addr_t )req->u.pdata
307 + (sign * i * req->size),
308 req->size, &tmp);
309 }
310 } else if (req->dir == IOREQ_WRITE) {
311 for (i = 0; i < req->count; i++) {
312 read_physical((target_phys_addr_t) req->u.pdata
313 + (sign * i * req->size),
314 req->size, &tmp);
315 write_physical(req->addr
316 + (sign * i * req->size),
317 req->size, &tmp);
318 }
319 }
320 }
321 }
323 void cpu_ioreq_and(CPUState *env, ioreq_t *req)
324 {
325 unsigned long tmp1, tmp2;
327 if (req->pdata_valid != 0)
328 hw_error("expected scalar value");
330 read_physical(req->addr, req->size, &tmp1);
331 if (req->dir == IOREQ_WRITE) {
332 tmp2 = tmp1 & (unsigned long) req->u.data;
333 write_physical(req->addr, req->size, &tmp2);
334 }
335 req->u.data = tmp1;
336 }
338 void cpu_ioreq_or(CPUState *env, ioreq_t *req)
339 {
340 unsigned long tmp1, tmp2;
342 if (req->pdata_valid != 0)
343 hw_error("expected scalar value");
345 read_physical(req->addr, req->size, &tmp1);
346 if (req->dir == IOREQ_WRITE) {
347 tmp2 = tmp1 | (unsigned long) req->u.data;
348 write_physical(req->addr, req->size, &tmp2);
349 }
350 req->u.data = tmp1;
351 }
353 void cpu_ioreq_xor(CPUState *env, ioreq_t *req)
354 {
355 unsigned long tmp1, tmp2;
357 if (req->pdata_valid != 0)
358 hw_error("expected scalar value");
360 read_physical(req->addr, req->size, &tmp1);
361 if (req->dir == IOREQ_WRITE) {
362 tmp2 = tmp1 ^ (unsigned long) req->u.data;
363 write_physical(req->addr, req->size, &tmp2);
364 }
365 req->u.data = tmp1;
366 }
368 void cpu_handle_ioreq(CPUState *env)
369 {
370 ioreq_t *req = cpu_get_ioreq();
372 if (req) {
373 req->state = STATE_IOREQ_INPROCESS;
375 if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
376 if (req->size != 4)
377 req->u.data &= (1UL << (8 * req->size))-1;
378 }
380 switch (req->type) {
381 case IOREQ_TYPE_PIO:
382 cpu_ioreq_pio(env, req);
383 break;
384 case IOREQ_TYPE_COPY:
385 cpu_ioreq_move(env, req);
386 break;
387 case IOREQ_TYPE_AND:
388 cpu_ioreq_and(env, req);
389 break;
390 case IOREQ_TYPE_OR:
391 cpu_ioreq_or(env, req);
392 break;
393 case IOREQ_TYPE_XOR:
394 cpu_ioreq_xor(env, req);
395 break;
396 default:
397 hw_error("Invalid ioreq type 0x%x\n", req->type);
398 }
400 /* No state change if state = STATE_IORESP_HOOK */
401 if (req->state == STATE_IOREQ_INPROCESS)
402 req->state = STATE_IORESP_READY;
403 env->send_event = 1;
404 }
405 }
407 int xc_handle;
409 void
410 destroy_hvm_domain(void)
411 {
412 int xcHandle;
413 int sts;
415 xcHandle = xc_interface_open();
416 if (xcHandle < 0)
417 fprintf(logfile, "Cannot acquire xenctrl handle\n");
418 else {
419 sts = xc_domain_shutdown(xcHandle, domid, SHUTDOWN_poweroff);
420 if (sts != 0)
421 fprintf(logfile, "? xc_domain_shutdown failed to issue poweroff, sts %d, errno %d\n", sts, errno);
422 else
423 fprintf(logfile, "Issued domain %d poweroff\n", domid);
424 xc_interface_close(xcHandle);
425 }
426 }
428 fd_set wakeup_rfds;
429 int highest_fds;
430 int main_loop(void)
431 {
432 fd_set rfds;
433 struct timeval tv;
434 extern CPUState *global_env;
435 extern int vm_running;
436 extern int shutdown_requested;
437 CPUState *env = global_env;
438 int retval;
439 extern void main_loop_wait(int);
441 /* Watch stdin (fd 0) to see when it has input. */
442 FD_ZERO(&wakeup_rfds);
443 FD_SET(evtchn_fd, &wakeup_rfds);
444 highest_fds = evtchn_fd;
445 env->send_event = 0;
447 while (1) {
448 if (vm_running) {
449 if (shutdown_requested) {
450 break;
451 }
452 if (reset_requested){
453 qemu_system_reset();
454 reset_requested = 0;
455 }
456 }
458 /* Wait up to 10 msec. */
459 tv.tv_sec = 0;
460 tv.tv_usec = 10000;
462 retval = select(highest_fds+1, &wakeup_rfds, NULL, NULL, &tv);
463 if (retval == -1) {
464 fprintf(logfile, "select returned error %d\n", errno);
465 return 0;
466 }
467 rfds = wakeup_rfds;
468 FD_ZERO(&wakeup_rfds);
469 FD_SET(evtchn_fd, &wakeup_rfds);
471 tun_receive_handler(&rfds);
472 if ( FD_ISSET(evtchn_fd, &rfds) ) {
473 cpu_handle_ioreq(env);
474 }
475 main_loop_wait(0);
477 if (env->send_event) {
478 struct ioctl_evtchn_notify notify;
480 env->send_event = 0;
481 notify.port = shared_page->vcpu_iodata[send_vcpu].dm_eport;
482 (void)ioctl(evtchn_fd, IOCTL_EVTCHN_NOTIFY, &notify);
483 }
484 }
485 destroy_hvm_domain();
486 return 0;
487 }
489 static void qemu_hvm_reset(void *unused)
490 {
491 int xcHandle;
492 int sts;
494 /* pause domain first, to avoid repeated reboot request*/
495 xc_domain_pause(xc_handle, domid);
497 xcHandle = xc_interface_open();
498 if (xcHandle < 0)
499 fprintf(logfile, "Cannot acquire xenctrl handle\n");
500 else {
501 sts = xc_domain_shutdown(xcHandle, domid, SHUTDOWN_reboot);
502 if (sts != 0)
503 fprintf(logfile, "? xc_domain_shutdown failed to issue reboot, sts %d\n", sts);
504 else
505 fprintf(logfile, "Issued domain %d reboot\n", domid);
506 xc_interface_close(xcHandle);
507 }
509 }
511 CPUState * cpu_init()
512 {
513 CPUX86State *env;
514 struct ioctl_evtchn_bind_interdomain bind;
515 int i, rc;
517 cpu_exec_init();
518 qemu_register_reset(qemu_hvm_reset, NULL);
519 env = malloc(sizeof(CPUX86State));
520 if (!env)
521 return NULL;
522 memset(env, 0, sizeof(CPUX86State));
524 cpu_single_env = env;
526 if (evtchn_fd != -1)//the evtchn has been opened by another cpu object
527 return NULL;
529 //use nonblock reading not polling, may change in future.
530 evtchn_fd = open("/dev/xen/evtchn", O_RDWR|O_NONBLOCK);
531 if (evtchn_fd == -1) {
532 fprintf(logfile, "open evtchn device error %d\n", errno);
533 return NULL;
534 }
536 /* FIXME: how about if we overflow the page here? */
537 bind.remote_domain = domid;
538 for ( i = 0; i < vcpus; i++ ) {
539 bind.remote_port = shared_page->vcpu_iodata[i].vp_eport;
540 rc = ioctl(evtchn_fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);
541 if ( rc == -1 ) {
542 fprintf(logfile, "bind interdomain ioctl error %d\n", errno);
543 return NULL;
544 }
545 shared_page->vcpu_iodata[i].dm_eport = rc;
546 }
548 return env;
549 }