ia64/xen-unstable

changeset 9262:c445d4a0dd76

Define a new sched_op hypercall, sched_op_new, which differs from the legacy
hypercall in that it takes a pointer to a block of extra arguments rather than
an opaque unsigned long. The old hypercall remains available for backwards
compatibility.

The new hypercall supports a new sub-command, SCHEDOP_poll, which can be used
to wait on a set of event-channel ports with an optional timeout. This is
exported in XenLinux as HYPERVISOR_poll and is used in the pcifront driver to
wait for a response from the pciback driver.

It can also be useful for debuggers. :-)
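
A minimal usage sketch of the new XenLinux interface, modelled on the pcifront
change below; wait_for_backend and backend_work_done are illustrative names,
not part of this changeset, and the usual kernel includes are omitted:

    /*
     * Wait up to five seconds for a backend to signal completion on an
     * already-bound event channel, blocking in Xen rather than spinning.
     */
    static int wait_for_backend(evtchn_port_t port)
    {
        unsigned long poll_end = jiffies + 5*HZ;

        clear_evtchn(port);
        while (!backend_work_done()) {
            /* Returns when the port becomes pending or the timeout expires. */
            if (HYPERVISOR_poll(&port, 1, poll_end))
                BUG();
            clear_evtchn(port);
            if (time_after(jiffies, poll_end))
                return -ETIMEDOUT;
        }
        return 0;
    }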

Signed-off-by: Keir Fraser <keir@xensource.com>
Signed-off-by: John Levon <john.levon@sun.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Mar 14 19:33:45 2006 +0100 (2006-03-14)
parents e3d7c2183866
children 0ed4a312765b be669c4b1e6b
files linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h linux-2.6-xen-sparse/include/asm-ia64/hypercall.h linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_64/entry.S xen/common/event_channel.c xen/common/schedule.c xen/include/public/event_channel.h xen/include/public/sched.h xen/include/public/xen.h xen/include/xen/event.h xen/include/xen/sched.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Tue Mar 14 17:35:38 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Tue Mar 14 19:33:45 2006 +0100
     1.3 @@ -949,7 +949,7 @@ void __init time_init(void)
     1.4  }
     1.5  
     1.6  /* Convert jiffies to system time. */
     1.7 -static inline u64 jiffies_to_st(unsigned long j)
     1.8 +u64 jiffies_to_st(unsigned long j)
     1.9  {
    1.10  	unsigned long seq;
    1.11  	long delta;
    1.12 @@ -967,6 +967,7 @@ static inline u64 jiffies_to_st(unsigned
    1.13  
    1.14  	return st;
    1.15  }
    1.16 +EXPORT_SYMBOL(jiffies_to_st);
    1.17  
    1.18  /*
    1.19   * stop_hz_timer / start_hz_timer - enter/exit 'tickless mode' on an idle cpu
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c	Tue Mar 14 17:35:38 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c	Tue Mar 14 19:33:45 2006 +0100
     2.3 @@ -40,9 +40,8 @@ static int do_pci_op(struct pcifront_dev
     2.4  {
     2.5  	int err = 0;
     2.6  	struct xen_pci_op *active_op = &pdev->sh_info->op;
     2.7 -	unsigned long irq_flags;
     2.8 -
     2.9 -	unsigned int volatile ttl = (1U << 29);
    2.10 +	unsigned long irq_flags, poll_end;
    2.11 +	evtchn_port_t port = pdev->evtchn;
    2.12  
    2.13  	spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
    2.14  
    2.15 @@ -51,14 +50,17 @@ static int do_pci_op(struct pcifront_dev
    2.16  	/* Go */
    2.17  	wmb();
    2.18  	set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
    2.19 -	notify_remote_via_evtchn(pdev->evtchn);
    2.20 +	notify_remote_via_evtchn(port);
    2.21  
    2.22 -	/* IRQs are disabled for the pci config. space reads/writes,
    2.23 -	 * which means no event channel to notify us that the backend
    2.24 -	 * is done so spin while waiting for the answer */
    2.25 -	while (test_bit
    2.26 -	       (_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)) {
    2.27 -		if (!ttl) {
    2.28 +	poll_end = jiffies + 5*HZ;
    2.29 +	clear_evtchn(port);
    2.30 +
    2.31 +	while (test_bit(_XEN_PCIF_active,
    2.32 +			(unsigned long *)&pdev->sh_info->flags)) {
    2.33 +		if (HYPERVISOR_poll(&port, 1, poll_end))
    2.34 +			BUG();
    2.35 +		clear_evtchn(port);
    2.36 +		if (time_after(jiffies, poll_end)) {
    2.37  			dev_err(&pdev->xdev->dev,
    2.38  				"pciback not responding!!!\n");
    2.39  			clear_bit(_XEN_PCIF_active,
    2.40 @@ -66,7 +68,6 @@ static int do_pci_op(struct pcifront_dev
    2.41  			err = XEN_PCI_ERR_dev_not_found;
    2.42  			goto out;
    2.43  		}
    2.44 -		ttl--;
    2.45  	}
    2.46  
    2.47  	memcpy(op, active_op, sizeof(struct xen_pci_op));
     3.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Tue Mar 14 17:35:38 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Tue Mar 14 19:33:45 2006 +0100
     3.3 @@ -33,6 +33,7 @@
     3.4  #include <xen/interface/xen.h>
     3.5  #include <xen/interface/sched.h>
     3.6  #include <xen/interface/nmi.h>
     3.7 +#include <linux/errno.h>
     3.8  
     3.9  #define __STR(x) #x
    3.10  #define STR(x) __STR(x)
    3.11 @@ -169,6 +170,31 @@ HYPERVISOR_sched_op(
    3.12  	return _hypercall2(int, sched_op, cmd, arg);
    3.13  }
    3.14  
    3.15 +static inline int
    3.16 +HYPERVISOR_sched_op_new(
    3.17 +	int cmd, void *arg)
    3.18 +{
    3.19 +	return _hypercall2(int, sched_op_new, cmd, arg);
    3.20 +}
    3.21 +
    3.22 +static inline int
    3.23 +HYPERVISOR_poll(
    3.24 +	evtchn_port_t *ports, unsigned int nr_ports, u64 timeout)
    3.25 +{
    3.26 +	struct sched_poll sched_poll = {
    3.27 +		.ports = ports,
    3.28 +		.nr_ports = nr_ports,
    3.29 +		.timeout = jiffies_to_st(timeout)
    3.30 +	};
    3.31 +
    3.32 +	int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll);
    3.33 +
    3.34 +	if (rc == -ENOSYS)
    3.35 +		rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0);
    3.36 +
    3.37 +	return rc;
    3.38 +}
    3.39 +
    3.40  static inline long
    3.41  HYPERVISOR_set_timer_op(
    3.42  	u64 timeout)
     4.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h	Tue Mar 14 17:35:38 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h	Tue Mar 14 19:33:45 2006 +0100
     4.3 @@ -97,6 +97,9 @@ int xen_create_contiguous_region(
     4.4  void xen_destroy_contiguous_region(
     4.5      unsigned long vstart, unsigned int order);
     4.6  
     4.7 +/* Turn jiffies into Xen system time. */
     4.8 +u64 jiffies_to_st(unsigned long jiffies);
     4.9 +
    4.10  #include <asm/hypercall.h>
    4.11  
    4.12  #if defined(CONFIG_X86_64)
     5.1 --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h	Tue Mar 14 17:35:38 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h	Tue Mar 14 19:33:45 2006 +0100
     5.3 @@ -32,6 +32,7 @@
     5.4  
     5.5  #include <xen/interface/xen.h>
     5.6  #include <xen/interface/sched.h>
     5.7 +#include <linux/errno.h>
     5.8  
     5.9  /* FIXME: temp place to hold these page related macros */
    5.10  #include <asm/page.h>
    5.11 @@ -165,6 +166,31 @@ HYPERVISOR_sched_op(
    5.12  	return _hypercall2(int, sched_op, cmd, arg);
    5.13  }
    5.14  
    5.15 +static inline int
    5.16 +HYPERVISOR_sched_op_new(
    5.17 +	int cmd, void *arg)
    5.18 +{
    5.19 +	return _hypercall2(int, sched_op_new, cmd, arg);
    5.20 +}
    5.21 +
    5.22 +static inline int
    5.23 +HYPERVISOR_poll(
    5.24 +	evtchn_port_t *ports, unsigned int nr_ports, unsigned long timeout)
    5.25 +{
    5.26 +	struct sched_poll sched_poll = {
    5.27 +		.ports = ports,
    5.28 +		.nr_ports = nr_ports,
    5.29 +		.timeout = jiffies_to_st(timeout)
    5.30 +	};
    5.31 +
    5.32 +	int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll);
    5.33 +
    5.34 +	if (rc == -ENOSYS)
    5.35 +		rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0);
    5.36 +
    5.37 +	return rc;
    5.38 +}
    5.39 +
    5.40  static inline long
    5.41  HYPERVISOR_set_timer_op(
    5.42      u64 timeout)
     6.1 --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h	Tue Mar 14 17:35:38 2006 +0100
     6.2 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h	Tue Mar 14 19:33:45 2006 +0100
     6.3 @@ -44,6 +44,9 @@ extern start_info_t *xen_start_info;
     6.4  
     6.5  void force_evtchn_callback(void);
     6.6  
     6.7 +/* Turn jiffies into Xen system time. XXX Implement me. */
     6.8 +#define jiffies_to_st(j)	0
     6.9 +
    6.10  #include <asm/hypercall.h>
    6.11  
    6.12  // for drivers/xen/privcmd/privcmd.c
     7.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Tue Mar 14 17:35:38 2006 +0100
     7.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Tue Mar 14 19:33:45 2006 +0100
     7.3 @@ -37,6 +37,7 @@
     7.4  #include <xen/interface/xen.h>
     7.5  #include <xen/interface/sched.h>
     7.6  #include <xen/interface/nmi.h>
     7.7 +#include <linux/errno.h>
     7.8  
     7.9  #define __STR(x) #x
    7.10  #define STR(x) __STR(x)
    7.11 @@ -174,6 +175,31 @@ HYPERVISOR_sched_op(
    7.12  	return _hypercall2(int, sched_op, cmd, arg);
    7.13  }
    7.14  
    7.15 +static inline int
    7.16 +HYPERVISOR_sched_op_new(
    7.17 +	int cmd, void *arg)
    7.18 +{
    7.19 +	return _hypercall2(int, sched_op_new, cmd, arg);
    7.20 +}
    7.21 +
    7.22 +static inline int
    7.23 +HYPERVISOR_poll(
    7.24 +	evtchn_port_t *ports, unsigned int nr_ports, u64 timeout)
    7.25 +{
    7.26 +	struct sched_poll sched_poll = {
    7.27 +		.ports = ports,
    7.28 +		.nr_ports = nr_ports,
    7.29 +		.timeout = jiffies_to_st(timeout)
    7.30 +	};
    7.31 +
    7.32 +	int rc = HYPERVISOR_sched_op_new(SCHEDOP_poll, &sched_poll);
    7.33 +
    7.34 +	if (rc == -ENOSYS)
    7.35 +		rc = HYPERVISOR_sched_op(SCHEDOP_yield, 0);
    7.36 +
    7.37 +	return rc;
    7.38 +}
    7.39 +
    7.40  static inline long
    7.41  HYPERVISOR_set_timer_op(
    7.42  	u64 timeout)
     8.1 --- a/xen/arch/x86/x86_32/entry.S	Tue Mar 14 17:35:38 2006 +0100
     8.2 +++ b/xen/arch/x86/x86_32/entry.S	Tue Mar 14 19:33:45 2006 +0100
     8.3 @@ -586,6 +586,13 @@ do_arch_sched_op:
     8.4          movl %eax,UREGS_eax(%ecx)
     8.5          jmp  do_sched_op
     8.6  
     8.7 +do_arch_sched_op_new:
     8.8 +        # Ensure we return success even if we return via schedule_tail()
     8.9 +        xorl %eax,%eax
    8.10 +        GET_GUEST_REGS(%ecx)
    8.11 +        movl %eax,UREGS_eax(%ecx)
    8.12 +        jmp  do_sched_op_new
    8.13 +
    8.14  .data
    8.15  
    8.16  ENTRY(exception_table)
    8.17 @@ -640,6 +647,7 @@ ENTRY(hypercall_table)
    8.18          .long do_mmuext_op
    8.19          .long do_acm_op
    8.20          .long do_nmi_op
    8.21 +        .long do_arch_sched_op_new
    8.22          .rept NR_hypercalls-((.-hypercall_table)/4)
    8.23          .long do_ni_hypercall
    8.24          .endr
    8.25 @@ -674,6 +682,7 @@ ENTRY(hypercall_args_table)
    8.26          .byte 4 /* do_mmuext_op         */
    8.27          .byte 1 /* do_acm_op            */
    8.28          .byte 2 /* do_nmi_op            */
    8.29 +        .byte 2 /* do_arch_sched_op_new */
    8.30          .rept NR_hypercalls-(.-hypercall_args_table)
    8.31          .byte 0 /* do_ni_hypercall      */
    8.32          .endr
     9.1 --- a/xen/arch/x86/x86_64/entry.S	Tue Mar 14 17:35:38 2006 +0100
     9.2 +++ b/xen/arch/x86/x86_64/entry.S	Tue Mar 14 19:33:45 2006 +0100
     9.3 @@ -495,6 +495,13 @@ do_arch_sched_op:
     9.4          movq  %rax,UREGS_rax(%r10)
     9.5          jmp   do_sched_op
     9.6  
     9.7 +do_arch_sched_op_new:
     9.8 +        # Ensure we return success even if we return via schedule_tail()
     9.9 +        xorl  %eax,%eax
    9.10 +        GET_GUEST_REGS(%r10)
    9.11 +        movq  %rax,UREGS_rax(%r10)
    9.12 +        jmp   do_sched_op_new
    9.13 +
    9.14  .data
    9.15  
    9.16  ENTRY(exception_table)
    9.17 @@ -549,6 +556,7 @@ ENTRY(hypercall_table)
    9.18          .quad do_mmuext_op
    9.19          .quad do_acm_op
    9.20          .quad do_nmi_op
    9.21 +        .quad do_arch_sched_op_new
    9.22          .rept NR_hypercalls-((.-hypercall_table)/8)
    9.23          .quad do_ni_hypercall
    9.24          .endr
    9.25 @@ -583,6 +591,7 @@ ENTRY(hypercall_args_table)
    9.26          .byte 4 /* do_mmuext_op         */
    9.27          .byte 1 /* do_acm_op            */
    9.28          .byte 2 /* do_nmi_op            */
    9.29 +        .byte 2 /* do_arch_sched_op_new */
    9.30          .rept NR_hypercalls-(.-hypercall_args_table)
    9.31          .byte 0 /* do_ni_hypercall      */
    9.32          .endr
    10.1 --- a/xen/common/event_channel.c	Tue Mar 14 17:35:38 2006 +0100
    10.2 +++ b/xen/common/event_channel.c	Tue Mar 14 19:33:45 2006 +0100
    10.3 @@ -438,6 +438,47 @@ long evtchn_send(unsigned int lport)
    10.4      return ret;
    10.5  }
    10.6  
    10.7 +void evtchn_set_pending(struct vcpu *v, int port)
    10.8 +{
    10.9 +    struct domain *d = v->domain;
   10.10 +    shared_info_t *s = d->shared_info;
   10.11 +
   10.12 +    /*
   10.13 +     * The following bit operations must happen in strict order.
   10.14 +     * NB. On x86, the atomic bit operations also act as memory barriers.
   10.15 +     * There is therefore sufficiently strict ordering for this architecture --
   10.16 +     * others may require explicit memory barriers.
   10.17 +     */
   10.18 +
   10.19 +    if ( test_and_set_bit(port, &s->evtchn_pending[0]) )
   10.20 +        return;
   10.21 +
   10.22 +    if ( !test_bit        (port, &s->evtchn_mask[0])    &&
   10.23 +         !test_and_set_bit(port / BITS_PER_LONG,
   10.24 +                           &v->vcpu_info->evtchn_pending_sel) &&
   10.25 +         !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
   10.26 +    {
   10.27 +        evtchn_notify(v);
   10.28 +    }
   10.29 +    else if ( unlikely(test_bit(_VCPUF_blocked, &v->vcpu_flags) &&
   10.30 +                       v->vcpu_info->evtchn_upcall_mask) )
   10.31 +    {
   10.32 +        /*
   10.33 +         * Blocked and masked will usually mean that the VCPU executed 
   10.34 +         * SCHEDOP_poll. Kick the VCPU in case this port is in its poll list.
   10.35 +         */
   10.36 +        vcpu_unblock(v);
   10.37 +    }
   10.38 +}
   10.39 +
   10.40 +void send_guest_virq(struct vcpu *v, int virq)
   10.41 +{
   10.42 +    int port = v->virq_to_evtchn[virq];
   10.43 +
   10.44 +    if ( likely(port != 0) )
   10.45 +        evtchn_set_pending(v, port);
   10.46 +}
   10.47 +
   10.48  void send_guest_pirq(struct domain *d, int pirq)
   10.49  {
   10.50      int port = d->pirq_to_evtchn[pirq];
    11.1 --- a/xen/common/schedule.c	Tue Mar 14 17:35:38 2006 +0100
    11.2 +++ b/xen/common/schedule.c	Tue Mar 14 19:33:45 2006 +0100
    11.3 @@ -27,6 +27,7 @@
    11.4  #include <xen/softirq.h>
    11.5  #include <xen/trace.h>
    11.6  #include <xen/mm.h>
    11.7 +#include <xen/guest_access.h>
    11.8  #include <public/sched.h>
    11.9  #include <public/sched_ctl.h>
   11.10  
   11.11 @@ -42,6 +43,7 @@ string_param("sched", opt_sched);
   11.12  static void s_timer_fn(void *unused);
   11.13  static void t_timer_fn(void *unused);
   11.14  static void dom_timer_fn(void *data);
   11.15 +static void poll_timer_fn(void *data);
   11.16  
   11.17  /* This is global for now so that private implementations can reach it */
   11.18  struct schedule_data schedule_data[NR_CPUS];
   11.19 @@ -164,8 +166,9 @@ struct vcpu *alloc_vcpu(
   11.20  
   11.21  void sched_add_domain(struct vcpu *v) 
   11.22  {
   11.23 -    /* Initialise the per-domain timer. */
   11.24 +    /* Initialise the per-domain timers. */
   11.25      init_timer(&v->timer, dom_timer_fn, v, v->processor);
   11.26 +    init_timer(&v->poll_timer, poll_timer_fn, v, v->processor);
   11.27  
   11.28      if ( is_idle_vcpu(v) )
   11.29      {
   11.30 @@ -181,6 +184,8 @@ void sched_add_domain(struct vcpu *v)
   11.31  void sched_rem_domain(struct vcpu *v) 
   11.32  {
   11.33      kill_timer(&v->timer);
   11.34 +    kill_timer(&v->poll_timer);
   11.35 +
   11.36      SCHED_OP(rem_task, v);
   11.37      TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
   11.38  }
   11.39 @@ -270,6 +275,55 @@ static long do_block(void)
   11.40      return 0;
   11.41  }
   11.42  
   11.43 +static long do_poll(struct sched_poll *sched_poll)
   11.44 +{
   11.45 +    struct vcpu  *v = current;
   11.46 +    evtchn_port_t port;
   11.47 +    long          rc = 0;
   11.48 +    unsigned int  i;
   11.49 +
   11.50 +    /* Fairly arbitrary limit. */
   11.51 +    if ( sched_poll->nr_ports > 128 )
   11.52 +        return -EINVAL;
   11.53 +
   11.54 +    if ( !guest_handle_okay(sched_poll->ports, sched_poll->nr_ports) )
   11.55 +        return -EFAULT;
   11.56 +
   11.57 +    /* Ensure that upcalls are disabled: tested by evtchn_set_pending(). */
   11.58 +    if ( !v->vcpu_info->evtchn_upcall_mask )
   11.59 +        return -EINVAL;
   11.60 +
   11.61 +    set_bit(_VCPUF_blocked, &v->vcpu_flags);
   11.62 +
   11.63 +    /* Check for events /after/ blocking: avoids wakeup waiting race. */
   11.64 +    for ( i = 0; i < sched_poll->nr_ports; i++ )
   11.65 +    {
   11.66 +        rc = -EFAULT;
   11.67 +        if ( __copy_from_guest_offset(&port, sched_poll->ports, i, 1) )
   11.68 +            goto out;
   11.69 +
   11.70 +        rc = -EINVAL;
   11.71 +        if ( port >= MAX_EVTCHNS )
   11.72 +            goto out;
   11.73 +
   11.74 +        rc = 0;
   11.75 +        if ( evtchn_pending(v->domain, port) )
   11.76 +            goto out;
   11.77 +    }
   11.78 +
   11.79 +    if ( sched_poll->timeout != 0 )
   11.80 +        set_timer(&v->poll_timer, sched_poll->timeout);
   11.81 +
   11.82 +    TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
   11.83 +    __enter_scheduler();
   11.84 +
   11.85 +    stop_timer(&v->poll_timer);
   11.86 +
   11.87 + out:
   11.88 +    clear_bit(_VCPUF_blocked, &v->vcpu_flags);
   11.89 +    return rc;
   11.90 +}
   11.91 +
   11.92  /* Voluntarily yield the processor for this allocation. */
   11.93  static long do_yield(void)
   11.94  {
   11.95 @@ -311,6 +365,61 @@ long do_sched_op(int cmd, unsigned long 
   11.96      return ret;
   11.97  }
   11.98  
   11.99 +long do_sched_op_new(int cmd, GUEST_HANDLE(void) arg)
  11.100 +{
  11.101 +    long ret = 0;
  11.102 +
  11.103 +    switch ( cmd )
  11.104 +    {
  11.105 +    case SCHEDOP_yield:
  11.106 +    {
  11.107 +        ret = do_yield();
  11.108 +        break;
  11.109 +    }
  11.110 +
  11.111 +    case SCHEDOP_block:
  11.112 +    {
  11.113 +        ret = do_block();
  11.114 +        break;
  11.115 +    }
  11.116 +
  11.117 +    case SCHEDOP_shutdown:
  11.118 +    {
  11.119 +        struct sched_shutdown sched_shutdown;
  11.120 +
  11.121 +        ret = -EFAULT;
  11.122 +        if ( copy_from_guest(&sched_shutdown, arg, 1) )
  11.123 +            break;
  11.124 +
  11.125 +        ret = 0;
  11.126 +        TRACE_3D(TRC_SCHED_SHUTDOWN,
  11.127 +                 current->domain->domain_id, current->vcpu_id,
  11.128 +                 sched_shutdown.reason);
  11.129 +        domain_shutdown(current->domain, (u8)sched_shutdown.reason);
  11.130 +
  11.131 +        break;
  11.132 +    }
  11.133 +
  11.134 +    case SCHEDOP_poll:
  11.135 +    {
  11.136 +        struct sched_poll sched_poll;
  11.137 +
  11.138 +        ret = -EFAULT;
  11.139 +        if ( copy_from_guest(&sched_poll, arg, 1) )
  11.140 +            break;
  11.141 +
  11.142 +        ret = do_poll(&sched_poll);
  11.143 +
  11.144 +        break;
  11.145 +    }
  11.146 +
  11.147 +    default:
  11.148 +        ret = -ENOSYS;
  11.149 +    }
  11.150 +
  11.151 +    return ret;
  11.152 +}
  11.153 +
  11.154  /* Per-domain one-shot-timer hypercall. */
  11.155  long do_set_timer_op(s_time_t timeout)
  11.156  {
  11.157 @@ -518,6 +627,13 @@ static void dom_timer_fn(void *data)
  11.158      send_guest_virq(v, VIRQ_TIMER);
  11.159  }
  11.160  
  11.161 +/* SCHEDOP_poll timeout callback. */
  11.162 +static void poll_timer_fn(void *data)
  11.163 +{
  11.164 +    struct vcpu *v = data;
  11.165 +    vcpu_unblock(v);
  11.166 +}
  11.167 +
  11.168  /* Initialise the data structures. */
  11.169  void __init scheduler_init(void)
  11.170  {
    12.1 --- a/xen/include/public/event_channel.h	Tue Mar 14 17:35:38 2006 +0100
    12.2 +++ b/xen/include/public/event_channel.h	Tue Mar 14 19:33:45 2006 +0100
    12.3 @@ -10,6 +10,7 @@
    12.4  #define __XEN_PUBLIC_EVENT_CHANNEL_H__
    12.5  
    12.6  typedef uint32_t evtchn_port_t;
    12.7 +DEFINE_GUEST_HANDLE(evtchn_port_t);
    12.8  
    12.9  /*
   12.10   * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
    13.1 --- a/xen/include/public/sched.h	Tue Mar 14 17:35:38 2006 +0100
    13.2 +++ b/xen/include/public/sched.h	Tue Mar 14 19:33:45 2006 +0100
    13.3 @@ -9,16 +9,32 @@
    13.4  #ifndef __XEN_PUBLIC_SCHED_H__
    13.5  #define __XEN_PUBLIC_SCHED_H__
    13.6  
    13.7 +#include "event_channel.h"
    13.8 +
    13.9  /*
   13.10 - * Prototype for this hypercall is:
   13.11 - *  int sched_op(int cmd, unsigned long arg)
   13.12 + * There are two forms of this hypercall.
   13.13 + * 
   13.14 + * The first and preferred version is only available from Xen 3.0.2. 
   13.15 + * The prototype for this hypercall is:
   13.16 + *  long sched_op_new(int cmd, void *arg)
   13.17   * @cmd == SCHEDOP_??? (scheduler operation).
   13.18 - * @arg == Operation-specific extra argument(s).
   13.19 + * @arg == Operation-specific extra argument(s), as described below.
   13.20 + * 
   13.21 + * The legacy version of this hypercall supports only the following commands:
   13.22 + * SCHEDOP_yield, SCHEDOP_block, and SCHEDOP_shutdown. The prototype for the
   13.23 + * legacy hypercall is:
   13.24 + *  long sched_op(int cmd, unsigned long arg)
   13.25 + * @cmd == SCHEDOP_??? (scheduler operation).
   13.26 + * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
   13.27 + *      == SHUTDOWN_* code (SCHEDOP_shutdown)
   13.28 + * 
   13.29 + * The sub-command descriptions below describe extra arguments for the
   13.30 + * sched_op_new() hypercall.
   13.31   */
   13.32  
   13.33  /*
   13.34   * Voluntarily yield the CPU.
   13.35 - * @arg == 0.
   13.36 + * @arg == NULL.
   13.37   */
   13.38  #define SCHEDOP_yield       0
   13.39  
   13.40 @@ -27,18 +43,35 @@
   13.41   * If called with event upcalls masked, this operation will atomically
   13.42   * reenable event delivery and check for pending events before blocking the
   13.43   * VCPU. This avoids a "wakeup waiting" race.
   13.44 - * @arg == 0.
   13.45 + * @arg == NULL.
   13.46   */
   13.47  #define SCHEDOP_block       1
   13.48  
   13.49  /*
   13.50   * Halt execution of this domain (all VCPUs) and notify the system controller.
   13.51 - * @arg == SHUTDOWN_??? (reason for shutdown).
   13.52 + * @arg == pointer to sched_shutdown structure.
   13.53   */
   13.54  #define SCHEDOP_shutdown    2
   13.55 +typedef struct sched_shutdown {
   13.56 +    unsigned int reason; /* SHUTDOWN_* */
   13.57 +} sched_shutdown_t;
   13.58 +DEFINE_GUEST_HANDLE(sched_shutdown_t);
   13.59  
   13.60  /*
   13.61 - * Reason codes for SCHEDOP_shutdown. These may be interpreted by controller
   13.62 + * Poll a set of event-channel ports. Return when one or more are pending. An
   13.63 + * optional timeout may be specified.
   13.64 + * @arg == pointer to sched_poll structure.
   13.65 + */
   13.66 +#define SCHEDOP_poll        3
   13.67 +typedef struct sched_poll {
   13.68 +    GUEST_HANDLE(evtchn_port_t) ports;
   13.69 +    unsigned int nr_ports;
   13.70 +    uint64_t timeout;
   13.71 +} sched_poll_t;
   13.72 +DEFINE_GUEST_HANDLE(sched_poll_t);
   13.73 +
   13.74 +/*
   13.75 + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
   13.76   * software to determine the appropriate action. For the most part, Xen does
   13.77   * not care about the shutdown code.
   13.78   */
    14.1 --- a/xen/include/public/xen.h	Tue Mar 14 17:35:38 2006 +0100
    14.2 +++ b/xen/include/public/xen.h	Tue Mar 14 19:33:45 2006 +0100
    14.3 @@ -59,6 +59,7 @@
    14.4  #define __HYPERVISOR_mmuext_op            26
    14.5  #define __HYPERVISOR_acm_op               27
    14.6  #define __HYPERVISOR_nmi_op               28
    14.7 +#define __HYPERVISOR_sched_op_new         29
    14.8  
    14.9  /* 
   14.10   * VIRTUAL INTERRUPTS
    15.1 --- a/xen/include/xen/event.h	Tue Mar 14 17:35:38 2006 +0100
    15.2 +++ b/xen/include/xen/event.h	Tue Mar 14 19:33:45 2006 +0100
    15.3 @@ -15,41 +15,14 @@
    15.4  #include <asm/bitops.h>
    15.5  #include <asm/event.h>
    15.6  
    15.7 -/*
    15.8 - * EVENT-CHANNEL NOTIFICATIONS
    15.9 - * NB. On x86, the atomic bit operations also act as memory barriers. There
   15.10 - * is therefore sufficiently strict ordering for this architecture -- others
   15.11 - * may require explicit memory barriers.
   15.12 - */
   15.13 -
   15.14 -static inline void evtchn_set_pending(struct vcpu *v, int port)
   15.15 -{
   15.16 -    struct domain *d = v->domain;
   15.17 -    shared_info_t *s = d->shared_info;
   15.18 -
   15.19 -    /* These four operations must happen in strict order. */
   15.20 -    if ( !test_and_set_bit(port, &s->evtchn_pending[0]) &&
   15.21 -         !test_bit        (port, &s->evtchn_mask[0])    &&
   15.22 -         !test_and_set_bit(port / BITS_PER_LONG,
   15.23 -                           &v->vcpu_info->evtchn_pending_sel) &&
   15.24 -         !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
   15.25 -    {
   15.26 -        evtchn_notify(v);
   15.27 -    }
   15.28 -}
   15.29 +extern void evtchn_set_pending(struct vcpu *v, int port);
   15.30  
   15.31  /*
   15.32   * send_guest_virq:
   15.33   *  @v:        VCPU to which virtual IRQ should be sent
   15.34   *  @virq:     Virtual IRQ number (VIRQ_*)
   15.35   */
   15.36 -static inline void send_guest_virq(struct vcpu *v, int virq)
   15.37 -{
   15.38 -    int port = v->virq_to_evtchn[virq];
   15.39 -
   15.40 -    if ( likely(port != 0) )
   15.41 -        evtchn_set_pending(v, port);
   15.42 -}
   15.43 +extern void send_guest_virq(struct vcpu *v, int virq);
   15.44  
   15.45  /*
   15.46   * send_guest_pirq:
   15.47 @@ -63,6 +36,9 @@ extern void send_guest_pirq(struct domai
   15.48      (!!(v)->vcpu_info->evtchn_upcall_pending &  \
   15.49        !(v)->vcpu_info->evtchn_upcall_mask)
   15.50  
   15.51 +#define evtchn_pending(d, p)                    \
   15.52 +    (test_bit((p), &(d)->shared_info->evtchn_pending[0]))
   15.53 +
   15.54  /* Send a notification from a local event-channel port. */
   15.55  extern long evtchn_send(unsigned int lport);
   15.56  
    16.1 --- a/xen/include/xen/sched.h	Tue Mar 14 17:35:38 2006 +0100
    16.2 +++ b/xen/include/xen/sched.h	Tue Mar 14 19:33:45 2006 +0100
    16.3 @@ -67,6 +67,8 @@ struct vcpu
    16.4      struct timer     timer;         /* one-shot timer for timeout values */
    16.5      unsigned long    sleep_tick;    /* tick at which this vcpu started sleep */
    16.6  
    16.7 +    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */
    16.8 +
    16.9      void            *sched_priv;    /* scheduler-specific data */
   16.10  
   16.11      struct vcpu_runstate_info runstate;