ia64/xen-unstable

changeset 14340:215b799fa181

xen: New vcpu_op commands for setting periodic and single-shot timers.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Fri Mar 09 18:26:47 2007 +0000 (2007-03-09)
parents 1a01d8d9dbec
children 818da23b7571
files xen/arch/ia64/xen/xensetup.c xen/arch/powerpc/setup.c xen/arch/x86/apic.c xen/arch/x86/setup.c xen/common/compat/domain.c xen/common/domain.c xen/common/page_alloc.c xen/common/sched_credit.c xen/common/schedule.c xen/include/public/vcpu.h xen/include/xen/mm.h xen/include/xen/sched-if.h xen/include/xen/sched.h xen/include/xen/timer.h
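
This changeset replaces the global 10ms scheduler tick (t_timer, started by schedulers_start()) with timers that each guest controls per VCPU. A minimal sketch of how a guest kernel might drive the new periodic-timer commands, assuming the usual HYPERVISOR_vcpu_op hypercall wrapper (the helper names here are hypothetical):

    /* Sketch only: HYPERVISOR_vcpu_op and the VCPUOP_* commands come from
     * this changeset's public interface; the helpers are hypothetical. */
    static int vcpu_set_tick(int vcpu, uint64_t period_ns)
    {
        struct vcpu_set_periodic_timer p = { .period_ns = period_ns };
        /* Periods below 1ms are rejected with -EINVAL (see do_vcpu_op()). */
        return HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, vcpu, &p);
    }

    static int vcpu_stop_tick(int vcpu)
    {
        /* VCPUOP_stop_periodic_timer takes no argument structure. */
        return HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, vcpu, NULL);
    }
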
line diff
     1.1 --- a/xen/arch/ia64/xen/xensetup.c	Fri Mar 09 18:26:22 2007 +0000
     1.2 +++ b/xen/arch/ia64/xen/xensetup.c	Fri Mar 09 18:26:47 2007 +0000
     1.3 @@ -543,8 +543,6 @@ printk("num_online_cpus=%d, max_cpus=%d\
     1.4  
     1.5      domain0_ready = 1;
     1.6  
     1.7 -    schedulers_start();
     1.8 -
     1.9      domain_unpause_by_systemcontroller(dom0);
    1.10  
    1.11      startup_cpu_idle_loop();
     2.1 --- a/xen/arch/powerpc/setup.c	Fri Mar 09 18:26:22 2007 +0000
     2.2 +++ b/xen/arch/powerpc/setup.c	Fri Mar 09 18:26:47 2007 +0000
     2.3 @@ -365,9 +365,6 @@ static void __init __start_xen(multiboot
     2.4          kick_secondary_cpus(max_cpus);
     2.5      }
     2.6  
     2.7 -    /* Secondary processors must be online before we call this.  */
     2.8 -    schedulers_start();
     2.9 -
    2.10      /* This cannot be called before secondary cpus are marked online.  */
    2.11      percpu_free_unused_areas();
    2.12  
     3.1 --- a/xen/arch/x86/apic.c	Fri Mar 09 18:26:22 2007 +0000
     3.2 +++ b/xen/arch/x86/apic.c	Fri Mar 09 18:26:47 2007 +0000
     3.3 @@ -1023,6 +1023,13 @@ int reprogram_timer(s_time_t timeout)
     3.4      u64         apic_tmict;
     3.5  
     3.6      /*
     3.7 +     * If we don't have local APIC then we just poll the timer list off the
     3.8 +     * PIT interrupt.
     3.9 +     */
    3.10 +    if ( !cpu_has_apic )
    3.11 +        return 1;
    3.12 +
    3.13 +    /*
    3.14       * We use this value because we don't trust zero (we think it may just
    3.15       * cause an immediate interrupt). At least this is guaranteed to hold it
    3.16       * off for ages (esp. since the clock ticks on bus clock, not cpu clock!).
    3.17 @@ -1044,13 +1051,6 @@ int reprogram_timer(s_time_t timeout)
    3.18          return 0;
    3.19      }
    3.20  
    3.21 -    /*
    3.22 -     * If we don't have local APIC then we just poll the timer list off the
    3.23 -     * PIT interrupt. Cheesy but good enough to work on eg. VMware :-)
    3.24 -     */
    3.25 -    if ( !cpu_has_apic )
    3.26 -        return 1;
    3.27 -
    3.28      /* conversion to bus units */
    3.29      apic_tmict = (((u64)bus_scale) * expire)>>18;
    3.30  
     4.1 --- a/xen/arch/x86/setup.c	Fri Mar 09 18:26:22 2007 +0000
     4.2 +++ b/xen/arch/x86/setup.c	Fri Mar 09 18:26:47 2007 +0000
     4.3 @@ -195,13 +195,13 @@ static void __init percpu_free_unused_ar
     4.4  
     4.5      /* Find first unused CPU number. */
     4.6      for ( i = 0; i < NR_CPUS; i++ )
     4.7 -        if ( !cpu_online(i) )
     4.8 +        if ( !cpu_possible(i) )
     4.9              break;
    4.10      first_unused = i;
    4.11  
    4.12 -    /* Check that there are no holes in cpu_online_map. */
    4.13 +    /* Check that there are no holes in cpu_possible_map. */
    4.14      for ( ; i < NR_CPUS; i++ )
    4.15 -        BUG_ON(cpu_online(i));
    4.16 +        BUG_ON(cpu_possible(i));
    4.17  
    4.18  #ifndef MEMORY_GUARD
    4.19      init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
    4.20 @@ -717,8 +717,6 @@ void __init __start_xen(multiboot_info_t
    4.21  
    4.22      do_initcalls();
    4.23  
    4.24 -    schedulers_start();
    4.25 -
    4.26      if ( opt_watchdog ) 
    4.27          watchdog_enable();
    4.28  
     5.1 --- a/xen/common/compat/domain.c	Fri Mar 09 18:26:22 2007 +0000
     5.2 +++ b/xen/common/compat/domain.c	Fri Mar 09 18:26:47 2007 +0000
     5.3 @@ -55,6 +55,10 @@ int compat_vcpu_op(int cmd, int vcpuid, 
     5.4      case VCPUOP_up:
     5.5      case VCPUOP_down:
     5.6      case VCPUOP_is_up:
     5.7 +    case VCPUOP_set_periodic_timer:
     5.8 +    case VCPUOP_stop_periodic_timer:
     5.9 +    case VCPUOP_set_singleshot_timer:
    5.10 +    case VCPUOP_stop_singleshot_timer:
    5.11          rc = do_vcpu_op(cmd, vcpuid, arg);
    5.12          break;
    5.13  
     6.1 --- a/xen/common/domain.c	Fri Mar 09 18:26:22 2007 +0000
     6.2 +++ b/xen/common/domain.c	Fri Mar 09 18:26:47 2007 +0000
     6.3 @@ -102,6 +102,9 @@ struct vcpu *alloc_vcpu(
     6.4      v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
     6.5      v->runstate.state_entry_time = NOW();
     6.6  
     6.7 +    /* VCPUs by default have a 100Hz ticker. */
     6.8 +    v->periodic_period = MILLISECS(10);
     6.9 +
    6.10      if ( (vcpu_id != 0) && !is_idle_domain(d) )
    6.11          set_bit(_VCPUF_down, &v->vcpu_flags);
    6.12  
    6.13 @@ -267,6 +270,9 @@ void domain_kill(struct domain *d)
    6.14      domain_relinquish_resources(d);
    6.15      put_domain(d);
    6.16  
    6.17 +    /* Kick page scrubbing after domain_relinquish_resources(). */
    6.18 +    page_scrub_kick();
    6.19 +
    6.20      send_guest_global_virq(dom0, VIRQ_DOM_EXC);
    6.21  }
    6.22  
    6.23 @@ -589,6 +595,61 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
    6.24          break;
    6.25      }
    6.26  
    6.27 +    case VCPUOP_set_periodic_timer:
    6.28 +    {
    6.29 +        struct vcpu_set_periodic_timer set;
    6.30 +
    6.31 +        rc = -EFAULT;
    6.32 +        if ( copy_from_guest(&set, arg, 1) )
    6.33 +            break;
    6.34 +
    6.35 +        rc = -EINVAL;
    6.36 +        if ( set.period_ns < MILLISECS(1) )
    6.37 +            break;
    6.38 +
    6.39 +        v->periodic_period = set.period_ns;
    6.40 +        vcpu_force_reschedule(v);
    6.41 +
    6.42 +        break;
    6.43 +    }
    6.44 +
    6.45 +    case VCPUOP_stop_periodic_timer:
    6.46 +    {
    6.47 +        v->periodic_period = 0;
    6.48 +        vcpu_force_reschedule(v);
    6.49 +        break;
    6.50 +    }
    6.51 +
    6.52 +    case VCPUOP_set_singleshot_timer:
    6.53 +    {
    6.54 +        struct vcpu_set_singleshot_timer set;
    6.55 +
    6.56 +        if ( v != current )
    6.57 +            return -EINVAL;
    6.58 +
    6.59 +        if ( copy_from_guest(&set, arg, 1) )
    6.60 +            return -EFAULT;
    6.61 +
    6.62 +        if ( v->singleshot_timer.cpu != smp_processor_id() )
    6.63 +        {
    6.64 +            stop_timer(&v->singleshot_timer);
    6.65 +            v->singleshot_timer.cpu = smp_processor_id();
    6.66 +        }
    6.67 +
    6.68 +        set_timer(&v->singleshot_timer, set.timeout_abs_ns);
    6.69 +
    6.70 +        break;
    6.71 +    }
    6.72 +
    6.73 +    case VCPUOP_stop_singleshot_timer:
    6.74 +    {
    6.75 +        if ( v != current )
    6.76 +            return -EINVAL;
    6.77 +
    6.78 +        stop_timer(&v->singleshot_timer);
    6.79 +        break;
    6.80 +    }
    6.81 +
    6.82      default:
    6.83          rc = arch_do_vcpu_op(cmd, v, arg);
    6.84          break;
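
Note the restriction and re-binding in the single-shot case above: the command must be issued by the VCPU on itself, and the backing struct timer is moved to the physical CPU the VCPU is currently running on. The same idiom reappears in do_set_timer_op() below; as a stand-alone sketch (hypothetical helper, open-coded twice in the patch):

    /* Keep a per-VCPU timer on the physical CPU its VCPU runs on. */
    static void migrate_timer_to_current_cpu(struct timer *t, s_time_t expires)
    {
        if ( t->cpu != smp_processor_id() )
        {
            stop_timer(t);                /* deactivate on the old CPU...  */
            t->cpu = smp_processor_id();  /* ...rebind to this CPU's heap  */
        }
        set_timer(t, expires);            /* (re)activate locally          */
    }
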
     7.1 --- a/xen/common/page_alloc.c	Fri Mar 09 18:26:22 2007 +0000
     7.2 +++ b/xen/common/page_alloc.c	Fri Mar 09 18:26:47 2007 +0000
     7.3 @@ -970,6 +970,8 @@ static __init int pagealloc_keyhandler_i
     7.4   * PAGE SCRUBBING
     7.5   */
     7.6  
     7.7 +static DEFINE_PER_CPU(struct timer, page_scrub_timer);
     7.8 +
     7.9  static void page_scrub_softirq(void)
    7.10  {
    7.11      struct list_head *ent;
    7.12 @@ -978,7 +980,7 @@ static void page_scrub_softirq(void)
    7.13      int               i;
    7.14      s_time_t          start = NOW();
    7.15  
    7.16 -    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    7.17 +    /* Aim to do 1ms of work every 10ms. */
    7.18      do {
    7.19          spin_lock(&page_scrub_lock);
    7.20  
    7.21 @@ -1014,6 +1016,13 @@ static void page_scrub_softirq(void)
    7.22              free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
    7.23          }
    7.24      } while ( (NOW() - start) < MILLISECS(1) );
    7.25 +
    7.26 +    set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
    7.27 +}
    7.28 +
    7.29 +static void page_scrub_timer_fn(void *unused)
    7.30 +{
    7.31 +    page_scrub_schedule_work();
    7.32  }
    7.33  
    7.34  unsigned long avail_scrub_pages(void)
    7.35 @@ -1049,6 +1058,10 @@ static __init int register_heap_trigger(
    7.36  
    7.37  static __init int page_scrub_init(void)
    7.38  {
    7.39 +    int cpu;
    7.40 +    for_each_cpu ( cpu )
    7.41 +        init_timer(&per_cpu(page_scrub_timer, cpu),
    7.42 +                   page_scrub_timer_fn, NULL, cpu);
    7.43      open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    7.44      return 0;
    7.45  }
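
With the scheduler's shared 10ms tick gone, page scrubbing paces itself: the per-CPU timer raises PAGE_SCRUB_SOFTIRQ (via page_scrub_schedule_work()), the softirq handler scrubs for roughly 1ms, then re-arms the timer 10ms out. The two-stage pattern in isolation (sketch; WORK_SOFTIRQ and do_unit_of_work() are stand-ins):

    static DEFINE_PER_CPU(struct timer, worker_timer);

    static void worker_timer_fn(void *unused)
    {
        raise_softirq(WORK_SOFTIRQ);   /* defer real work out of IRQ context */
    }

    static void work_softirq(void)
    {
        s_time_t start = NOW();
        do {
            do_unit_of_work();         /* one bounded chunk */
        } while ( (NOW() - start) < MILLISECS(1) );
        /* Re-arm: the next ~1ms burst of work happens 10ms from now. */
        set_timer(&this_cpu(worker_timer), NOW() + MILLISECS(10));
    }
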
     8.1 --- a/xen/common/sched_credit.c	Fri Mar 09 18:26:22 2007 +0000
     8.2 +++ b/xen/common/sched_credit.c	Fri Mar 09 18:26:47 2007 +0000
     8.3 @@ -186,6 +186,8 @@
     8.4  struct csched_pcpu {
     8.5      struct list_head runq;
     8.6      uint32_t runq_sort_last;
     8.7 +    struct timer ticker;
     8.8 +    unsigned int tick;
     8.9  };
    8.10  
    8.11  /*
    8.12 @@ -245,7 +247,7 @@ struct csched_private {
    8.13   */
    8.14  static struct csched_private csched_priv;
    8.15  
    8.16 -
    8.17 +static void csched_tick(void *_cpu);
    8.18  
    8.19  static inline int
    8.20  __cycle_cpu(int cpu, const cpumask_t *mask)
    8.21 @@ -362,12 +364,13 @@ csched_pcpu_init(int cpu)
    8.22      if ( csched_priv.master >= csched_priv.ncpus )
    8.23          csched_priv.master = cpu;
    8.24  
    8.25 +    init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
    8.26      INIT_LIST_HEAD(&spc->runq);
    8.27      spc->runq_sort_last = csched_priv.runq_sort;
    8.28      per_cpu(schedule_data, cpu).sched_priv = spc;
    8.29  
    8.30      /* Start off idling... */
    8.31 -    BUG_ON( !is_idle_vcpu(per_cpu(schedule_data, cpu).curr) );
    8.32 +    BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
    8.33      cpu_set(cpu, csched_priv.idlers);
    8.34  
    8.35      spin_unlock_irqrestore(&csched_priv.lock, flags);
    8.36 @@ -1013,8 +1016,13 @@ csched_acct(void)
    8.37  }
    8.38  
    8.39  static void
    8.40 -csched_tick(unsigned int cpu)
    8.41 +csched_tick(void *_cpu)
    8.42  {
    8.43 +    unsigned int cpu = (unsigned long)_cpu;
    8.44 +    struct csched_pcpu *spc = CSCHED_PCPU(cpu);
    8.45 +
    8.46 +    spc->tick++;
    8.47 +
    8.48      /*
    8.49       * Accounting for running VCPU
    8.50       */
    8.51 @@ -1028,7 +1036,7 @@ csched_tick(unsigned int cpu)
    8.52       * we could distribute or at the very least cycle the duty.
    8.53       */
    8.54      if ( (csched_priv.master == cpu) &&
    8.55 -         (per_cpu(schedule_data, cpu).tick % CSCHED_TICKS_PER_ACCT) == 0 )
    8.56 +         (spc->tick % CSCHED_TICKS_PER_ACCT) == 0 )
    8.57      {
    8.58          csched_acct();
    8.59      }
    8.60 @@ -1041,6 +1049,8 @@ csched_tick(unsigned int cpu)
    8.61       * once per accounting period (currently 30 milliseconds).
    8.62       */
    8.63      csched_runq_sort(cpu);
    8.64 +
    8.65 +    set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
    8.66  }
    8.67  
    8.68  static struct csched_vcpu *
    8.69 @@ -1248,8 +1258,7 @@ csched_dump_pcpu(int cpu)
    8.70      spc = CSCHED_PCPU(cpu);
    8.71      runq = &spc->runq;
    8.72  
    8.73 -    printk(" tick=%lu, sort=%d, sibling=0x%lx, core=0x%lx\n",
    8.74 -            per_cpu(schedule_data, cpu).tick,
    8.75 +    printk(" sort=%d, sibling=0x%lx, core=0x%lx\n",
    8.76              spc->runq_sort_last,
    8.77              cpu_sibling_map[cpu].bits[0],
    8.78              cpu_core_map[cpu].bits[0]);
    8.79 @@ -1341,6 +1350,22 @@ csched_init(void)
    8.80      CSCHED_STATS_RESET();
    8.81  }
    8.82  
    8.83 +/* Tickers cannot be kicked until SMP subsystem is alive. */
    8.84 +static __init int csched_start_tickers(void)
    8.85 +{
    8.86 +    struct csched_pcpu *spc;
    8.87 +    unsigned int cpu;
    8.88 +
    8.89 +    for_each_online_cpu ( cpu )
    8.90 +    {
    8.91 +        spc = CSCHED_PCPU(cpu);
    8.92 +        set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
    8.93 +    }
    8.94 +
    8.95 +    return 0;
    8.96 +}
    8.97 +__initcall(csched_start_tickers);
    8.98 +
    8.99  
   8.100  struct scheduler sched_credit_def = {
   8.101      .name           = "SMP Credit Scheduler",
   8.102 @@ -1359,7 +1384,6 @@ struct scheduler sched_credit_def = {
   8.103      .adjust         = csched_dom_cntl,
   8.104  
   8.105      .pick_cpu       = csched_cpu_pick,
   8.106 -    .tick           = csched_tick,
   8.107      .do_schedule    = csched_schedule,
   8.108  
   8.109      .dump_cpu_state = csched_dump_pcpu,
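
The credit scheduler now owns its tick outright rather than hooking the generic one. In outline:

    /* Lifecycle of the per-pcpu ticker added above:
     *   csched_pcpu_init():      init_timer(&spc->ticker, csched_tick, cpu, cpu)
     *   csched_start_tickers():  first set_timer(), via __initcall, once the
     *                            secondary CPUs are online
     *   csched_tick():           accounting and runq sorting, then set_timer()
     *                            again to re-arm
     * This is what allows the generic .tick hook, schedule_data.tick and
     * schedulers_start() to be removed. */
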
     9.1 --- a/xen/common/schedule.c	Fri Mar 09 18:26:22 2007 +0000
     9.2 +++ b/xen/common/schedule.c	Fri Mar 09 18:26:47 2007 +0000
     9.3 @@ -45,8 +45,8 @@ boolean_param("dom0_vcpus_pin", opt_dom0
     9.4  
     9.5  /* Various timer handlers. */
     9.6  static void s_timer_fn(void *unused);
     9.7 -static void t_timer_fn(void *unused);
     9.8 -static void vcpu_timer_fn(void *data);
     9.9 +static void vcpu_periodic_timer_fn(void *data);
    9.10 +static void vcpu_singleshot_timer_fn(void *data);
    9.11  static void poll_timer_fn(void *data);
    9.12  
    9.13  /* This is global for now so that private implementations can reach it */
    9.14 @@ -66,9 +66,6 @@ static struct scheduler ops;
    9.15           (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
    9.16            : (typeof(ops.fn(__VA_ARGS__)))0 )
    9.17  
    9.18 -/* Per-CPU periodic timer sends an event to the currently-executing domain. */
    9.19 -static DEFINE_PER_CPU(struct timer, t_timer);
    9.20 -
    9.21  static inline void vcpu_runstate_change(
    9.22      struct vcpu *v, int new_state, s_time_t new_entry_time)
    9.23  {
    9.24 @@ -114,8 +111,12 @@ int sched_init_vcpu(struct vcpu *v, unsi
    9.25          cpus_setall(v->cpu_affinity);
    9.26  
    9.27      /* Initialise the per-domain timers. */
    9.28 -    init_timer(&v->timer, vcpu_timer_fn, v, v->processor);
    9.29 -    init_timer(&v->poll_timer, poll_timer_fn, v, v->processor);
    9.30 +    init_timer(&v->periodic_timer, vcpu_periodic_timer_fn,
    9.31 +               v, v->processor);
    9.32 +    init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn,
    9.33 +               v, v->processor);
    9.34 +    init_timer(&v->poll_timer, poll_timer_fn,
    9.35 +               v, v->processor);
    9.36  
    9.37      /* Idle VCPUs are scheduled immediately. */
    9.38      if ( is_idle_domain(d) )
    9.39 @@ -132,7 +133,8 @@ int sched_init_vcpu(struct vcpu *v, unsi
    9.40  
    9.41  void sched_destroy_vcpu(struct vcpu *v)
    9.42  {
    9.43 -    kill_timer(&v->timer);
    9.44 +    kill_timer(&v->periodic_timer);
    9.45 +    kill_timer(&v->singleshot_timer);
    9.46      kill_timer(&v->poll_timer);
    9.47      SCHED_OP(destroy_vcpu, v);
    9.48  }
    9.49 @@ -223,10 +225,29 @@ static void vcpu_migrate(struct vcpu *v)
    9.50      vcpu_wake(v);
    9.51  }
    9.52  
    9.53 +/*
    9.54 + * Force a VCPU through a deschedule/reschedule path.
    9.55 + * For example, using this when setting the periodic timer period means that
    9.56 + * most periodic-timer state need only be touched from within the scheduler
    9.57 + * which can thus be done without need for synchronisation.
    9.58 + */
    9.59 +void vcpu_force_reschedule(struct vcpu *v)
    9.60 +{
    9.61 +    vcpu_schedule_lock_irq(v);
    9.62 +    if ( test_bit(_VCPUF_running, &v->vcpu_flags) )
    9.63 +        set_bit(_VCPUF_migrating, &v->vcpu_flags);
    9.64 +    vcpu_schedule_unlock_irq(v);
    9.65 +
    9.66 +    if ( test_bit(_VCPUF_migrating, &v->vcpu_flags) )
    9.67 +    {
    9.68 +        vcpu_sleep_nosync(v);
    9.69 +        vcpu_migrate(v);
    9.70 +    }
    9.71 +}
    9.72 +
    9.73  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
    9.74  {
    9.75      cpumask_t online_affinity;
    9.76 -    unsigned long flags;
    9.77  
    9.78      if ( (v->domain->domain_id == 0) && opt_dom0_vcpus_pin )
    9.79          return -EINVAL;
    9.80 @@ -235,13 +256,13 @@ int vcpu_set_affinity(struct vcpu *v, cp
    9.81      if ( cpus_empty(online_affinity) )
    9.82          return -EINVAL;
    9.83  
    9.84 -    vcpu_schedule_lock_irqsave(v, flags);
    9.85 +    vcpu_schedule_lock_irq(v);
    9.86  
    9.87      v->cpu_affinity = *affinity;
    9.88      if ( !cpu_isset(v->processor, v->cpu_affinity) )
    9.89          set_bit(_VCPUF_migrating, &v->vcpu_flags);
    9.90  
    9.91 -    vcpu_schedule_unlock_irqrestore(v, flags);
    9.92 +    vcpu_schedule_unlock_irq(v);
    9.93  
    9.94      if ( test_bit(_VCPUF_migrating, &v->vcpu_flags) )
    9.95      {
    9.96 @@ -458,7 +479,7 @@ long do_set_timer_op(s_time_t timeout)
    9.97  
    9.98      if ( timeout == 0 )
    9.99      {
   9.100 -        stop_timer(&v->timer);
   9.101 +        stop_timer(&v->singleshot_timer);
   9.102      }
   9.103      else if ( unlikely(timeout < 0) || /* overflow into 64th bit? */
   9.104                unlikely((offset > 0) && ((uint32_t)(offset >> 50) != 0)) )
   9.105 @@ -474,14 +495,20 @@ long do_set_timer_op(s_time_t timeout)
   9.106           * timeout in this case can burn a lot of CPU. We therefore go for a
   9.107           * reasonable middleground of triggering a timer event in 100ms.
   9.108           */
   9.109 -        gdprintk(XENLOG_INFO, "Warning: huge timeout set by domain %d (vcpu %d):"
   9.110 -                " %"PRIx64"\n",
   9.111 +        gdprintk(XENLOG_INFO, "Warning: huge timeout set by domain %d "
   9.112 +                "(vcpu %d): %"PRIx64"\n",
   9.113                  v->domain->domain_id, v->vcpu_id, (uint64_t)timeout);
   9.114 -        set_timer(&v->timer, NOW() + MILLISECS(100));
   9.115 +        set_timer(&v->singleshot_timer, NOW() + MILLISECS(100));
   9.116      }
   9.117      else
   9.118      {
   9.119 -        set_timer(&v->timer, timeout);
   9.120 +        if ( v->singleshot_timer.cpu != smp_processor_id() )
   9.121 +        {
   9.122 +            stop_timer(&v->singleshot_timer);
   9.123 +            v->singleshot_timer.cpu = smp_processor_id();
   9.124 +        }
   9.125 +
   9.126 +        set_timer(&v->singleshot_timer, timeout);
   9.127      }
   9.128  
   9.129      return 0;
   9.130 @@ -540,6 +567,28 @@ long sched_adjust(struct domain *d, stru
   9.131      return 0;
   9.132  }
   9.133  
   9.134 +static void vcpu_periodic_timer_work(struct vcpu *v)
   9.135 +{
   9.136 +    s_time_t now = NOW();
   9.137 +    uint64_t periodic_next_event;
   9.138 +
   9.139 +    ASSERT(!active_timer(&v->periodic_timer));
   9.140 +
   9.141 +    if ( v->periodic_period == 0 )
   9.142 +        return;
   9.143 +
   9.144 +    periodic_next_event = v->periodic_last_event + v->periodic_period;
   9.145 +    if ( now > periodic_next_event )
   9.146 +    {
   9.147 +        send_timer_event(v);
   9.148 +        v->periodic_last_event = now;
   9.149 +        periodic_next_event = now + v->periodic_period;
   9.150 +    }
   9.151 +
   9.152 +    v->periodic_timer.cpu = smp_processor_id();
   9.153 +    set_timer(&v->periodic_timer, periodic_next_event);
   9.154 +}
   9.155 +
   9.156  /* 
   9.157   * The main function
   9.158   * - deschedule the current domain (scheduler independent).
   9.159 @@ -606,14 +655,13 @@ static void schedule(void)
   9.160  
   9.161      perfc_incrc(sched_ctx);
   9.162  
   9.163 -    prev->sleep_tick = sd->tick;
   9.164 +    stop_timer(&prev->periodic_timer);
   9.165  
   9.166      /* Ensure that the domain has an up-to-date time base. */
   9.167      if ( !is_idle_vcpu(next) )
   9.168      {
   9.169          update_vcpu_system_time(next);
   9.170 -        if ( next->sleep_tick != sd->tick )
   9.171 -            send_timer_event(next);
   9.172 +        vcpu_periodic_timer_work(next);
   9.173      }
   9.174  
   9.175      TRACE_4D(TRC_SCHED_SWITCH,
   9.176 @@ -631,13 +679,6 @@ void context_saved(struct vcpu *prev)
   9.177          vcpu_migrate(prev);
   9.178  }
   9.179  
   9.180 -/****************************************************************************
   9.181 - * Timers: the scheduler utilises a number of timers
   9.182 - * - s_timer: per CPU timer for preemption and scheduling decisions
   9.183 - * - t_timer: per CPU periodic timer to send timer interrupt to current dom
   9.184 - * - dom_timer: per domain timer to specifiy timeout values
   9.185 - ****************************************************************************/
   9.186 -
   9.187  /* The scheduler timer: force a run through the scheduler */
   9.188  static void s_timer_fn(void *unused)
   9.189  {
   9.190 @@ -645,28 +686,15 @@ static void s_timer_fn(void *unused)
   9.191      perfc_incrc(sched_irq);
   9.192  }
   9.193  
   9.194 -/* Periodic tick timer: send timer event to current domain */
   9.195 -static void t_timer_fn(void *unused)
   9.196 +/* Per-VCPU periodic timer function: sends a virtual timer interrupt. */
   9.197 +static void vcpu_periodic_timer_fn(void *data)
   9.198  {
   9.199 -    struct vcpu *v   = current;
   9.200 -
   9.201 -    this_cpu(schedule_data).tick++;
   9.202 -
   9.203 -    if ( !is_idle_vcpu(v) )
   9.204 -    {
   9.205 -        update_vcpu_system_time(v);
   9.206 -        send_timer_event(v);
   9.207 -    }
   9.208 -
   9.209 -    page_scrub_schedule_work();
   9.210 -
   9.211 -    SCHED_OP(tick, smp_processor_id());
   9.212 -
   9.213 -    set_timer(&this_cpu(t_timer), NOW() + MILLISECS(10));
   9.214 +    struct vcpu *v = data;
   9.215 +    vcpu_periodic_timer_work(v);
   9.216  }
   9.217  
   9.218 -/* Per-VCPU timer function: sends a virtual timer interrupt. */
   9.219 -static void vcpu_timer_fn(void *data)
   9.220 +/* Per-VCPU single-shot timer function: sends a virtual timer interrupt. */
   9.221 +static void vcpu_singleshot_timer_fn(void *data)
   9.222  {
   9.223      struct vcpu *v = data;
   9.224      send_timer_event(v);
   9.225 @@ -691,7 +719,6 @@ void __init scheduler_init(void)
   9.226      {
   9.227          spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
   9.228          init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
   9.229 -        init_timer(&per_cpu(t_timer, i), t_timer_fn, NULL, i);
   9.230      }
   9.231  
   9.232      for ( i = 0; schedulers[i] != NULL; i++ )
   9.233 @@ -708,16 +735,6 @@ void __init scheduler_init(void)
   9.234      SCHED_OP(init);
   9.235  }
   9.236  
   9.237 -/*
   9.238 - * Start a scheduler for each CPU
   9.239 - * This has to be done *after* the timers, e.g., APICs, have been initialised
   9.240 - */
   9.241 -void schedulers_start(void) 
   9.242 -{   
   9.243 -    t_timer_fn(0);
   9.244 -    smp_call_function((void *)t_timer_fn, NULL, 1, 1);
   9.245 -}
   9.246 -
   9.247  void dump_runq(unsigned char key)
   9.248  {
   9.249      s_time_t      now = NOW();
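
vcpu_periodic_timer_work() coalesces missed periods: a VCPU that was descheduled across several periods receives a single catch-up event, and the next expiry is re-based on the current time. Illustrative numbers:

    /* Trace of vcpu_periodic_timer_work() with period = 10ms:
     *   periodic_last_event = 100ms; VCPU sleeps; rescheduled at now = 137ms
     *   periodic_next_event = 100 + 10 = 110ms  -> already in the past, so:
     *     send_timer_event(v);
     *     periodic_last_event = 137ms;
     *     periodic_next_event = 137 + 10 = 147ms
     * One catch-up event is delivered, not the three that were missed. */
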
    10.1 --- a/xen/include/public/vcpu.h	Fri Mar 09 18:26:22 2007 +0000
    10.2 +++ b/xen/include/public/vcpu.h	Fri Mar 09 18:26:47 2007 +0000
    10.3 @@ -42,13 +42,13 @@
    10.4   * @extra_arg == pointer to vcpu_guest_context structure containing initial
    10.5   *               state for the VCPU.
    10.6   */
    10.7 -#define VCPUOP_initialise           0
    10.8 +#define VCPUOP_initialise            0
    10.9  
   10.10  /*
   10.11   * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
   10.12   * if the VCPU has not been initialised (VCPUOP_initialise).
   10.13   */
   10.14 -#define VCPUOP_up                   1
   10.15 +#define VCPUOP_up                    1
   10.16  
   10.17  /*
   10.18   * Bring down a VCPU (i.e., make it non-runnable).
   10.19 @@ -64,16 +64,16 @@
    10.20   *     practice to move a VCPU onto an 'idle' or default page table, LDT and
   10.21   *     GDT before bringing it down.
   10.22   */
   10.23 -#define VCPUOP_down                 2
   10.24 +#define VCPUOP_down                  2
   10.25  
   10.26  /* Returns 1 if the given VCPU is up. */
   10.27 -#define VCPUOP_is_up                3
   10.28 +#define VCPUOP_is_up                 3
   10.29  
   10.30  /*
   10.31   * Return information about the state and running time of a VCPU.
   10.32   * @extra_arg == pointer to vcpu_runstate_info structure.
   10.33   */
   10.34 -#define VCPUOP_get_runstate_info    4
   10.35 +#define VCPUOP_get_runstate_info     4
   10.36  struct vcpu_runstate_info {
   10.37      /* VCPU's current state (RUNSTATE_*). */
   10.38      int      state;
   10.39 @@ -128,6 +128,32 @@ struct vcpu_register_runstate_memory_are
   10.40      } addr;
   10.41  };
   10.42  typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
   10.43 +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
   10.44 +
   10.45 +/*
   10.46 + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
   10.47 + * which can be set via these commands. Periods smaller than one millisecond
   10.48 + * may not be supported.
   10.49 + */
   10.50 +#define VCPUOP_set_periodic_timer    6 /* arg == vcpu_set_periodic_timer_t */
   10.51 +#define VCPUOP_stop_periodic_timer   7 /* arg == NULL */
   10.52 +struct vcpu_set_periodic_timer {
   10.53 +    uint64_t period_ns;
   10.54 +};
   10.55 +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
   10.56 +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
   10.57 +
   10.58 +/*
   10.59 + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
   10.60 + * timer which can be set via these commands.
   10.61 + */
   10.62 +#define VCPUOP_set_singleshot_timer  8 /* arg == vcpu_set_singleshot_timer_t */
   10.63 +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
   10.64 +struct vcpu_set_singleshot_timer {
   10.65 +    uint64_t timeout_abs_ns;
   10.66 +};
   10.67 +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
   10.68 +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
   10.69  
   10.70  #endif /* __XEN_PUBLIC_VCPU_H__ */
   10.71  
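
A guest-side sketch of the single-shot command used as a clock-event backend. Hypothetical helpers: guest_time_now_ns() stands in for the guest's read of Xen system time, and this_vcpu_id() for the calling VCPU's id; note that do_vcpu_op() rejects the call with -EINVAL unless it targets the calling VCPU.

    static int set_next_event(uint64_t delta_ns)
    {
        struct vcpu_set_singleshot_timer single = {
            /* Absolute nanoseconds on the same clock as Xen's NOW(). */
            .timeout_abs_ns = guest_time_now_ns() + delta_ns,
        };
        /* Must be issued by the target VCPU itself. */
        return HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer,
                                  this_vcpu_id(), &single);
    }
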
    11.1 --- a/xen/include/xen/mm.h	Fri Mar 09 18:26:22 2007 +0000
    11.2 +++ b/xen/include/xen/mm.h	Fri Mar 09 18:26:47 2007 +0000
    11.3 @@ -92,6 +92,11 @@ extern struct list_head page_scrub_list;
    11.4          if ( !list_empty(&page_scrub_list) )    \
    11.5              raise_softirq(PAGE_SCRUB_SOFTIRQ);  \
    11.6      } while ( 0 )
    11.7 +#define page_scrub_kick()                                               \
    11.8 +    do {                                                                \
    11.9 +        if ( !list_empty(&page_scrub_list) )                            \
   11.10 +            cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ);  \
   11.11 +    } while ( 0 )
   11.12  unsigned long avail_scrub_pages(void);
   11.13  
   11.14  #include <asm/mm.h>
    12.1 --- a/xen/include/xen/sched-if.h	Fri Mar 09 18:26:22 2007 +0000
    12.2 +++ b/xen/include/xen/sched-if.h	Fri Mar 09 18:26:47 2007 +0000
    12.3 @@ -16,7 +16,6 @@ struct schedule_data {
    12.4      struct vcpu        *idle;           /* idle task for this cpu          */
    12.5      void               *sched_priv;
    12.6      struct timer        s_timer;        /* scheduling timer                */
    12.7 -    unsigned long       tick;           /* current periodic 'tick'         */
    12.8  } __cacheline_aligned;
    12.9  
   12.10  DECLARE_PER_CPU(struct schedule_data, schedule_data);
   12.11 @@ -61,7 +60,6 @@ struct scheduler {
   12.12      unsigned int sched_id;  /* ID for this scheduler             */
   12.13  
   12.14      void         (*init)           (void);
   12.15 -    void         (*tick)           (unsigned int cpu);
   12.16  
   12.17      int          (*init_domain)    (struct domain *);
   12.18      void         (*destroy_domain) (struct domain *);
    13.1 --- a/xen/include/xen/sched.h	Fri Mar 09 18:26:22 2007 +0000
    13.2 +++ b/xen/include/xen/sched.h	Fri Mar 09 18:26:47 2007 +0000
    13.3 @@ -79,8 +79,10 @@ struct vcpu
    13.4  
    13.5      struct vcpu     *next_in_list;
    13.6  
    13.7 -    struct timer     timer;         /* one-shot timer for timeout values */
    13.8 -    unsigned long    sleep_tick;    /* tick at which this vcpu started sleep */
    13.9 +    uint64_t         periodic_period;
   13.10 +    uint64_t         periodic_last_event;
   13.11 +    struct timer     periodic_timer;
   13.12 +    struct timer     singleshot_timer;
   13.13  
   13.14      struct timer     poll_timer;    /* timeout for SCHEDOP_poll */
   13.15  
   13.16 @@ -332,7 +334,6 @@ void __domain_crash_synchronous(void) __
   13.17  
   13.18  #define set_current_state(_s) do { current->state = (_s); } while (0)
   13.19  void scheduler_init(void);
   13.20 -void schedulers_start(void);
   13.21  int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
   13.22  void sched_destroy_vcpu(struct vcpu *v);
   13.23  int  sched_init_domain(struct domain *d);
   13.24 @@ -497,6 +498,7 @@ void domain_pause_by_systemcontroller(st
   13.25  void domain_unpause_by_systemcontroller(struct domain *d);
   13.26  void cpu_init(void);
   13.27  
   13.28 +void vcpu_force_reschedule(struct vcpu *v);
   13.29  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
   13.30  
   13.31  void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
    14.1 --- a/xen/include/xen/timer.h	Fri Mar 09 18:26:22 2007 +0000
    14.2 +++ b/xen/include/xen/timer.h	Fri Mar 09 18:26:47 2007 +0000
    14.3 @@ -35,7 +35,7 @@ struct timer {
    14.4   * The timer must *previously* have been initialised by init_timer(), or its
    14.5   * structure initialised to all-zeroes.
    14.6   */
    14.7 -static __inline__ int active_timer(struct timer *timer)
    14.8 +static inline int active_timer(struct timer *timer)
    14.9  {
   14.10      return (timer->heap_offset != 0);
   14.11  }
   14.12 @@ -46,7 +46,7 @@ static __inline__ int active_timer(struc
   14.13   * time (and multiple times) on an inactive timer. It must *never* execute
   14.14   * concurrently with any other operation on the same timer.
   14.15   */
   14.16 -static __inline__ void init_timer(
   14.17 +static inline void init_timer(
   14.18      struct timer *timer,
   14.19      void           (*function)(void *),
   14.20      void            *data,