ia64/xen-unstable

changeset 12988:3c7a6081f1a9

Merge with xenppc-unstable
author kfraser@localhost.localdomain
date Wed Dec 13 10:05:07 2006 +0000 (2006-12-13)
parents c08ac3b211c8 2fa06640a1c1
children d5499ff80b19 de69059a1f0e
files xen/arch/x86/crash.c xen/arch/x86/mm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/common/domain.c xen/common/sched_credit.c xen/include/asm-x86/shadow.h
line diff
     1.1 --- a/xen/arch/x86/crash.c	Tue Dec 12 14:35:07 2006 -0600
     1.2 +++ b/xen/arch/x86/crash.c	Wed Dec 13 10:05:07 2006 +0000
     1.3 @@ -58,9 +58,9 @@ static int crash_nmi_callback(struct cpu
     1.4  static void smp_send_nmi_allbutself(void)
     1.5  {
     1.6      cpumask_t allbutself = cpu_online_map;
     1.7 -
     1.8      cpu_clear(smp_processor_id(), allbutself);
     1.9 -    send_IPI_mask(allbutself, APIC_DM_NMI);
    1.10 +    if ( !cpus_empty(allbutself) )
    1.11 +        send_IPI_mask(allbutself, APIC_DM_NMI);
    1.12  }
    1.13  
    1.14  static void nmi_shootdown_cpus(void)
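
The crash.c hunk only sends the crash NMI when at least one other CPU is actually online, so no IPI is issued to an empty mask on a uniprocessor (or when every other CPU is already offline). A minimal standalone sketch of the same guard, using a plain bitmask instead of Xen's cpumask API; the toy_* names are illustrative, not from the tree:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model: a CPU mask as a 64-bit bitmap, CPU n is bit n. */
    typedef uint64_t toy_cpumask_t;

    static void toy_send_nmi_allbutself(toy_cpumask_t online, int self)
    {
        toy_cpumask_t allbutself = online & ~((toy_cpumask_t)1 << self);

        /* Sending an IPI to an empty mask is pointless at best, so skip
         * it when we are the only online CPU. */
        if (allbutself != 0)
            printf("send NMI to mask %#llx\n", (unsigned long long)allbutself);
        else
            printf("no other CPUs online, nothing to do\n");
    }

    int main(void)
    {
        toy_send_nmi_allbutself(0x1, 0);  /* uniprocessor: no IPI */
        toy_send_nmi_allbutself(0xf, 0);  /* CPUs 1-3 get the NMI */
        return 0;
    }
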
     2.1 --- a/xen/arch/x86/mm.c	Tue Dec 12 14:35:07 2006 -0600
     2.2 +++ b/xen/arch/x86/mm.c	Wed Dec 13 10:05:07 2006 +0000
     2.3 @@ -2951,7 +2951,17 @@ long arch_memory_op(int op, XEN_GUEST_HA
     2.4          guest_physmap_add_page(d, xatp.gpfn, mfn);
     2.5  
     2.6          UNLOCK_BIGLOCK(d);
     2.7 -        
     2.8 +
     2.9 +        /* If we're doing FAST_FAULT_PATH, then shadow mode may have
    2.10 +           cached the fact that this is an mmio region in the shadow
    2.11 +           page tables.  Blow the tables away to remove the cache.
    2.12 +           This is pretty heavy handed, but this is a rare operation
    2.13 +           (it might happen a dozen times during boot and then never
    2.14 +           again), so it doesn't matter too much. */
    2.15 +        shadow_lock(d);
    2.16 +        shadow_blow_tables(d);
    2.17 +        shadow_unlock(d);
    2.18 +
    2.19          put_domain(d);
    2.20  
    2.21          break;
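
The new comment in the mm.c hunk explains the reasoning: with FAST_FAULT_PATH the shadow code may have cached "this gfn is MMIO" in the shadow page tables, and after the physmap changes the cheapest correct response is to blow all shadows away and let them refault, rather than hunt down individual stale entries. Below is a standalone toy model of that invalidation policy only; all names are illustrative and none of this is Xen code:

    #include <stdio.h>
    #include <string.h>

    #define NR_GFNS 16

    /* Toy model of the p2m: which gfns currently have RAM behind them. */
    static int gfn_has_ram[NR_GFNS];

    /* Toy model of the fast-path cache: cached "is MMIO?" verdicts,
     * -1 meaning "not cached yet". */
    static int mmio_cache[NR_GFNS];

    static void blow_cache(void)
    {
        memset(mmio_cache, -1, sizeof(mmio_cache));
    }

    static int is_mmio(unsigned gfn)
    {
        if (mmio_cache[gfn] < 0)
            mmio_cache[gfn] = !gfn_has_ram[gfn];    /* fill on demand */
        return mmio_cache[gfn];
    }

    static void add_to_physmap(unsigned gfn)
    {
        gfn_has_ram[gfn] = 1;
        /* Any cached "this is MMIO" verdict for this gfn is now stale.
         * Rather than track down individual entries, drop the whole
         * cache and let it refill: heavy-handed, but the operation is
         * rare (a dozen times at boot), so the cost does not matter. */
        blow_cache();
    }

    int main(void)
    {
        blow_cache();
        printf("gfn 3 MMIO before: %d\n", is_mmio(3));   /* cached as MMIO */
        add_to_physmap(3);
        printf("gfn 3 MMIO after:  %d\n", is_mmio(3));   /* refreshed: RAM now */
        return 0;
    }
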
     3.1 --- a/xen/arch/x86/mm/shadow/common.c	Tue Dec 12 14:35:07 2006 -0600
     3.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Dec 13 10:05:07 2006 +0000
     3.3 @@ -791,7 +791,7 @@ void shadow_prealloc(struct domain *d, u
     3.4  
     3.5  /* Deliberately free all the memory we can: this will tear down all of
     3.6   * this domain's shadows */
     3.7 -static void shadow_blow_tables(struct domain *d) 
     3.8 +void shadow_blow_tables(struct domain *d) 
     3.9  {
    3.10      struct list_head *l, *t;
    3.11      struct shadow_page_info *sp;
     4.1 --- a/xen/arch/x86/mm/shadow/multi.c	Tue Dec 12 14:35:07 2006 -0600
     4.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Wed Dec 13 10:05:07 2006 +0000
     4.3 @@ -3488,6 +3488,9 @@ sh_update_cr3(struct vcpu *v)
     4.4                                         ? SH_type_l2h_shadow 
     4.5                                         : SH_type_l2_shadow);
     4.6              }
     4.7 +            else
     4.8 +                /* The guest is not present: clear out the shadow. */
     4.9 +                sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
    4.10          }
    4.11      }
    4.12  #elif GUEST_PAGING_LEVELS == 4
     5.1 --- a/xen/common/domain.c	Tue Dec 12 14:35:07 2006 -0600
     5.2 +++ b/xen/common/domain.c	Wed Dec 13 10:05:07 2006 +0000
     5.3 @@ -238,7 +238,11 @@ void domain_kill(struct domain *d)
     5.4  
     5.5  void __domain_crash(struct domain *d)
     5.6  {
     5.7 -    if ( d == current->domain )
     5.8 +    if ( test_bit(_DOMF_shutdown, &d->domain_flags) )
     5.9 +    {
    5.10 +        /* Print nothing: the domain is already shutting down. */
    5.11 +    }
    5.12 +    else if ( d == current->domain )
    5.13      {
    5.14          printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
    5.15                 d->domain_id, current->vcpu_id, smp_processor_id());
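
The domain.c hunk makes repeated crash reports quiet once a domain is already shutting down, so only the first crash of a domain is logged. A standalone sketch of that "report only the first event" pattern follows; the names are illustrative, and the toy sets its own shutting_down flag purely to stay self-contained (in the tree the flag is managed by the shutdown path that __domain_crash feeds into):

    #include <stdio.h>

    struct toy_domain {
        int id;
        int shutting_down;   /* stands in for _DOMF_shutdown */
    };

    static void toy_domain_crash(struct toy_domain *d)
    {
        if (d->shutting_down) {
            /* Already on its way down: stay quiet, don't spam the console. */
        } else {
            printf("Domain %d crashed\n", d->id);
            d->shutting_down = 1;
        }
    }

    int main(void)
    {
        struct toy_domain d = { .id = 7, .shutting_down = 0 };
        toy_domain_crash(&d);   /* prints once */
        toy_domain_crash(&d);   /* silent on the second report */
        return 0;
    }
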
     6.1 --- a/xen/common/sched_credit.c	Tue Dec 12 14:35:07 2006 -0600
     6.2 +++ b/xen/common/sched_credit.c	Wed Dec 13 10:05:07 2006 +0000
     6.3 @@ -106,20 +106,13 @@
     6.4      _MACRO(tickle_local_other)              \
     6.5      _MACRO(tickle_idlers_none)              \
     6.6      _MACRO(tickle_idlers_some)              \
     6.7 -    _MACRO(vcpu_migrate)                    \
     6.8      _MACRO(load_balance_idle)               \
     6.9      _MACRO(load_balance_over)               \
    6.10      _MACRO(load_balance_other)              \
    6.11      _MACRO(steal_trylock_failed)            \
    6.12 -    _MACRO(steal_peer_down)                 \
    6.13      _MACRO(steal_peer_idle)                 \
    6.14 -    _MACRO(steal_peer_running)              \
    6.15 -    _MACRO(steal_peer_pinned)               \
    6.16 -    _MACRO(steal_peer_migrating)            \
    6.17 -    _MACRO(steal_peer_best_idler)           \
    6.18 -    _MACRO(steal_loner_candidate)           \
    6.19 -    _MACRO(steal_loner_signal)              \
    6.20 -    _MACRO(cpu_pick)                        \
    6.21 +    _MACRO(migrate_queued)                  \
    6.22 +    _MACRO(migrate_running)                 \
    6.23      _MACRO(dom_init)                        \
    6.24      _MACRO(dom_destroy)                     \
    6.25      _MACRO(vcpu_init)                       \
    6.26 @@ -146,7 +139,7 @@
    6.27      struct                                      \
    6.28      {                                           \
    6.29          CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
    6.30 -    } stats
    6.31 +    } stats;
    6.32  
    6.33  #define CSCHED_STATS_PRINTK()                   \
    6.34      do                                          \
    6.35 @@ -155,14 +148,27 @@
    6.36          CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
    6.37      } while ( 0 )
    6.38  
    6.39 -#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
    6.40 +#define CSCHED_STAT_CRANK(_X)               (CSCHED_STAT(_X)++)
    6.41 +
    6.42 +#define CSCHED_VCPU_STATS_RESET(_V)                     \
    6.43 +    do                                                  \
    6.44 +    {                                                   \
    6.45 +        memset(&(_V)->stats, 0, sizeof((_V)->stats));   \
    6.46 +    } while ( 0 )
    6.47 +
    6.48 +#define CSCHED_VCPU_STAT_CRANK(_V, _X)      (((_V)->stats._X)++)
    6.49 +
    6.50 +#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    (((_V)->stats._X) = (_Y))
    6.51  
    6.52  #else /* CSCHED_STATS */
    6.53  
    6.54 -#define CSCHED_STATS_RESET()    do {} while ( 0 )
    6.55 -#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
    6.56 -#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
    6.57 -#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
    6.58 +#define CSCHED_STATS_RESET()                do {} while ( 0 )
    6.59 +#define CSCHED_STATS_DEFINE()
    6.60 +#define CSCHED_STATS_PRINTK()               do {} while ( 0 )
    6.61 +#define CSCHED_STAT_CRANK(_X)               do {} while ( 0 )
    6.62 +#define CSCHED_VCPU_STATS_RESET(_V)         do {} while ( 0 )
    6.63 +#define CSCHED_VCPU_STAT_CRANK(_V, _X)      do {} while ( 0 )
    6.64 +#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    do {} while ( 0 )
    6.65  
    6.66  #endif /* CSCHED_STATS */
    6.67  
    6.68 @@ -185,13 +191,16 @@ struct csched_vcpu {
    6.69      struct vcpu *vcpu;
    6.70      atomic_t credit;
    6.71      int16_t pri;
    6.72 +#ifdef CSCHED_STATS
    6.73      struct {
    6.74          int credit_last;
    6.75          uint32_t credit_incr;
    6.76          uint32_t state_active;
    6.77          uint32_t state_idle;
    6.78 -        uint32_t migrate;
    6.79 +        uint32_t migrate_q;
    6.80 +        uint32_t migrate_r;
    6.81      } stats;
    6.82 +#endif
    6.83  };
    6.84  
    6.85  /*
    6.86 @@ -219,7 +228,7 @@ struct csched_private {
    6.87      uint32_t credit;
    6.88      int credit_balance;
    6.89      uint32_t runq_sort;
    6.90 -    CSCHED_STATS_DEFINE();
    6.91 +    CSCHED_STATS_DEFINE()
    6.92  };
    6.93  
    6.94  
    6.95 @@ -231,6 +240,15 @@ static struct csched_private csched_priv
    6.96  
    6.97  
    6.98  static inline int
    6.99 +__cycle_cpu(int cpu, const cpumask_t *mask)
   6.100 +{
   6.101 +    int nxt = next_cpu(cpu, *mask);
   6.102 +    if (nxt == NR_CPUS)
   6.103 +        nxt = first_cpu(*mask);
   6.104 +    return nxt;
   6.105 +}
   6.106 +
   6.107 +static inline int
   6.108  __vcpu_on_runq(struct csched_vcpu *svc)
   6.109  {
   6.110      return !list_empty(&svc->runq_elem);
   6.111 @@ -375,118 +393,138 @@ static inline void
   6.112  #define CSCHED_VCPU_CHECK(_vc)
   6.113  #endif
   6.114  
   6.115 -/*
   6.116 - * Indicates which of two given idlers is most efficient to run
   6.117 - * an additional VCPU.
   6.118 - *
   6.119 - * Returns:
   6.120 - *  0:           They are the same.
   6.121 - *  negative:    One is less efficient than Two.
   6.122 - *  positive:    One is more efficient than Two.
   6.123 - */
   6.124 -static int
   6.125 -csched_idler_compare(int one, int two)
   6.126 -{
   6.127 -    cpumask_t idlers;
   6.128 -    cpumask_t one_idlers;
   6.129 -    cpumask_t two_idlers;
   6.130 -
   6.131 -    idlers = csched_priv.idlers;
   6.132 -    cpu_clear(one, idlers);
   6.133 -    cpu_clear(two, idlers);
   6.134 -
   6.135 -    if ( cpu_isset(one, cpu_core_map[two]) )
   6.136 -    {
   6.137 -        cpus_and(one_idlers, idlers, cpu_sibling_map[one]);
   6.138 -        cpus_and(two_idlers, idlers, cpu_sibling_map[two]);
   6.139 -    }
   6.140 -    else
   6.141 -    {
   6.142 -        cpus_and(one_idlers, idlers, cpu_core_map[one]);
   6.143 -        cpus_and(two_idlers, idlers, cpu_core_map[two]);
   6.144 -    }
   6.145 -
   6.146 -    return cpus_weight(one_idlers) - cpus_weight(two_idlers);
   6.147 -}
   6.148 -
   6.149  static inline int
   6.150 -__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
   6.151 +__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
   6.152  {
   6.153      /*
   6.154       * Don't pick up work that's in the peer's scheduling tail. Also only pick
   6.155       * up work that's allowed to run on our CPU.
   6.156       */
   6.157 -    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
   6.158 -    {
   6.159 -        CSCHED_STAT_CRANK(steal_peer_running);
   6.160 -        return 0;
   6.161 -    }
   6.162 -
   6.163 -    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
   6.164 -    {
   6.165 -        CSCHED_STAT_CRANK(steal_peer_pinned);
   6.166 -        return 0;
   6.167 -    }
   6.168 -
   6.169 -    return 1;
   6.170 +    return !test_bit(_VCPUF_running, &vc->vcpu_flags) &&
   6.171 +           cpu_isset(dest_cpu, vc->cpu_affinity);
   6.172  }
   6.173  
   6.174 -static inline int
   6.175 -__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
   6.176 +static int
   6.177 +csched_cpu_pick(struct vcpu *vc)
   6.178  {
   6.179 -    BUG_ON( is_idle_vcpu(vc) );
   6.180 +    cpumask_t cpus;
   6.181 +    cpumask_t idlers;
   6.182 +    int cpu;
   6.183  
   6.184 -    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
   6.185 +    /*
   6.186 +     * Pick from online CPUs in VCPU's affinity mask, giving a
   6.187 +     * preference to its current processor if it's in there.
   6.188 +     */
   6.189 +    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
   6.190 +    cpu = cpu_isset(vc->processor, cpus)
   6.191 +            ? vc->processor
   6.192 +            : __cycle_cpu(vc->processor, &cpus);
   6.193 +    ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
   6.194 +
   6.195 +    /*
   6.196 +     * Try to find an idle processor within the above constraints.
   6.197 +     *
   6.198 +     * In multi-core and multi-threaded CPUs, not all idle execution
   6.199 +     * vehicles are equal!
   6.200 +     *
   6.201 +     * We give preference to the idle execution vehicle with the most
   6.202 +     * idling neighbours in its grouping. This distributes work across
   6.203 +     * distinct cores first and guarantees we don't do something stupid
   6.204 +     * like run two VCPUs on co-hyperthreads while there are idle cores
   6.205 +     * or sockets.
   6.206 +     */
   6.207 +    idlers = csched_priv.idlers;
   6.208 +    cpu_set(cpu, idlers);
   6.209 +    cpus_and(cpus, cpus, idlers);
   6.210 +    cpu_clear(cpu, cpus);
   6.211 +
   6.212 +    while ( !cpus_empty(cpus) )
   6.213      {
   6.214 -        CSCHED_STAT_CRANK(steal_peer_pinned);
   6.215 -        return 0;
   6.216 +        cpumask_t cpu_idlers;
   6.217 +        cpumask_t nxt_idlers;
   6.218 +        int nxt;
   6.219 +
   6.220 +        nxt = __cycle_cpu(cpu, &cpus);
   6.221 +
   6.222 +        if ( cpu_isset(cpu, cpu_core_map[nxt]) )
   6.223 +        {
   6.224 +            ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
   6.225 +            cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
   6.226 +            cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
   6.227 +        }
   6.228 +        else
   6.229 +        {
   6.230 +            ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
   6.231 +            cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
   6.232 +            cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
   6.233 +        }
   6.234 +
   6.235 +        if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) )
   6.236 +        {
   6.237 +            cpu = nxt;
   6.238 +            cpu_clear(cpu, cpus);
   6.239 +        }
   6.240 +        else
   6.241 +        {
   6.242 +            cpus_andnot(cpus, cpus, nxt_idlers);
   6.243 +        }
   6.244      }
   6.245  
   6.246 -    if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) )
   6.247 -    {
   6.248 -        CSCHED_STAT_CRANK(steal_peer_migrating);
   6.249 -        return 0;
   6.250 -    }
   6.251 -
   6.252 -    if ( csched_idler_compare(local_cpu, vc->processor) <= 0 )
   6.253 -    {
   6.254 -        CSCHED_STAT_CRANK(steal_peer_best_idler);
   6.255 -        return 0;
   6.256 -    }
   6.257 -
   6.258 -    return 1;
   6.259 +    return cpu;
   6.260  }
   6.261  
   6.262 -static void
   6.263 -csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
   6.264 +static inline void
   6.265 +__csched_vcpu_acct_start(struct csched_vcpu *svc)
   6.266  {
   6.267      struct csched_dom * const sdom = svc->sdom;
   6.268      unsigned long flags;
   6.269  
   6.270 -    /* Update credits */
   6.271 -    atomic_sub(credit_dec, &svc->credit);
   6.272 +    spin_lock_irqsave(&csched_priv.lock, flags);
   6.273  
   6.274 -    /* Put this VCPU and domain back on the active list if it was idling */
   6.275      if ( list_empty(&svc->active_vcpu_elem) )
   6.276      {
   6.277 -        spin_lock_irqsave(&csched_priv.lock, flags);
   6.278 -
   6.279 -        if ( list_empty(&svc->active_vcpu_elem) )
   6.280 -        {
   6.281 -            CSCHED_STAT_CRANK(acct_vcpu_active);
   6.282 -            svc->stats.state_active++;
   6.283 +        CSCHED_VCPU_STAT_CRANK(svc, state_active);
   6.284 +        CSCHED_STAT_CRANK(acct_vcpu_active);
   6.285  
   6.286 -            sdom->active_vcpu_count++;
   6.287 -            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
   6.288 -            if ( list_empty(&sdom->active_sdom_elem) )
   6.289 -            {
   6.290 -                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
   6.291 -                csched_priv.weight += sdom->weight;
   6.292 -            }
   6.293 +        sdom->active_vcpu_count++;
   6.294 +        list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
   6.295 +        if ( list_empty(&sdom->active_sdom_elem) )
   6.296 +        {
   6.297 +            list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
   6.298 +            csched_priv.weight += sdom->weight;
   6.299          }
   6.300 +    }
   6.301  
   6.302 -        spin_unlock_irqrestore(&csched_priv.lock, flags);
   6.303 +    spin_unlock_irqrestore(&csched_priv.lock, flags);
   6.304 +}
   6.305 +
   6.306 +static inline void
   6.307 +__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
   6.308 +{
   6.309 +    struct csched_dom * const sdom = svc->sdom;
   6.310 +
   6.311 +    BUG_ON( list_empty(&svc->active_vcpu_elem) );
   6.312 +
   6.313 +    CSCHED_VCPU_STAT_CRANK(svc, state_idle);
   6.314 +    CSCHED_STAT_CRANK(acct_vcpu_idle);
   6.315 +
   6.316 +    sdom->active_vcpu_count--;
   6.317 +    list_del_init(&svc->active_vcpu_elem);
   6.318 +    if ( list_empty(&sdom->active_vcpu) )
   6.319 +    {
   6.320 +        BUG_ON( csched_priv.weight < sdom->weight );
   6.321 +        list_del_init(&sdom->active_sdom_elem);
   6.322 +        csched_priv.weight -= sdom->weight;
   6.323      }
   6.324 +}
   6.325 +
   6.326 +static void
   6.327 +csched_vcpu_acct(unsigned int cpu)
   6.328 +{
   6.329 +    struct csched_vcpu * const svc = CSCHED_VCPU(current);
   6.330 +
   6.331 +    ASSERT( current->processor == cpu );
   6.332 +    ASSERT( svc->sdom != NULL );
   6.333  
   6.334      /*
   6.335       * If this VCPU's priority was boosted when it last awoke, reset it.
   6.336 @@ -495,25 +533,30 @@ csched_vcpu_acct(struct csched_vcpu *svc
   6.337       */
   6.338      if ( svc->pri == CSCHED_PRI_TS_BOOST )
   6.339          svc->pri = CSCHED_PRI_TS_UNDER;
   6.340 -}
   6.341 -
   6.342 -static inline void
   6.343 -__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
   6.344 -{
   6.345 -    struct csched_dom * const sdom = svc->sdom;
   6.346 -
   6.347 -    BUG_ON( list_empty(&svc->active_vcpu_elem) );
   6.348  
   6.349 -    CSCHED_STAT_CRANK(acct_vcpu_idle);
   6.350 -    svc->stats.state_idle++;
   6.351 +    /*
   6.352 +     * Update credits
   6.353 +     */
   6.354 +    atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit);
   6.355  
   6.356 -    sdom->active_vcpu_count--;
   6.357 -    list_del_init(&svc->active_vcpu_elem);
   6.358 -    if ( list_empty(&sdom->active_vcpu) )
   6.359 +    /*
   6.360 +     * Put this VCPU and domain back on the active list if it was
   6.361 +     * idling.
   6.362 +     *
   6.363 +     * If it's been active a while, check if we'd be better off
   6.364 +     * migrating it to run elsewhere (see multi-core and multi-thread
   6.365 +     * support in csched_cpu_pick()).
   6.366 +     */
   6.367 +    if ( list_empty(&svc->active_vcpu_elem) )
   6.368      {
   6.369 -        BUG_ON( csched_priv.weight < sdom->weight );
   6.370 -        list_del_init(&sdom->active_sdom_elem);
   6.371 -        csched_priv.weight -= sdom->weight;
   6.372 +        __csched_vcpu_acct_start(svc);
   6.373 +    }
   6.374 +    else if ( csched_cpu_pick(current) != cpu )
   6.375 +    {
   6.376 +        CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
   6.377 +        CSCHED_STAT_CRANK(migrate_running);
   6.378 +        set_bit(_VCPUF_migrating, &current->vcpu_flags);
   6.379 +        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
   6.380      }
   6.381  }
   6.382  
   6.383 @@ -537,15 +580,11 @@ csched_vcpu_init(struct vcpu *vc)
   6.384      svc->vcpu = vc;
   6.385      atomic_set(&svc->credit, 0);
   6.386      svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
   6.387 -    memset(&svc->stats, 0, sizeof(svc->stats));
   6.388 +    CSCHED_VCPU_STATS_RESET(svc);
   6.389      vc->sched_priv = svc;
   6.390  
   6.391      CSCHED_VCPU_CHECK(vc);
   6.392  
   6.393 -    /* Attach fair-share VCPUs to the accounting list */
   6.394 -    if ( likely(sdom != NULL) )
   6.395 -        csched_vcpu_acct(svc, 0);
   6.396 -
   6.397      /* Allocate per-PCPU info */
   6.398      if ( unlikely(!CSCHED_PCPU(vc->processor)) )
   6.399      {
   6.400 @@ -573,7 +612,7 @@ csched_vcpu_destroy(struct vcpu *vc)
   6.401      spin_lock_irqsave(&csched_priv.lock, flags);
   6.402  
   6.403      if ( !list_empty(&svc->active_vcpu_elem) )
   6.404 -        __csched_vcpu_acct_idle_locked(svc);
   6.405 +        __csched_vcpu_acct_stop_locked(svc);
   6.406  
   6.407      spin_unlock_irqrestore(&csched_priv.lock, flags);
   6.408  
   6.409 @@ -717,66 +756,6 @@ csched_dom_destroy(struct domain *dom)
   6.410      xfree(sdom);
   6.411  }
   6.412  
   6.413 -static int
   6.414 -csched_cpu_pick(struct vcpu *vc)
   6.415 -{
   6.416 -    cpumask_t cpus;
   6.417 -    int cpu, nxt;
   6.418 -
   6.419 -    CSCHED_STAT_CRANK(cpu_pick);
   6.420 -
   6.421 -    /*
   6.422 -     * Pick from online CPUs in VCPU's affinity mask, giving a
   6.423 -     * preference to its current processor if it's in there.
   6.424 -     */
   6.425 -    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
   6.426 -    ASSERT( !cpus_empty(cpus) );
   6.427 -    cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus);
   6.428 -
   6.429 -    /*
   6.430 -     * Try to find an idle processor within the above constraints.
   6.431 -     */
   6.432 -    cpus_and(cpus, cpus, csched_priv.idlers);
   6.433 -    if ( !cpus_empty(cpus) )
   6.434 -    {
   6.435 -        cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus);
   6.436 -        cpu_clear(cpu, cpus);
   6.437 -
   6.438 -        /*
   6.439 -         * In multi-core and multi-threaded CPUs, not all idle execution
   6.440 -         * vehicles are equal!
   6.441 -         *
   6.442 -         * We give preference to the idle execution vehicle with the most
   6.443 -         * idling neighbours in its grouping. This distributes work across
   6.444 -         * distinct cores first and guarantees we don't do something stupid
   6.445 -         * like run two VCPUs on co-hyperthreads while there are idle cores
   6.446 -         * or sockets.
   6.447 -         */
   6.448 -        while ( !cpus_empty(cpus) )
   6.449 -        {
   6.450 -            nxt = first_cpu(cpus);
   6.451 -
   6.452 -            if ( csched_idler_compare(cpu, nxt) < 0 )
   6.453 -            {
   6.454 -                cpu = nxt;
   6.455 -                cpu_clear(nxt, cpus);
   6.456 -            }
   6.457 -            else if ( cpu_isset(cpu, cpu_core_map[nxt]) )
   6.458 -            {
   6.459 -                cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]);
   6.460 -            }
   6.461 -            else
   6.462 -            {
   6.463 -                cpus_andnot(cpus, cpus, cpu_core_map[nxt]);
   6.464 -            }
   6.465 -
   6.466 -            ASSERT( !cpu_isset(nxt, cpus) );
   6.467 -        }
   6.468 -    }
   6.469 -
   6.470 -    return cpu;
   6.471 -}
   6.472 -
   6.473  /*
   6.474   * This is a O(n) optimized sort of the runq.
   6.475   *
   6.476 @@ -981,14 +960,14 @@ csched_acct(void)
   6.477  
   6.478                  if ( credit > CSCHED_CREDITS_PER_TSLICE )
   6.479                  {
   6.480 -                    __csched_vcpu_acct_idle_locked(svc);
   6.481 +                    __csched_vcpu_acct_stop_locked(svc);
   6.482                      credit = 0;
   6.483                      atomic_set(&svc->credit, credit);
   6.484                  }
   6.485              }
   6.486  
   6.487 -            svc->stats.credit_last = credit;
   6.488 -            svc->stats.credit_incr = credit_fair;
   6.489 +            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
   6.490 +            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
   6.491              credit_balance += credit;
   6.492          }
   6.493      }
   6.494 @@ -1004,21 +983,14 @@ csched_acct(void)
   6.495  static void
   6.496  csched_tick(unsigned int cpu)
   6.497  {
   6.498 -    struct csched_vcpu * const svc = CSCHED_VCPU(current);
   6.499 -    struct csched_dom * const sdom = svc->sdom;
   6.500 -
   6.501      /*
   6.502       * Accounting for running VCPU
   6.503 -     *
   6.504 -     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
   6.505       */
   6.506 -    if ( likely(sdom != NULL) )
   6.507 -    {
   6.508 -        csched_vcpu_acct(svc, CSCHED_CREDITS_PER_TICK);
   6.509 -    }
   6.510 +    if ( !is_idle_vcpu(current) )
   6.511 +        csched_vcpu_acct(cpu);
   6.512  
   6.513      /*
   6.514 -     * Accounting duty
   6.515 +     * Host-wide accounting duty
   6.516       *
   6.517       * Note: Currently, this is always done by the master boot CPU. Eventually,
   6.518       * we could distribute or at the very least cycle the duty.
   6.519 @@ -1040,40 +1012,48 @@ csched_tick(unsigned int cpu)
   6.520  }
   6.521  
   6.522  static struct csched_vcpu *
   6.523 -csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
   6.524 +csched_runq_steal(int peer_cpu, int cpu, int pri)
   6.525  {
   6.526 +    const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
   6.527 +    const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
   6.528 +    struct csched_vcpu *speer;
   6.529      struct list_head *iter;
   6.530 -    struct csched_vcpu *speer;
   6.531      struct vcpu *vc;
   6.532  
   6.533 -    list_for_each( iter, &spc->runq )
   6.534 +    /*
   6.535 +     * Don't steal from an idle CPU's runq because it's about to
   6.536 +     * pick up work from it itself.
   6.537 +     */
   6.538 +    if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
   6.539      {
   6.540 -        speer = __runq_elem(iter);
   6.541 -
   6.542 -        /*
   6.543 -         * If next available VCPU here is not of higher priority than ours,
   6.544 -         * this PCPU is useless to us.
   6.545 -         */
   6.546 -        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
   6.547 +        list_for_each( iter, &peer_pcpu->runq )
   6.548          {
   6.549 -            CSCHED_STAT_CRANK(steal_peer_idle);
   6.550 -            break;
   6.551 -        }
   6.552 +            speer = __runq_elem(iter);
   6.553  
   6.554 -        /* Is this VCPU is runnable on our PCPU? */
   6.555 -        vc = speer->vcpu;
   6.556 -        BUG_ON( is_idle_vcpu(vc) );
   6.557 +            /*
   6.558 +             * If next available VCPU here is not of higher priority
   6.559 +             * than ours, this PCPU is useless to us.
   6.560 +             */
   6.561 +            if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
   6.562 +                break;
   6.563  
   6.564 -        if ( __csched_queued_vcpu_is_stealable(cpu, vc) )
   6.565 -        {
   6.566 -            /* We got a candidate. Grab it! */
   6.567 -            __runq_remove(speer);
   6.568 -            vc->processor = cpu;
    6.569 +            /* Is this VCPU runnable on our PCPU? */
   6.570 +            vc = speer->vcpu;
   6.571 +            BUG_ON( is_idle_vcpu(vc) );
   6.572  
   6.573 -            return speer;
   6.574 +            if (__csched_vcpu_is_migrateable(vc, cpu))
   6.575 +            {
   6.576 +                /* We got a candidate. Grab it! */
   6.577 +                CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
   6.578 +                CSCHED_STAT_CRANK(migrate_queued);
   6.579 +                __runq_remove(speer);
   6.580 +                vc->processor = cpu;
   6.581 +                return speer;
   6.582 +            }
   6.583          }
   6.584      }
   6.585  
   6.586 +    CSCHED_STAT_CRANK(steal_peer_idle);
   6.587      return NULL;
   6.588  }
   6.589  
   6.590 @@ -1081,12 +1061,11 @@ static struct csched_vcpu *
   6.591  csched_load_balance(int cpu, struct csched_vcpu *snext)
   6.592  {
   6.593      struct csched_vcpu *speer;
   6.594 -    struct csched_pcpu *spc;
   6.595 -    struct vcpu *peer_vcpu;
   6.596      cpumask_t workers;
   6.597 -    cpumask_t loners;
   6.598      int peer_cpu;
   6.599  
   6.600 +    BUG_ON( cpu != snext->vcpu->processor );
   6.601 +
   6.602      if ( snext->pri == CSCHED_PRI_IDLE )
   6.603          CSCHED_STAT_CRANK(load_balance_idle);
   6.604      else if ( snext->pri == CSCHED_PRI_TS_OVER )
   6.605 @@ -1095,22 +1074,16 @@ csched_load_balance(int cpu, struct csch
   6.606          CSCHED_STAT_CRANK(load_balance_other);
   6.607  
   6.608      /*
   6.609 -     * Peek at non-idling CPUs in the system
   6.610 +     * Peek at non-idling CPUs in the system, starting with our
   6.611 +     * immediate neighbour.
   6.612       */
   6.613 -    cpus_clear(loners);
   6.614      cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
   6.615      cpu_clear(cpu, workers);
   6.616 -
   6.617      peer_cpu = cpu;
   6.618 -    BUG_ON( peer_cpu != snext->vcpu->processor );
   6.619  
   6.620      while ( !cpus_empty(workers) )
   6.621      {
   6.622 -        /* For each CPU of interest, starting with our neighbour... */
   6.623 -        peer_cpu = next_cpu(peer_cpu, workers);
   6.624 -        if ( peer_cpu == NR_CPUS )
   6.625 -            peer_cpu = first_cpu(workers);
   6.626 -
   6.627 +        peer_cpu = __cycle_cpu(peer_cpu, &workers);
   6.628          cpu_clear(peer_cpu, workers);
   6.629  
   6.630          /*
   6.631 @@ -1126,83 +1099,13 @@ csched_load_balance(int cpu, struct csch
   6.632              continue;
   6.633          }
   6.634  
   6.635 -        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
   6.636 -        spc = CSCHED_PCPU(peer_cpu);
   6.637 -
   6.638 -        if ( unlikely(spc == NULL) )
   6.639 -        {
   6.640 -            CSCHED_STAT_CRANK(steal_peer_down);
   6.641 -        }
   6.642 -        else if ( unlikely(is_idle_vcpu(peer_vcpu)) )
   6.643 -        {
   6.644 -            /*
   6.645 -             * Don't steal from an idle CPU's runq because it's about to
   6.646 -             * pick up work from it itself.
   6.647 -             */
   6.648 -            CSCHED_STAT_CRANK(steal_peer_idle);
   6.649 -        }
   6.650 -        else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) )
   6.651 -        {
   6.652 -            if ( snext->pri == CSCHED_PRI_IDLE &&
   6.653 -                 __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
   6.654 -            {
   6.655 -                CSCHED_STAT_CRANK(steal_loner_candidate);
   6.656 -                cpu_set(peer_cpu, loners);
   6.657 -            }
   6.658 -        }
   6.659 -        else
   6.660 -        {
   6.661 -            /* Try to steal work from a remote CPU's runq. */
   6.662 -            speer = csched_runq_steal(spc, cpu, snext->pri);
   6.663 -            if ( speer != NULL )
   6.664 -            {
   6.665 -                spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.666 -                CSCHED_STAT_CRANK(vcpu_migrate);
   6.667 -                speer->stats.migrate++;
   6.668 -                return speer;
   6.669 -            }
   6.670 -        }
   6.671 -
   6.672 +        /*
   6.673 +         * Any work over there to steal?
   6.674 +         */
   6.675 +        speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
   6.676          spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.677 -    }
   6.678 -
   6.679 -    /*
   6.680 -     * If we failed to find any remotely queued VCPUs to move here,
   6.681 -     * see if it would be more efficient to move any of the running
   6.682 -     * remote VCPUs over here.
   6.683 -     */
   6.684 -    while ( !cpus_empty(loners) )
   6.685 -    {
   6.686 -        /* For each CPU of interest, starting with our neighbour... */
   6.687 -        peer_cpu = next_cpu(peer_cpu, loners);
   6.688 -        if ( peer_cpu == NR_CPUS )
   6.689 -            peer_cpu = first_cpu(loners);
   6.690 -
   6.691 -        cpu_clear(peer_cpu, loners);
   6.692 -
   6.693 -        if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) )
   6.694 -        {
   6.695 -            CSCHED_STAT_CRANK(steal_trylock_failed);
   6.696 -            continue;
   6.697 -        }
   6.698 -
   6.699 -        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
   6.700 -        spc = CSCHED_PCPU(peer_cpu);
   6.701 -
   6.702 -        /* Signal the first candidate only. */
   6.703 -        if ( !is_idle_vcpu(peer_vcpu) &&
   6.704 -             is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) &&
   6.705 -             __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
   6.706 -        {
   6.707 -            set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags);
   6.708 -            spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.709 -
   6.710 -            CSCHED_STAT_CRANK(steal_loner_signal);
   6.711 -            cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ);
   6.712 -            break;
   6.713 -        }
   6.714 -
   6.715 -        spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.716 +        if ( speer != NULL )
   6.717 +            return speer;
   6.718      }
   6.719  
   6.720      /* Failed to find more important work elsewhere... */
   6.721 @@ -1270,7 +1173,6 @@ csched_schedule(s_time_t now)
   6.722      ret.task = snext->vcpu;
   6.723  
   6.724      CSCHED_VCPU_CHECK(ret.task);
   6.725 -
   6.726      return ret;
   6.727  }
   6.728  
   6.729 @@ -1287,14 +1189,16 @@ csched_dump_vcpu(struct csched_vcpu *svc
   6.730  
   6.731      if ( sdom )
   6.732      {
   6.733 -        printk(" credit=%i (%d+%u) {a/i=%u/%u m=%u w=%u}",
   6.734 -            atomic_read(&svc->credit),
   6.735 -            svc->stats.credit_last,
   6.736 -            svc->stats.credit_incr,
   6.737 -            svc->stats.state_active,
   6.738 -            svc->stats.state_idle,
   6.739 -            svc->stats.migrate,
   6.740 -            sdom->weight);
   6.741 +        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
   6.742 +#ifdef CSCHED_STATS
   6.743 +        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
   6.744 +                svc->stats.credit_last,
   6.745 +                svc->stats.credit_incr,
   6.746 +                svc->stats.state_active,
   6.747 +                svc->stats.state_idle,
   6.748 +                svc->stats.migrate_q,
   6.749 +                svc->stats.migrate_r);
   6.750 +#endif
   6.751      }
   6.752  
   6.753      printk("\n");
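
Two ideas carry most of the sched_credit.c rework: the new __cycle_cpu() helper walks a cpumask starting just after a given CPU and wraps around, and csched_cpu_pick() prefers the idle CPU whose sibling group has the most other idlers, so VCPUs spread across cores and sockets before doubling up on hyperthreads. The standalone sketch below models both with plain bitmasks; it is a simplified scoring version of the heuristic, not the scheduler's actual elimination loop, and every name in it is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 8
    typedef uint32_t mask_t;                 /* toy cpumask: bit n = CPU n */
    #define BIT(n) ((mask_t)1 << (n))

    /* Toy equivalent of __cycle_cpu(): next set bit after 'cpu',
     * wrapping round to the lowest set bit when we fall off the end. */
    static int cycle_cpu(int cpu, mask_t mask)
    {
        for (int i = 1; i <= NR_CPUS; i++) {
            int nxt = (cpu + i) % NR_CPUS;
            if (mask & BIT(nxt))
                return nxt;
        }
        return -1;                           /* empty mask */
    }

    static int popcount(mask_t m)
    {
        int n = 0;
        for (; m; m &= m - 1)
            n++;
        return n;
    }

    /* Toy topology: CPUs 2k and 2k+1 are hyperthread siblings of one core. */
    static mask_t siblings(int cpu) { return BIT(cpu & ~1) | BIT(cpu | 1); }

    /* Simplified model of the csched_cpu_pick() heuristic: among the idle
     * CPUs the VCPU may run on, prefer the one with the most idle
     * siblings, so work lands on fully idle cores first. */
    static int pick_cpu(mask_t allowed, mask_t idlers)
    {
        mask_t cand = allowed & idlers;
        int best = -1, best_score = -1;

        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
            if (!(cand & BIT(cpu)))
                continue;
            int score = popcount(idlers & siblings(cpu) & ~BIT(cpu));
            if (score > best_score) {
                best = cpu;
                best_score = score;
            }
        }
        return best;
    }

    int main(void)
    {
        /* CPUs 0-7 online; CPU 1 is busy, everything else idle. */
        mask_t idlers = 0xff & ~BIT(1);

        /* CPU 0's sibling (CPU 1) is busy, so a core with both threads
         * idle (2/3) is preferred for the next VCPU. */
        printf("picked CPU %d\n", pick_cpu(0xff, idlers));               /* -> 2 */
        printf("cycle from 6 over {0,2,6}: %d\n",
               cycle_cpu(6, BIT(0) | BIT(2) | BIT(6)));                  /* -> 0 */
        return 0;
    }
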
     7.1 --- a/xen/include/asm-x86/shadow.h	Tue Dec 12 14:35:07 2006 -0600
     7.2 +++ b/xen/include/asm-x86/shadow.h	Wed Dec 13 10:05:07 2006 +0000
     7.3 @@ -540,6 +540,9 @@ extern int shadow_remove_write_access(st
     7.4   * Returns non-zero if we need to flush TLBs. */
     7.5  extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
     7.6  
     7.7 +/* Remove all mappings from the shadows. */
     7.8 +extern void shadow_blow_tables(struct domain *d);
     7.9 +
    7.10  void
    7.11  shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
     7.12  /* This is a HVM page that we think is no longer a pagetable.