ia64/xen-unstable

changeset 12988:3c7a6081f1a9

Merge with xenppc-unstable
author kfraser@localhost.localdomain
date Wed Dec 13 10:05:07 2006 +0000 (2006-12-13)
parents c08ac3b211c8 2fa06640a1c1
children d5499ff80b19 de69059a1f0e
files
line diff
     1.1 --- a/xen/arch/x86/crash.c	Tue Dec 12 14:35:07 2006 -0600
     1.2 +++ b/xen/arch/x86/crash.c	Wed Dec 13 10:05:07 2006 +0000
     1.3 @@ -58,9 +58,9 @@ static int crash_nmi_callback(struct cpu
     1.4  static void smp_send_nmi_allbutself(void)
     1.5  {
     1.6      cpumask_t allbutself = cpu_online_map;
     1.7 -
     1.8      cpu_clear(smp_processor_id(), allbutself);
     1.9 -    send_IPI_mask(allbutself, APIC_DM_NMI);
    1.10 +    if ( !cpus_empty(allbutself) )
    1.11 +        send_IPI_mask(allbutself, APIC_DM_NMI);
    1.12  }
    1.13  
    1.14  static void nmi_shootdown_cpus(void)
     2.1 --- a/xen/arch/x86/mm.c	Tue Dec 12 14:35:07 2006 -0600
     2.2 +++ b/xen/arch/x86/mm.c	Wed Dec 13 10:05:07 2006 +0000
     2.3 @@ -2951,7 +2951,17 @@ long arch_memory_op(int op, XEN_GUEST_HA
     2.4          guest_physmap_add_page(d, xatp.gpfn, mfn);
     2.5  
     2.6          UNLOCK_BIGLOCK(d);
     2.7 -        
     2.8 +
     2.9 +        /* If we're doing FAST_FAULT_PATH, then shadow mode may have
    2.10 +           cached the fact that this is an mmio region in the shadow
    2.11 +           page tables.  Blow the tables away to remove the cache.
    2.12 +           This is pretty heavy handed, but this is a rare operation
    2.13 +           (it might happen a dozen times during boot and then never
    2.14 +           again), so it doesn't matter too much. */
    2.15 +        shadow_lock(d);
    2.16 +        shadow_blow_tables(d);
    2.17 +        shadow_unlock(d);
    2.18 +
    2.19          put_domain(d);
    2.20  
    2.21          break;
     3.1 --- a/xen/arch/x86/mm/shadow/common.c	Tue Dec 12 14:35:07 2006 -0600
     3.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Dec 13 10:05:07 2006 +0000
     3.3 @@ -791,7 +791,7 @@ void shadow_prealloc(struct domain *d, u
     3.4  
     3.5  /* Deliberately free all the memory we can: this will tear down all of
     3.6   * this domain's shadows */
     3.7 -static void shadow_blow_tables(struct domain *d) 
     3.8 +void shadow_blow_tables(struct domain *d) 
     3.9  {
    3.10      struct list_head *l, *t;
    3.11      struct shadow_page_info *sp;
     4.1 --- a/xen/arch/x86/mm/shadow/multi.c	Tue Dec 12 14:35:07 2006 -0600
     4.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Wed Dec 13 10:05:07 2006 +0000
     4.3 @@ -3488,6 +3488,9 @@ sh_update_cr3(struct vcpu *v)
     4.4                                         ? SH_type_l2h_shadow 
     4.5                                         : SH_type_l2_shadow);
     4.6              }
     4.7 +            else
     4.8 +                /* The guest is not present: clear out the shadow. */
     4.9 +                sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
    4.10          }
    4.11      }
    4.12  #elif GUEST_PAGING_LEVELS == 4
     5.1 --- a/xen/common/domain.c	Tue Dec 12 14:35:07 2006 -0600
     5.2 +++ b/xen/common/domain.c	Wed Dec 13 10:05:07 2006 +0000
     5.3 @@ -238,7 +238,11 @@ void domain_kill(struct domain *d)
     5.4  
     5.5  void __domain_crash(struct domain *d)
     5.6  {
     5.7 -    if ( d == current->domain )
     5.8 +    if ( test_bit(_DOMF_shutdown, &d->domain_flags) )
     5.9 +    {
    5.10 +        /* Print nothing: the domain is already shutting down. */
    5.11 +    }
    5.12 +    else if ( d == current->domain )
    5.13      {
    5.14          printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
    5.15                 d->domain_id, current->vcpu_id, smp_processor_id());
     6.1 --- a/xen/common/sched_credit.c	Tue Dec 12 14:35:07 2006 -0600
     6.2 +++ b/xen/common/sched_credit.c	Wed Dec 13 10:05:07 2006 +0000
     6.3 @@ -106,20 +106,13 @@
     6.4      _MACRO(tickle_local_other)              \
     6.5      _MACRO(tickle_idlers_none)              \
     6.6      _MACRO(tickle_idlers_some)              \
     6.7 -    _MACRO(vcpu_migrate)                    \
     6.8      _MACRO(load_balance_idle)               \
     6.9      _MACRO(load_balance_over)               \
    6.10      _MACRO(load_balance_other)              \
    6.11      _MACRO(steal_trylock_failed)            \
    6.12 -    _MACRO(steal_peer_down)                 \
    6.13      _MACRO(steal_peer_idle)                 \
    6.14 -    _MACRO(steal_peer_running)              \
    6.15 -    _MACRO(steal_peer_pinned)               \
    6.16 -    _MACRO(steal_peer_migrating)            \
    6.17 -    _MACRO(steal_peer_best_idler)           \
    6.18 -    _MACRO(steal_loner_candidate)           \
    6.19 -    _MACRO(steal_loner_signal)              \
    6.20 -    _MACRO(cpu_pick)                        \
    6.21 +    _MACRO(migrate_queued)                  \
    6.22 +    _MACRO(migrate_running)                 \
    6.23      _MACRO(dom_init)                        \
    6.24      _MACRO(dom_destroy)                     \
    6.25      _MACRO(vcpu_init)                       \
    6.26 @@ -146,7 +139,7 @@
    6.27      struct                                      \
    6.28      {                                           \
    6.29          CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
    6.30 -    } stats
    6.31 +    } stats;
    6.32  
    6.33  #define CSCHED_STATS_PRINTK()                   \
    6.34      do                                          \
    6.35 @@ -155,14 +148,27 @@
    6.36          CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
    6.37      } while ( 0 )
    6.38  
    6.39 -#define CSCHED_STAT_CRANK(_X)   (CSCHED_STAT(_X)++)
    6.40 +#define CSCHED_STAT_CRANK(_X)               (CSCHED_STAT(_X)++)
    6.41 +
    6.42 +#define CSCHED_VCPU_STATS_RESET(_V)                     \
    6.43 +    do                                                  \
    6.44 +    {                                                   \
    6.45 +        memset(&(_V)->stats, 0, sizeof((_V)->stats));   \
    6.46 +    } while ( 0 )
    6.47 +
    6.48 +#define CSCHED_VCPU_STAT_CRANK(_V, _X)      (((_V)->stats._X)++)
    6.49 +
    6.50 +#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    (((_V)->stats._X) = (_Y))
    6.51  
    6.52  #else /* CSCHED_STATS */
    6.53  
    6.54 -#define CSCHED_STATS_RESET()    do {} while ( 0 )
    6.55 -#define CSCHED_STATS_DEFINE()   do {} while ( 0 )
    6.56 -#define CSCHED_STATS_PRINTK()   do {} while ( 0 )
    6.57 -#define CSCHED_STAT_CRANK(_X)   do {} while ( 0 )
    6.58 +#define CSCHED_STATS_RESET()                do {} while ( 0 )
    6.59 +#define CSCHED_STATS_DEFINE()
    6.60 +#define CSCHED_STATS_PRINTK()               do {} while ( 0 )
    6.61 +#define CSCHED_STAT_CRANK(_X)               do {} while ( 0 )
    6.62 +#define CSCHED_VCPU_STATS_RESET(_V)         do {} while ( 0 )
    6.63 +#define CSCHED_VCPU_STAT_CRANK(_V, _X)      do {} while ( 0 )
    6.64 +#define CSCHED_VCPU_STAT_SET(_V, _X, _Y)    do {} while ( 0 )
    6.65  
    6.66  #endif /* CSCHED_STATS */
    6.67  
    6.68 @@ -185,13 +191,16 @@ struct csched_vcpu {
    6.69      struct vcpu *vcpu;
    6.70      atomic_t credit;
    6.71      int16_t pri;
    6.72 +#ifdef CSCHED_STATS
    6.73      struct {
    6.74          int credit_last;
    6.75          uint32_t credit_incr;
    6.76          uint32_t state_active;
    6.77          uint32_t state_idle;
    6.78 -        uint32_t migrate;
    6.79 +        uint32_t migrate_q;
    6.80 +        uint32_t migrate_r;
    6.81      } stats;
    6.82 +#endif
    6.83  };
    6.84  
    6.85  /*
    6.86 @@ -219,7 +228,7 @@ struct csched_private {
    6.87      uint32_t credit;
    6.88      int credit_balance;
    6.89      uint32_t runq_sort;
    6.90 -    CSCHED_STATS_DEFINE();
    6.91 +    CSCHED_STATS_DEFINE()
    6.92  };
    6.93  
    6.94  
    6.95 @@ -231,6 +240,15 @@ static struct csched_private csched_priv
    6.96  
    6.97  
    6.98  static inline int
    6.99 +__cycle_cpu(int cpu, const cpumask_t *mask)
   6.100 +{
   6.101 +    int nxt = next_cpu(cpu, *mask);
   6.102 +    if (nxt == NR_CPUS)
   6.103 +        nxt = first_cpu(*mask);
   6.104 +    return nxt;
   6.105 +}
   6.106 +
   6.107 +static inline int
   6.108  __vcpu_on_runq(struct csched_vcpu *svc)
   6.109  {
   6.110      return !list_empty(&svc->runq_elem);
   6.111 @@ -375,118 +393,138 @@ static inline void
   6.112  #define CSCHED_VCPU_CHECK(_vc)
   6.113  #endif
   6.114  
   6.115 -/*
   6.116 - * Indicates which of two given idlers is most efficient to run
   6.117 - * an additional VCPU.
   6.118 - *
   6.119 - * Returns:
   6.120 - *  0:           They are the same.
   6.121 - *  negative:    One is less efficient than Two.
   6.122 - *  positive:    One is more efficient than Two.
   6.123 - */
   6.124 -static int
   6.125 -csched_idler_compare(int one, int two)
   6.126 -{
   6.127 -    cpumask_t idlers;
   6.128 -    cpumask_t one_idlers;
   6.129 -    cpumask_t two_idlers;
   6.130 -
   6.131 -    idlers = csched_priv.idlers;
   6.132 -    cpu_clear(one, idlers);
   6.133 -    cpu_clear(two, idlers);
   6.134 -
   6.135 -    if ( cpu_isset(one, cpu_core_map[two]) )
   6.136 -    {
   6.137 -        cpus_and(one_idlers, idlers, cpu_sibling_map[one]);
   6.138 -        cpus_and(two_idlers, idlers, cpu_sibling_map[two]);
   6.139 -    }
   6.140 -    else
   6.141 -    {
   6.142 -        cpus_and(one_idlers, idlers, cpu_core_map[one]);
   6.143 -        cpus_and(two_idlers, idlers, cpu_core_map[two]);
   6.144 -    }
   6.145 -
   6.146 -    return cpus_weight(one_idlers) - cpus_weight(two_idlers);
   6.147 -}
   6.148 -
   6.149  static inline int
   6.150 -__csched_queued_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
   6.151 +__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
   6.152  {
   6.153      /*
   6.154       * Don't pick up work that's in the peer's scheduling tail. Also only pick
   6.155       * up work that's allowed to run on our CPU.
   6.156       */
   6.157 -    if ( unlikely(test_bit(_VCPUF_running, &vc->vcpu_flags)) )
   6.158 -    {
   6.159 -        CSCHED_STAT_CRANK(steal_peer_running);
   6.160 -        return 0;
   6.161 -    }
   6.162 -
   6.163 -    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
   6.164 -    {
   6.165 -        CSCHED_STAT_CRANK(steal_peer_pinned);
   6.166 -        return 0;
   6.167 -    }
   6.168 -
   6.169 -    return 1;
   6.170 +    return !test_bit(_VCPUF_running, &vc->vcpu_flags) &&
   6.171 +           cpu_isset(dest_cpu, vc->cpu_affinity);
   6.172  }
   6.173  
   6.174 -static inline int
   6.175 -__csched_running_vcpu_is_stealable(int local_cpu, struct vcpu *vc)
   6.176 +static int
   6.177 +csched_cpu_pick(struct vcpu *vc)
   6.178  {
   6.179 -    BUG_ON( is_idle_vcpu(vc) );
   6.180 +    cpumask_t cpus;
   6.181 +    cpumask_t idlers;
   6.182 +    int cpu;
   6.183 +
   6.184 +    /*
   6.185 +     * Pick from online CPUs in VCPU's affinity mask, giving a
   6.186 +     * preference to its current processor if it's in there.
   6.187 +     */
   6.188 +    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
   6.189 +    cpu = cpu_isset(vc->processor, cpus)
   6.190 +            ? vc->processor
   6.191 +            : __cycle_cpu(vc->processor, &cpus);
   6.192 +    ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
   6.193  
   6.194 -    if ( unlikely(!cpu_isset(local_cpu, vc->cpu_affinity)) )
   6.195 +    /*
   6.196 +     * Try to find an idle processor within the above constraints.
   6.197 +     *
   6.198 +     * In multi-core and multi-threaded CPUs, not all idle execution
   6.199 +     * vehicles are equal!
   6.200 +     *
   6.201 +     * We give preference to the idle execution vehicle with the most
   6.202 +     * idling neighbours in its grouping. This distributes work across
   6.203 +     * distinct cores first and guarantees we don't do something stupid
   6.204 +     * like run two VCPUs on co-hyperthreads while there are idle cores
   6.205 +     * or sockets.
   6.206 +     */
   6.207 +    idlers = csched_priv.idlers;
   6.208 +    cpu_set(cpu, idlers);
   6.209 +    cpus_and(cpus, cpus, idlers);
   6.210 +    cpu_clear(cpu, cpus);
   6.211 +
   6.212 +    while ( !cpus_empty(cpus) )
   6.213      {
   6.214 -        CSCHED_STAT_CRANK(steal_peer_pinned);
   6.215 -        return 0;
   6.216 +        cpumask_t cpu_idlers;
   6.217 +        cpumask_t nxt_idlers;
   6.218 +        int nxt;
   6.219 +
   6.220 +        nxt = __cycle_cpu(cpu, &cpus);
   6.221 +
   6.222 +        if ( cpu_isset(cpu, cpu_core_map[nxt]) )
   6.223 +        {
   6.224 +            ASSERT( cpu_isset(nxt, cpu_core_map[cpu]) );
   6.225 +            cpus_and(cpu_idlers, idlers, cpu_sibling_map[cpu]);
   6.226 +            cpus_and(nxt_idlers, idlers, cpu_sibling_map[nxt]);
   6.227 +        }
   6.228 +        else
   6.229 +        {
   6.230 +            ASSERT( !cpu_isset(nxt, cpu_core_map[cpu]) );
   6.231 +            cpus_and(cpu_idlers, idlers, cpu_core_map[cpu]);
   6.232 +            cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
   6.233 +        }
   6.234 +
   6.235 +        if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) )
   6.236 +        {
   6.237 +            cpu = nxt;
   6.238 +            cpu_clear(cpu, cpus);
   6.239 +        }
   6.240 +        else
   6.241 +        {
   6.242 +            cpus_andnot(cpus, cpus, nxt_idlers);
   6.243 +        }
   6.244      }
   6.245  
   6.246 -    if ( test_bit(_VCPUF_migrating, &vc->vcpu_flags) )
   6.247 -    {
   6.248 -        CSCHED_STAT_CRANK(steal_peer_migrating);
   6.249 -        return 0;
   6.250 -    }
   6.251 -
   6.252 -    if ( csched_idler_compare(local_cpu, vc->processor) <= 0 )
   6.253 -    {
   6.254 -        CSCHED_STAT_CRANK(steal_peer_best_idler);
   6.255 -        return 0;
   6.256 -    }
   6.257 -
   6.258 -    return 1;
   6.259 +    return cpu;
   6.260  }
   6.261  
   6.262 -static void
   6.263 -csched_vcpu_acct(struct csched_vcpu *svc, int credit_dec)
   6.264 +static inline void
   6.265 +__csched_vcpu_acct_start(struct csched_vcpu *svc)
   6.266  {
   6.267      struct csched_dom * const sdom = svc->sdom;
   6.268      unsigned long flags;
   6.269  
   6.270 -    /* Update credits */
   6.271 -    atomic_sub(credit_dec, &svc->credit);
   6.272 +    spin_lock_irqsave(&csched_priv.lock, flags);
   6.273  
   6.274 -    /* Put this VCPU and domain back on the active list if it was idling */
   6.275      if ( list_empty(&svc->active_vcpu_elem) )
   6.276      {
   6.277 -        spin_lock_irqsave(&csched_priv.lock, flags);
   6.278 +        CSCHED_VCPU_STAT_CRANK(svc, state_active);
   6.279 +        CSCHED_STAT_CRANK(acct_vcpu_active);
   6.280  
   6.281 -        if ( list_empty(&svc->active_vcpu_elem) )
   6.282 +        sdom->active_vcpu_count++;
   6.283 +        list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
   6.284 +        if ( list_empty(&sdom->active_sdom_elem) )
   6.285          {
   6.286 -            CSCHED_STAT_CRANK(acct_vcpu_active);
   6.287 -            svc->stats.state_active++;
   6.288 +            list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
   6.289 +            csched_priv.weight += sdom->weight;
   6.290 +        }
   6.291 +    }
   6.292 +
   6.293 +    spin_unlock_irqrestore(&csched_priv.lock, flags);
   6.294 +}
   6.295 +
   6.296 +static inline void
   6.297 +__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
   6.298 +{
   6.299 +    struct csched_dom * const sdom = svc->sdom;
   6.300  
   6.301 -            sdom->active_vcpu_count++;
   6.302 -            list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
   6.303 -            if ( list_empty(&sdom->active_sdom_elem) )
   6.304 -            {
   6.305 -                list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
   6.306 -                csched_priv.weight += sdom->weight;
   6.307 -            }
   6.308 -        }
   6.309 +    BUG_ON( list_empty(&svc->active_vcpu_elem) );
   6.310 +
   6.311 +    CSCHED_VCPU_STAT_CRANK(svc, state_idle);
   6.312 +    CSCHED_STAT_CRANK(acct_vcpu_idle);
   6.313  
   6.314 -        spin_unlock_irqrestore(&csched_priv.lock, flags);
   6.315 +    sdom->active_vcpu_count--;
   6.316 +    list_del_init(&svc->active_vcpu_elem);
   6.317 +    if ( list_empty(&sdom->active_vcpu) )
   6.318 +    {
   6.319 +        BUG_ON( csched_priv.weight < sdom->weight );
   6.320 +        list_del_init(&sdom->active_sdom_elem);
   6.321 +        csched_priv.weight -= sdom->weight;
   6.322      }
   6.323 +}
   6.324 +
   6.325 +static void
   6.326 +csched_vcpu_acct(unsigned int cpu)
   6.327 +{
   6.328 +    struct csched_vcpu * const svc = CSCHED_VCPU(current);
   6.329 +
   6.330 +    ASSERT( current->processor == cpu );
   6.331 +    ASSERT( svc->sdom != NULL );
   6.332  
   6.333      /*
   6.334       * If this VCPU's priority was boosted when it last awoke, reset it.
   6.335 @@ -495,25 +533,30 @@ csched_vcpu_acct(struct csched_vcpu *svc
   6.336       */
   6.337      if ( svc->pri == CSCHED_PRI_TS_BOOST )
   6.338          svc->pri = CSCHED_PRI_TS_UNDER;
   6.339 -}
   6.340  
   6.341 -static inline void
   6.342 -__csched_vcpu_acct_idle_locked(struct csched_vcpu *svc)
   6.343 -{
   6.344 -    struct csched_dom * const sdom = svc->sdom;
   6.345 -
   6.346 -    BUG_ON( list_empty(&svc->active_vcpu_elem) );
   6.347 +    /*
   6.348 +     * Update credits
   6.349 +     */
   6.350 +    atomic_sub(CSCHED_CREDITS_PER_TICK, &svc->credit);
   6.351  
   6.352 -    CSCHED_STAT_CRANK(acct_vcpu_idle);
   6.353 -    svc->stats.state_idle++;
   6.354 -
   6.355 -    sdom->active_vcpu_count--;
   6.356 -    list_del_init(&svc->active_vcpu_elem);
   6.357 -    if ( list_empty(&sdom->active_vcpu) )
   6.358 +    /*
   6.359 +     * Put this VCPU and domain back on the active list if it was
   6.360 +     * idling.
   6.361 +     *
   6.362 +     * If it's been active a while, check if we'd be better off
   6.363 +     * migrating it to run elsewhere (see multi-core and multi-thread
   6.364 +     * support in csched_cpu_pick()).
   6.365 +     */
   6.366 +    if ( list_empty(&svc->active_vcpu_elem) )
   6.367      {
   6.368 -        BUG_ON( csched_priv.weight < sdom->weight );
   6.369 -        list_del_init(&sdom->active_sdom_elem);
   6.370 -        csched_priv.weight -= sdom->weight;
   6.371 +        __csched_vcpu_acct_start(svc);
   6.372 +    }
   6.373 +    else if ( csched_cpu_pick(current) != cpu )
   6.374 +    {
   6.375 +        CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
   6.376 +        CSCHED_STAT_CRANK(migrate_running);
   6.377 +        set_bit(_VCPUF_migrating, &current->vcpu_flags);
   6.378 +        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
   6.379      }
   6.380  }
   6.381  
   6.382 @@ -537,15 +580,11 @@ csched_vcpu_init(struct vcpu *vc)
   6.383      svc->vcpu = vc;
   6.384      atomic_set(&svc->credit, 0);
   6.385      svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
   6.386 -    memset(&svc->stats, 0, sizeof(svc->stats));
   6.387 +    CSCHED_VCPU_STATS_RESET(svc);
   6.388      vc->sched_priv = svc;
   6.389  
   6.390      CSCHED_VCPU_CHECK(vc);
   6.391  
   6.392 -    /* Attach fair-share VCPUs to the accounting list */
   6.393 -    if ( likely(sdom != NULL) )
   6.394 -        csched_vcpu_acct(svc, 0);
   6.395 -
   6.396      /* Allocate per-PCPU info */
   6.397      if ( unlikely(!CSCHED_PCPU(vc->processor)) )
   6.398      {
   6.399 @@ -573,7 +612,7 @@ csched_vcpu_destroy(struct vcpu *vc)
   6.400      spin_lock_irqsave(&csched_priv.lock, flags);
   6.401  
   6.402      if ( !list_empty(&svc->active_vcpu_elem) )
   6.403 -        __csched_vcpu_acct_idle_locked(svc);
   6.404 +        __csched_vcpu_acct_stop_locked(svc);
   6.405  
   6.406      spin_unlock_irqrestore(&csched_priv.lock, flags);
   6.407  
   6.408 @@ -717,66 +756,6 @@ csched_dom_destroy(struct domain *dom)
   6.409      xfree(sdom);
   6.410  }
   6.411  
   6.412 -static int
   6.413 -csched_cpu_pick(struct vcpu *vc)
   6.414 -{
   6.415 -    cpumask_t cpus;
   6.416 -    int cpu, nxt;
   6.417 -
   6.418 -    CSCHED_STAT_CRANK(cpu_pick);
   6.419 -
   6.420 -    /*
   6.421 -     * Pick from online CPUs in VCPU's affinity mask, giving a
   6.422 -     * preference to its current processor if it's in there.
   6.423 -     */
   6.424 -    cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
   6.425 -    ASSERT( !cpus_empty(cpus) );
   6.426 -    cpu = cpu_isset(vc->processor, cpus) ? vc->processor : first_cpu(cpus);
   6.427 -
   6.428 -    /*
   6.429 -     * Try to find an idle processor within the above constraints.
   6.430 -     */
   6.431 -    cpus_and(cpus, cpus, csched_priv.idlers);
   6.432 -    if ( !cpus_empty(cpus) )
   6.433 -    {
   6.434 -        cpu = cpu_isset(cpu, cpus) ? cpu : first_cpu(cpus);
   6.435 -        cpu_clear(cpu, cpus);
   6.436 -
   6.437 -        /*
   6.438 -         * In multi-core and multi-threaded CPUs, not all idle execution
   6.439 -         * vehicles are equal!
   6.440 -         *
   6.441 -         * We give preference to the idle execution vehicle with the most
   6.442 -         * idling neighbours in its grouping. This distributes work across
   6.443 -         * distinct cores first and guarantees we don't do something stupid
   6.444 -         * like run two VCPUs on co-hyperthreads while there are idle cores
   6.445 -         * or sockets.
   6.446 -         */
   6.447 -        while ( !cpus_empty(cpus) )
   6.448 -        {
   6.449 -            nxt = first_cpu(cpus);
   6.450 -
   6.451 -            if ( csched_idler_compare(cpu, nxt) < 0 )
   6.452 -            {
   6.453 -                cpu = nxt;
   6.454 -                cpu_clear(nxt, cpus);
   6.455 -            }
   6.456 -            else if ( cpu_isset(cpu, cpu_core_map[nxt]) )
   6.457 -            {
   6.458 -                cpus_andnot(cpus, cpus, cpu_sibling_map[nxt]);
   6.459 -            }
   6.460 -            else
   6.461 -            {
   6.462 -                cpus_andnot(cpus, cpus, cpu_core_map[nxt]);
   6.463 -            }
   6.464 -
   6.465 -            ASSERT( !cpu_isset(nxt, cpus) );
   6.466 -        }
   6.467 -    }
   6.468 -
   6.469 -    return cpu;
   6.470 -}
   6.471 -
   6.472  /*
   6.473   * This is a O(n) optimized sort of the runq.
   6.474   *
   6.475 @@ -981,14 +960,14 @@ csched_acct(void)
   6.476  
   6.477                  if ( credit > CSCHED_CREDITS_PER_TSLICE )
   6.478                  {
   6.479 -                    __csched_vcpu_acct_idle_locked(svc);
   6.480 +                    __csched_vcpu_acct_stop_locked(svc);
   6.481                      credit = 0;
   6.482                      atomic_set(&svc->credit, credit);
   6.483                  }
   6.484              }
   6.485  
   6.486 -            svc->stats.credit_last = credit;
   6.487 -            svc->stats.credit_incr = credit_fair;
   6.488 +            CSCHED_VCPU_STAT_SET(svc, credit_last, credit);
   6.489 +            CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair);
   6.490              credit_balance += credit;
   6.491          }
   6.492      }
   6.493 @@ -1004,21 +983,14 @@ csched_acct(void)
   6.494  static void
   6.495  csched_tick(unsigned int cpu)
   6.496  {
   6.497 -    struct csched_vcpu * const svc = CSCHED_VCPU(current);
   6.498 -    struct csched_dom * const sdom = svc->sdom;
   6.499 -
   6.500      /*
   6.501       * Accounting for running VCPU
   6.502 -     *
   6.503 -     * Note: Some VCPUs, such as the idle tasks, are not credit scheduled.
   6.504       */
   6.505 -    if ( likely(sdom != NULL) )
   6.506 -    {
   6.507 -        csched_vcpu_acct(svc, CSCHED_CREDITS_PER_TICK);
   6.508 -    }
   6.509 +    if ( !is_idle_vcpu(current) )
   6.510 +        csched_vcpu_acct(cpu);
   6.511  
   6.512      /*
   6.513 -     * Accounting duty
   6.514 +     * Host-wide accounting duty
   6.515       *
   6.516       * Note: Currently, this is always done by the master boot CPU. Eventually,
   6.517       * we could distribute or at the very least cycle the duty.
   6.518 @@ -1040,40 +1012,48 @@ csched_tick(unsigned int cpu)
   6.519  }
   6.520  
   6.521  static struct csched_vcpu *
   6.522 -csched_runq_steal(struct csched_pcpu *spc, int cpu, int pri)
   6.523 +csched_runq_steal(int peer_cpu, int cpu, int pri)
   6.524  {
   6.525 +    const struct csched_pcpu * const peer_pcpu = CSCHED_PCPU(peer_cpu);
   6.526 +    const struct vcpu * const peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
   6.527 +    struct csched_vcpu *speer;
   6.528      struct list_head *iter;
   6.529 -    struct csched_vcpu *speer;
   6.530      struct vcpu *vc;
   6.531  
   6.532 -    list_for_each( iter, &spc->runq )
   6.533 +    /*
   6.534 +     * Don't steal from an idle CPU's runq because it's about to
   6.535 +     * pick up work from it itself.
   6.536 +     */
   6.537 +    if ( peer_pcpu != NULL && !is_idle_vcpu(peer_vcpu) )
   6.538      {
   6.539 -        speer = __runq_elem(iter);
   6.540 -
   6.541 -        /*
   6.542 -         * If next available VCPU here is not of higher priority than ours,
   6.543 -         * this PCPU is useless to us.
   6.544 -         */
   6.545 -        if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
   6.546 +        list_for_each( iter, &peer_pcpu->runq )
   6.547          {
   6.548 -            CSCHED_STAT_CRANK(steal_peer_idle);
   6.549 -            break;
   6.550 -        }
   6.551 +            speer = __runq_elem(iter);
   6.552  
   6.553 -        /* Is this VCPU is runnable on our PCPU? */
   6.554 -        vc = speer->vcpu;
   6.555 -        BUG_ON( is_idle_vcpu(vc) );
   6.556 +            /*
   6.557 +             * If next available VCPU here is not of higher priority
   6.558 +             * than ours, this PCPU is useless to us.
   6.559 +             */
   6.560 +            if ( speer->pri <= CSCHED_PRI_IDLE || speer->pri <= pri )
   6.561 +                break;
   6.562  
   6.563 -        if ( __csched_queued_vcpu_is_stealable(cpu, vc) )
   6.564 -        {
   6.565 -            /* We got a candidate. Grab it! */
   6.566 -            __runq_remove(speer);
   6.567 -            vc->processor = cpu;
    6.568 +            /* Is this VCPU runnable on our PCPU? */
   6.569 +            vc = speer->vcpu;
   6.570 +            BUG_ON( is_idle_vcpu(vc) );
   6.571  
   6.572 -            return speer;
   6.573 +            if (__csched_vcpu_is_migrateable(vc, cpu))
   6.574 +            {
   6.575 +                /* We got a candidate. Grab it! */
   6.576 +                CSCHED_VCPU_STAT_CRANK(speer, migrate_q);
   6.577 +                CSCHED_STAT_CRANK(migrate_queued);
   6.578 +                __runq_remove(speer);
   6.579 +                vc->processor = cpu;
   6.580 +                return speer;
   6.581 +            }
   6.582          }
   6.583      }
   6.584  
   6.585 +    CSCHED_STAT_CRANK(steal_peer_idle);
   6.586      return NULL;
   6.587  }
   6.588  
   6.589 @@ -1081,12 +1061,11 @@ static struct csched_vcpu *
   6.590  csched_load_balance(int cpu, struct csched_vcpu *snext)
   6.591  {
   6.592      struct csched_vcpu *speer;
   6.593 -    struct csched_pcpu *spc;
   6.594 -    struct vcpu *peer_vcpu;
   6.595      cpumask_t workers;
   6.596 -    cpumask_t loners;
   6.597      int peer_cpu;
   6.598  
   6.599 +    BUG_ON( cpu != snext->vcpu->processor );
   6.600 +
   6.601      if ( snext->pri == CSCHED_PRI_IDLE )
   6.602          CSCHED_STAT_CRANK(load_balance_idle);
   6.603      else if ( snext->pri == CSCHED_PRI_TS_OVER )
   6.604 @@ -1095,22 +1074,16 @@ csched_load_balance(int cpu, struct csch
   6.605          CSCHED_STAT_CRANK(load_balance_other);
   6.606  
   6.607      /*
   6.608 -     * Peek at non-idling CPUs in the system
   6.609 +     * Peek at non-idling CPUs in the system, starting with our
   6.610 +     * immediate neighbour.
   6.611       */
   6.612 -    cpus_clear(loners);
   6.613      cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
   6.614      cpu_clear(cpu, workers);
   6.615 -
   6.616      peer_cpu = cpu;
   6.617 -    BUG_ON( peer_cpu != snext->vcpu->processor );
   6.618  
   6.619      while ( !cpus_empty(workers) )
   6.620      {
   6.621 -        /* For each CPU of interest, starting with our neighbour... */
   6.622 -        peer_cpu = next_cpu(peer_cpu, workers);
   6.623 -        if ( peer_cpu == NR_CPUS )
   6.624 -            peer_cpu = first_cpu(workers);
   6.625 -
   6.626 +        peer_cpu = __cycle_cpu(peer_cpu, &workers);
   6.627          cpu_clear(peer_cpu, workers);
   6.628  
   6.629          /*
   6.630 @@ -1126,83 +1099,13 @@ csched_load_balance(int cpu, struct csch
   6.631              continue;
   6.632          }
   6.633  
   6.634 -        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
   6.635 -        spc = CSCHED_PCPU(peer_cpu);
   6.636 -
   6.637 -        if ( unlikely(spc == NULL) )
   6.638 -        {
   6.639 -            CSCHED_STAT_CRANK(steal_peer_down);
   6.640 -        }
   6.641 -        else if ( unlikely(is_idle_vcpu(peer_vcpu)) )
   6.642 -        {
   6.643 -            /*
   6.644 -             * Don't steal from an idle CPU's runq because it's about to
   6.645 -             * pick up work from it itself.
   6.646 -             */
   6.647 -            CSCHED_STAT_CRANK(steal_peer_idle);
   6.648 -        }
   6.649 -        else if ( is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) )
   6.650 -        {
   6.651 -            if ( snext->pri == CSCHED_PRI_IDLE &&
   6.652 -                 __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
   6.653 -            {
   6.654 -                CSCHED_STAT_CRANK(steal_loner_candidate);
   6.655 -                cpu_set(peer_cpu, loners);
   6.656 -            }
   6.657 -        }
   6.658 -        else
   6.659 -        {
   6.660 -            /* Try to steal work from a remote CPU's runq. */
   6.661 -            speer = csched_runq_steal(spc, cpu, snext->pri);
   6.662 -            if ( speer != NULL )
   6.663 -            {
   6.664 -                spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.665 -                CSCHED_STAT_CRANK(vcpu_migrate);
   6.666 -                speer->stats.migrate++;
   6.667 -                return speer;
   6.668 -            }
   6.669 -        }
   6.670 -
   6.671 +        /*
   6.672 +         * Any work over there to steal?
   6.673 +         */
   6.674 +        speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
   6.675          spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.676 -    }
   6.677 -
   6.678 -    /*
   6.679 -     * If we failed to find any remotely queued VCPUs to move here,
   6.680 -     * see if it would be more efficient to move any of the running
   6.681 -     * remote VCPUs over here.
   6.682 -     */
   6.683 -    while ( !cpus_empty(loners) )
   6.684 -    {
   6.685 -        /* For each CPU of interest, starting with our neighbour... */
   6.686 -        peer_cpu = next_cpu(peer_cpu, loners);
   6.687 -        if ( peer_cpu == NR_CPUS )
   6.688 -            peer_cpu = first_cpu(loners);
   6.689 -
   6.690 -        cpu_clear(peer_cpu, loners);
   6.691 -
   6.692 -        if ( !spin_trylock(&per_cpu(schedule_data, peer_cpu).schedule_lock) )
   6.693 -        {
   6.694 -            CSCHED_STAT_CRANK(steal_trylock_failed);
   6.695 -            continue;
   6.696 -        }
   6.697 -
   6.698 -        peer_vcpu = per_cpu(schedule_data, peer_cpu).curr;
   6.699 -        spc = CSCHED_PCPU(peer_cpu);
   6.700 -
   6.701 -        /* Signal the first candidate only. */
   6.702 -        if ( !is_idle_vcpu(peer_vcpu) &&
   6.703 -             is_idle_vcpu(__runq_elem(spc->runq.next)->vcpu) &&
   6.704 -             __csched_running_vcpu_is_stealable(cpu, peer_vcpu) )
   6.705 -        {
   6.706 -            set_bit(_VCPUF_migrating, &peer_vcpu->vcpu_flags);
   6.707 -            spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.708 -
   6.709 -            CSCHED_STAT_CRANK(steal_loner_signal);
   6.710 -            cpu_raise_softirq(peer_cpu, SCHEDULE_SOFTIRQ);
   6.711 -            break;
   6.712 -        }
   6.713 -
   6.714 -        spin_unlock(&per_cpu(schedule_data, peer_cpu).schedule_lock);
   6.715 +        if ( speer != NULL )
   6.716 +            return speer;
   6.717      }
   6.718  
   6.719      /* Failed to find more important work elsewhere... */
   6.720 @@ -1270,7 +1173,6 @@ csched_schedule(s_time_t now)
   6.721      ret.task = snext->vcpu;
   6.722  
   6.723      CSCHED_VCPU_CHECK(ret.task);
   6.724 -
   6.725      return ret;
   6.726  }
   6.727  
   6.728 @@ -1287,14 +1189,16 @@ csched_dump_vcpu(struct csched_vcpu *svc
   6.729  
   6.730      if ( sdom )
   6.731      {
   6.732 -        printk(" credit=%i (%d+%u) {a/i=%u/%u m=%u w=%u}",
   6.733 -            atomic_read(&svc->credit),
   6.734 -            svc->stats.credit_last,
   6.735 -            svc->stats.credit_incr,
   6.736 -            svc->stats.state_active,
   6.737 -            svc->stats.state_idle,
   6.738 -            svc->stats.migrate,
   6.739 -            sdom->weight);
   6.740 +        printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
   6.741 +#ifdef CSCHED_STATS
   6.742 +        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
   6.743 +                svc->stats.credit_last,
   6.744 +                svc->stats.credit_incr,
   6.745 +                svc->stats.state_active,
   6.746 +                svc->stats.state_idle,
   6.747 +                svc->stats.migrate_q,
   6.748 +                svc->stats.migrate_r);
   6.749 +#endif
   6.750      }
   6.751  
   6.752      printk("\n");
     7.1 --- a/xen/include/asm-x86/shadow.h	Tue Dec 12 14:35:07 2006 -0600
     7.2 +++ b/xen/include/asm-x86/shadow.h	Wed Dec 13 10:05:07 2006 +0000
     7.3 @@ -540,6 +540,9 @@ extern int shadow_remove_write_access(st
     7.4   * Returns non-zero if we need to flush TLBs. */
     7.5  extern int shadow_remove_all_mappings(struct vcpu *v, mfn_t target_mfn);
     7.6  
     7.7 +/* Remove all mappings from the shadows. */
     7.8 +extern void shadow_blow_tables(struct domain *d);
     7.9 +
    7.10  void
    7.11  shadow_remove_all_shadows_and_parents(struct vcpu *v, mfn_t gmfn);
    7.12  /* This is a HVM page that we thing is no longer a pagetable.