xen: sched_credit: improve tickling of idle CPUs

author Dario Faggioli <dario.faggioli@citrix.com>

Tue, 18 Dec 2012 18:10:57 +0000 (18:10 +0000)

committer Dario Faggioli <dario.faggioli@citrix.com>

Tue, 18 Dec 2012 18:10:57 +0000 (18:10 +0000)
author Dario Faggioli <dario.faggioli@citrix.com>
Tue, 18 Dec 2012 18:10:57 +0000 (18:10 +0000)
committer Dario Faggioli <dario.faggioli@citrix.com>
Tue, 18 Dec 2012 18:10:57 +0000 (18:10 +0000)
diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c

index 1562955f5774897aacf41c30e2053e4e1528e461..eca0edb4b935e82de3eda403027ef445517ce8a4 100644 (file)
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -134,6 +134,7 @@ struct csched_vcpu {
          uint32_t state_idle;
          uint32_t migrate_q;
          uint32_t migrate_r;
+        uint32_t kicked_away;
      } stats;
  #endif
  };
@@ -251,54 +252,67 @@ __runq_tickle(unsigned int cpu, struct csched_vcpu *new)
  {
      struct csched_vcpu * const cur = CSCHED_VCPU(curr_on_cpu(cpu));
      struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
-    cpumask_t mask;
+    cpumask_t mask, idle_mask;
+    int idlers_empty;
  
      ASSERT(cur);
      cpumask_clear(&mask);
  
-    /* If strictly higher priority than current VCPU, signal the CPU */
-    if ( new->pri > cur->pri )
-    {
-        if ( cur->pri == CSCHED_PRI_IDLE )
-            SCHED_STAT_CRANK(tickle_local_idler);
-        else if ( cur->pri == CSCHED_PRI_TS_OVER )
-            SCHED_STAT_CRANK(tickle_local_over);
-        else if ( cur->pri == CSCHED_PRI_TS_UNDER )
-            SCHED_STAT_CRANK(tickle_local_under);
-        else
-            SCHED_STAT_CRANK(tickle_local_other);
-
-        cpumask_set_cpu(cpu, &mask);
-    }
+    idlers_empty = cpumask_empty(prv->idlers);
  
      /*
-     * If this CPU has at least two runnable VCPUs, we tickle any idlers to
-     * let them know there is runnable work in the system...
+     * If the pcpu is idle, or there are no idlers and new has a
+     * higher priority than cur (the running vcpu), run new here.
+     *
+     * If there are idle cpus, first try to find one suitable to run
+     * new, so we can avoid preempting cur.  If we cannot find a
+     * suitable idler on which to run new, run it here, but try to
+     * find a suitable idler on which to run cur instead.
       */
-    if ( cur->pri > CSCHED_PRI_IDLE )
+    if ( cur->pri == CSCHED_PRI_IDLE
+         || (idlers_empty && new->pri > cur->pri) )
+    {
+        if ( cur->pri != CSCHED_PRI_IDLE )
+            SCHED_STAT_CRANK(tickle_idlers_none);
+        cpumask_set_cpu(cpu, &mask);
+    }
+    else if ( !idlers_empty )
      {
-        if ( cpumask_empty(prv->idlers) )
+        /* Check whether or not there are idlers that can run new */
+        cpumask_and(&idle_mask, prv->idlers, new->vcpu->cpu_affinity);
+
+        /*
+         * If there are no suitable idlers for new, and it's higher
+         * priority than cur, ask the scheduler to migrate cur away.
+         * We have to act like this (instead of just waking some of
+         * the idlers suitable for cur) because cur is running.
+         *
+         * If there are suitable idlers for new, regardless of priorities,
+         * leave cur alone (as it is running and is, likely, cache-hot)
+         * and wake some of those idlers to run new (which is just waking
+         * up and so is, likely, cache-cold anyway).
+         */
+        if ( cpumask_empty(&idle_mask) && new->pri > cur->pri )
          {
              SCHED_STAT_CRANK(tickle_idlers_none);
+            SCHED_VCPU_STAT_CRANK(cur, kicked_away);
+            SCHED_VCPU_STAT_CRANK(cur, migrate_r);
+            SCHED_STAT_CRANK(migrate_kicked_away);
+            set_bit(_VPF_migrating, &cur->vcpu->pause_flags);
+            cpumask_set_cpu(cpu, &mask);
          }
-        else
+        else if ( !cpumask_empty(&idle_mask) )
          {
-            cpumask_t idle_mask;
-
-            cpumask_and(&idle_mask, prv->idlers, new->vcpu->cpu_affinity);
-            if ( !cpumask_empty(&idle_mask) )
+            /* Which of the idlers suitable for new shall we wake up? */
+            SCHED_STAT_CRANK(tickle_idlers_some);
+            if ( opt_tickle_one_idle )
              {
-                SCHED_STAT_CRANK(tickle_idlers_some);
-                if ( opt_tickle_one_idle )
-                {
-                    this_cpu(last_tickle_cpu) = 
-                        cpumask_cycle(this_cpu(last_tickle_cpu), &idle_mask);
-                    cpumask_set_cpu(this_cpu(last_tickle_cpu), &mask);
-                }
-                else
-                    cpumask_or(&mask, &mask, &idle_mask);
+                this_cpu(last_tickle_cpu) =
+                    cpumask_cycle(this_cpu(last_tickle_cpu), &idle_mask);
+                cpumask_set_cpu(this_cpu(last_tickle_cpu), &mask);
              }
-            cpumask_and(&mask, &mask, new->vcpu->cpu_affinity);
+            else
+                cpumask_or(&mask, &mask, &idle_mask);
          }
      }
  
@@ -1456,13 +1470,14 @@ csched_dump_vcpu(struct csched_vcpu *svc)
      {
          printk(" credit=%i [w=%u]", atomic_read(&svc->credit), sdom->weight);
  #ifdef CSCHED_STATS
-        printk(" (%d+%u) {a/i=%u/%u m=%u+%u}",
+        printk(" (%d+%u) {a/i=%u/%u m=%u+%u (k=%u)}",
                  svc->stats.credit_last,
                  svc->stats.credit_incr,
                  svc->stats.state_active,
                  svc->stats.state_idle,
                  svc->stats.migrate_q,
-                svc->stats.migrate_r);
+                svc->stats.migrate_r,
+                svc->stats.kicked_away);
  #endif
      }
  
diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h

index 2d0254cb2965cf1bd80f48b18f5cee0426e838a1..3ac7b45ac313ddda3d52fd4a6050d8e0d54a1344 100644 (file)
--- a/xen/include/xen/perfc_defn.h
+++ b/xen/include/xen/perfc_defn.h
@@ -39,10 +39,6 @@ PERFCOUNTER(vcpu_wake_runnable,     "csched: vcpu_wake_runnable")
  PERFCOUNTER(vcpu_wake_not_runnable, "csched: vcpu_wake_not_runnable")
  PERFCOUNTER(vcpu_park,              "csched: vcpu_park")
  PERFCOUNTER(vcpu_unpark,            "csched: vcpu_unpark")
-PERFCOUNTER(tickle_local_idler,     "csched: tickle_local_idler")
-PERFCOUNTER(tickle_local_over,      "csched: tickle_local_over")
-PERFCOUNTER(tickle_local_under,     "csched: tickle_local_under")
-PERFCOUNTER(tickle_local_other,     "csched: tickle_local_other")
  PERFCOUNTER(tickle_idlers_none,     "csched: tickle_idlers_none")
  PERFCOUNTER(tickle_idlers_some,     "csched: tickle_idlers_some")
  PERFCOUNTER(load_balance_idle,      "csched: load_balance_idle")
@@ -52,6 +48,7 @@ PERFCOUNTER(steal_trylock_failed,   "csched: steal_trylock_failed")
  PERFCOUNTER(steal_peer_idle,        "csched: steal_peer_idle")
  PERFCOUNTER(migrate_queued,         "csched: migrate_queued")
  PERFCOUNTER(migrate_running,        "csched: migrate_running")
+PERFCOUNTER(migrate_kicked_away,    "csched: migrate_kicked_away")
  PERFCOUNTER(vcpu_hot,               "csched: vcpu_hot")
  
  PERFCOUNTER(need_flush_tlb_flush,   "PG_need_flush tlb flushes")
author	Dario Faggioli <dario.faggioli@citrix.com>
	Tue, 18 Dec 2012 18:10:57 +0000 (18:10 +0000)
committer	Dario Faggioli <dario.faggioli@citrix.com>
	Tue, 18 Dec 2012 18:10:57 +0000 (18:10 +0000)
xen/common/sched_credit.c		patch \| blob \| blame \| history
xen/include/xen/perfc_defn.h		patch \| blob \| blame \| history