]> xenbits.xensource.com Git - people/dwmw2/xen.git/commitdiff
xen: merge temporary vcpu pinning scenarios
authorJuergen Gross <jgross@suse.com>
Fri, 26 Jul 2019 08:45:49 +0000 (10:45 +0200)
committerJan Beulich <jbeulich@suse.com>
Fri, 26 Jul 2019 08:45:49 +0000 (10:45 +0200)
Today there are two scenarios which are pinning vcpus temporarily to
a single physical cpu:

- wait_event() handling
- SCHEDOP_pin_override handling

Each of those cases are handled independently today using their own
temporary cpumask to save the old affinity settings.

The two cases can be combined as the first case will only pin a vcpu to
the physical cpu it is already running on, while SCHEDOP_pin_override is
allowed to fail.

So merge the two temporary pinning scenarios by only using one cpumask
and a per-vcpu bitmask for specifying which of the scenarios is
currently active (they are both allowed to be active for the same vcpu).

Note that we don't need to call domain_update_node_affinity() as we
are only pinning for a brief period of time.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Dario Faggioli <dfaggioli@suse.com>
xen/common/domain.c
xen/common/domctl.c
xen/common/schedule.c
xen/common/wait.c
xen/include/xen/sched.h

index bc56a51815a33f5c8ab5cd0176693b50d370e8ee..e8e850796e885169470e332db4e762e0276b6cf6 100644 (file)
@@ -1267,6 +1267,7 @@ int vcpu_reset(struct vcpu *v)
     v->async_exception_mask = 0;
     memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
 #endif
+    v->affinity_broken = 0;
     clear_bit(_VPF_blocked, &v->pause_flags);
     clear_bit(_VPF_in_reset, &v->pause_flags);
 
index 72a44953d0bc677881aa892013e767c55c8a3710..fa260ce5fb419ae2ce16c066873bc3863c3c9270 100644 (file)
@@ -654,7 +654,7 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 
             /* Undo a stuck SCHED_pin_override? */
             if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
-                vcpu_pin_override(v, -1);
+                vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
 
             ret = 0;
 
index af22c33387281e75f33d6af62cfcf0e6d2acd669..7b7158175665e1e835ecf1e94429c316650f30d3 100644 (file)
@@ -1106,43 +1106,59 @@ void watchdog_domain_destroy(struct domain *d)
         kill_timer(&d->watchdog_timer[i]);
 }
 
-int vcpu_pin_override(struct vcpu *v, int cpu)
+/*
+ * Pin a vcpu temporarily to a specific CPU (or restore old pinning state if
+ * cpu is NR_CPUS).
+ * Temporary pinning can be done due to two reasons, which may be nested:
+ * - VCPU_AFFINITY_OVERRIDE (requested by guest): is allowed to fail in case
+ *   of a conflict (e.g. in case cpupool doesn't include requested CPU, or
+ *   another conflicting temporary pinning is already in effect.
+ * - VCPU_AFFINITY_WAIT (called by wait_event()): only used to pin vcpu to the
+ *   CPU it is just running on. Can't fail if used properly.
+ */
+int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
 {
     spinlock_t *lock;
     int ret = -EINVAL;
+    bool migrate;
 
     lock = vcpu_schedule_lock_irq(v);
 
-    if ( cpu < 0 )
+    if ( cpu == NR_CPUS )
     {
-        if ( v->affinity_broken )
+        if ( v->affinity_broken & reason )
         {
-            sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
-            v->affinity_broken = 0;
             ret = 0;
+            v->affinity_broken &= ~reason;
         }
+        if ( !ret && !v->affinity_broken )
+            sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
     }
     else if ( cpu < nr_cpu_ids )
     {
-        if ( v->affinity_broken )
+        if ( (v->affinity_broken & reason) ||
+             (v->affinity_broken && v->processor != cpu) )
             ret = -EBUSY;
         else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
         {
-            cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
-            v->affinity_broken = 1;
-            sched_set_affinity(v, cpumask_of(cpu), NULL);
+            if ( !v->affinity_broken )
+            {
+                cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
+                sched_set_affinity(v, cpumask_of(cpu), NULL);
+            }
+            v->affinity_broken |= reason;
             ret = 0;
         }
     }
 
-    if ( ret == 0 )
+    migrate = !ret && !cpumask_test_cpu(v->processor, v->cpu_hard_affinity);
+    if ( migrate )
         vcpu_migrate_start(v);
 
     vcpu_schedule_unlock_irq(lock, v);
 
-    domain_update_node_affinity(v->domain);
-
-    vcpu_migrate_finish(v);
+    if ( migrate )
+        vcpu_migrate_finish(v);
 
     return ret;
 }
@@ -1258,6 +1274,7 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     case SCHEDOP_pin_override:
     {
         struct sched_pin_override sched_pin_override;
+        unsigned int cpu;
 
         ret = -EPERM;
         if ( !is_hardware_domain(current->domain) )
@@ -1267,7 +1284,12 @@ ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( copy_from_guest(&sched_pin_override, arg, 1) )
             break;
 
-        ret = vcpu_pin_override(current, sched_pin_override.pcpu);
+        ret = -EINVAL;
+        if ( sched_pin_override.pcpu >= NR_CPUS )
+           break;
+
+        cpu = sched_pin_override.pcpu < 0 ? NR_CPUS : sched_pin_override.pcpu;
+        ret = vcpu_temporary_affinity(current, cpu, VCPU_AFFINITY_OVERRIDE);
 
         break;
     }
index 4f830a14e8052a69ce0983480a1f89f2a4762a8a..3fc5f68611eb14d9a2f05ee718c6fa00d06c21eb 100644 (file)
@@ -34,8 +34,6 @@ struct waitqueue_vcpu {
      */
     void *esp;
     char *stack;
-    cpumask_t saved_affinity;
-    unsigned int wakeup_cpu;
 #endif
 };
 
@@ -131,12 +129,10 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
     ASSERT(wqv->esp == 0);
 
     /* Save current VCPU affinity; force wakeup on *this* CPU only. */
-    wqv->wakeup_cpu = smp_processor_id();
-    cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
-    if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+    if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
     {
         gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
-        domain_crash(current->domain);
+        domain_crash(curr->domain);
 
         for ( ; ; )
             do_softirq();
@@ -170,7 +166,7 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
     if ( unlikely(wqv->esp == 0) )
     {
         gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
-        domain_crash(current->domain);
+        domain_crash(curr->domain);
 
         for ( ; ; )
             do_softirq();
@@ -182,30 +178,24 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
 static void __finish_wait(struct waitqueue_vcpu *wqv)
 {
     wqv->esp = NULL;
-    (void)vcpu_set_hard_affinity(current, &wqv->saved_affinity);
+    vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
 }
 
 void check_wakeup_from_wait(void)
 {
-    struct waitqueue_vcpu *wqv = current->waitqueue_vcpu;
+    struct vcpu *curr = current;
+    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
 
     ASSERT(list_empty(&wqv->list));
 
     if ( likely(wqv->esp == NULL) )
         return;
 
-    /* Check if we woke up on the wrong CPU. */
-    if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+    /* Check if we are still pinned. */
+    if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) )
     {
-        /* Re-set VCPU affinity and re-enter the scheduler. */
-        struct vcpu *curr = current;
-        cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
-        if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
-        {
-            gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
-            domain_crash(current->domain);
-        }
-        wait(); /* takes us back into the scheduler */
+        gdprintk(XENLOG_ERR, "vcpu affinity lost\n");
+        domain_crash(curr->domain);
     }
 
     /*
index c197e93d735ec1fe3e0e0e3e590b6b546e40414e..9578628c6a880d9f785eadf725fc9c583aaaa00f 100644 (file)
@@ -200,7 +200,9 @@ struct vcpu
     /* VCPU is paused following shutdown request (d->is_shutting_down)? */
     bool             paused_for_shutdown;
     /* VCPU need affinity restored */
-    bool             affinity_broken;
+    uint8_t          affinity_broken;
+#define VCPU_AFFINITY_OVERRIDE    0x01
+#define VCPU_AFFINITY_WAIT        0x02
 
     /* A hypercall has been preempted. */
     bool             hcall_preempted;
@@ -245,7 +247,7 @@ struct vcpu
 
     /* Bitmask of CPUs on which this VCPU may run. */
     cpumask_var_t    cpu_hard_affinity;
-    /* Used to restore affinity across S3. */
+    /* Used to save affinity during temporary pinning. */
     cpumask_var_t    cpu_hard_affinity_saved;
 
     /* Bitmask of CPUs on which this VCPU prefers to run. */
@@ -873,10 +875,10 @@ int cpu_disable_scheduler(unsigned int cpu);
 /* We need it in dom0_setup_vcpu */
 void sched_set_affinity(struct vcpu *v, const cpumask_t *hard,
                         const cpumask_t *soft);
+int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
 int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
 int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
 void restore_vcpu_affinity(struct domain *d);
-int vcpu_pin_override(struct vcpu *v, int cpu);
 
 void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
 uint64_t get_cpu_idle_time(unsigned int cpu);