v->async_exception_mask = 0;
memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
#endif
+ v->affinity_broken = 0;
clear_bit(_VPF_blocked, &v->pause_flags);
clear_bit(_VPF_in_reset, &v->pause_flags);
/* Undo a stuck SCHED_pin_override? */
if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
- vcpu_pin_override(v, -1);
+ vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
ret = 0;
kill_timer(&d->watchdog_timer[i]);
}
-int vcpu_pin_override(struct vcpu *v, int cpu)
+/*
+ * Pin a vcpu temporarily to a specific CPU (or restore old pinning state if
+ * cpu is NR_CPUS).
+ * Temporary pinning can be done for two reasons, which may be nested:
+ * - VCPU_AFFINITY_OVERRIDE (requested by the guest): is allowed to fail in
+ * case of a conflict (e.g. the cpupool doesn't include the requested CPU, or
+ * another conflicting temporary pinning is already in effect).
+ * - VCPU_AFFINITY_WAIT (requested by wait_event()): only used to pin the vcpu
+ * to the CPU it is currently running on. Can't fail if used properly.
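+ *
+ * Example (see common/wait.c in this patch): __prepare_to_wait() pins with
+ * vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT)
+ * and __finish_wait() undoes the pinning with
+ * vcpu_temporary_affinity(curr, NR_CPUS, VCPU_AFFINITY_WAIT).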
+ */
+int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
{
spinlock_t *lock;
int ret = -EINVAL;
+ bool migrate;
lock = vcpu_schedule_lock_irq(v);
- if ( cpu < 0 )
+ if ( cpu == NR_CPUS )
{
- if ( v->affinity_broken )
+ if ( v->affinity_broken & reason )
{
- sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
- v->affinity_broken = 0;
ret = 0;
+ v->affinity_broken &= ~reason;
}
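+ /* Restore the saved hard affinity only when no pinning reason remains. */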
+ if ( !ret && !v->affinity_broken )
+ sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
}
else if ( cpu < nr_cpu_ids )
{
- if ( v->affinity_broken )
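+ /*
+ * Fail if this reason is already active, or if another pinning is in
+ * effect and the requested CPU differs from the one the vcpu runs on.
+ */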
+ if ( (v->affinity_broken & reason) ||
+ (v->affinity_broken && v->processor != cpu) )
ret = -EBUSY;
else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
{
- cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
- v->affinity_broken = 1;
- sched_set_affinity(v, cpumask_of(cpu), NULL);
+ if ( !v->affinity_broken )
+ {
+ cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
+ sched_set_affinity(v, cpumask_of(cpu), NULL);
+ }
+ v->affinity_broken |= reason;
ret = 0;
}
}
- if ( ret == 0 )
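+ /* On success, migrate only if the current CPU is outside the new hard affinity. */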
+ migrate = !ret && !cpumask_test_cpu(v->processor, v->cpu_hard_affinity);
+ if ( migrate )
vcpu_migrate_start(v);
vcpu_schedule_unlock_irq(lock, v);
- domain_update_node_affinity(v->domain);
-
- vcpu_migrate_finish(v);
+ if ( migrate )
+ vcpu_migrate_finish(v);
return ret;
}
case SCHEDOP_pin_override:
{
struct sched_pin_override sched_pin_override;
+ unsigned int cpu;
ret = -EPERM;
if ( !is_hardware_domain(current->domain) )
break;
if ( copy_from_guest(&sched_pin_override, arg, 1) )
break;
- ret = vcpu_pin_override(current, sched_pin_override.pcpu);
+ ret = -EINVAL;
+ if ( sched_pin_override.pcpu >= NR_CPUS )
+ break;
+
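+ /* A negative pcpu requests undoing the override (mapped to NR_CPUS). */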
+ cpu = sched_pin_override.pcpu < 0 ? NR_CPUS : sched_pin_override.pcpu;
+ ret = vcpu_temporary_affinity(current, cpu, VCPU_AFFINITY_OVERRIDE);
break;
}
*/
void *esp;
char *stack;
- cpumask_t saved_affinity;
- unsigned int wakeup_cpu;
#endif
};
ASSERT(wqv->esp == 0);
/* Save current VCPU affinity; force wakeup on *this* CPU only. */
- wqv->wakeup_cpu = smp_processor_id();
- cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
- if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+ if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
{
gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
- domain_crash(current->domain);
+ domain_crash(curr->domain);
for ( ; ; )
do_softirq();
if ( unlikely(wqv->esp == 0) )
{
gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
- domain_crash(current->domain);
+ domain_crash(curr->domain);
for ( ; ; )
do_softirq();
static void __finish_wait(struct waitqueue_vcpu *wqv)
{
wqv->esp = NULL;
- (void)vcpu_set_hard_affinity(current, &wqv->saved_affinity);
+ vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
}
void check_wakeup_from_wait(void)
{
- struct waitqueue_vcpu *wqv = current->waitqueue_vcpu;
+ struct vcpu *curr = current;
+ struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
ASSERT(list_empty(&wqv->list));
if ( likely(wqv->esp == NULL) )
return;
- /* Check if we woke up on the wrong CPU. */
- if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+ /* Check if we are still pinned. */
+ if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) )
{
- /* Re-set VCPU affinity and re-enter the scheduler. */
- struct vcpu *curr = current;
- cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
- if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
- {
- gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
- domain_crash(current->domain);
- }
- wait(); /* takes us back into the scheduler */
+ gdprintk(XENLOG_ERR, "vcpu affinity lost\n");
+ domain_crash(curr->domain);
}
/*
/* VCPU is paused following shutdown request (d->is_shutting_down)? */
bool paused_for_shutdown;
/* VCPU need affinity restored */
- bool affinity_broken;
+ uint8_t affinity_broken;
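+/* Reasons for a temporary affinity change; multiple may be active at once. */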
+#define VCPU_AFFINITY_OVERRIDE 0x01
+#define VCPU_AFFINITY_WAIT 0x02
/* A hypercall has been preempted. */
bool hcall_preempted;
/* Bitmask of CPUs on which this VCPU may run. */
cpumask_var_t cpu_hard_affinity;
- /* Used to restore affinity across S3. */
+ /* Used to save affinity during temporary pinning. */
cpumask_var_t cpu_hard_affinity_saved;
/* Bitmask of CPUs on which this VCPU prefers to run. */
/* We need it in dom0_setup_vcpu */
void sched_set_affinity(struct vcpu *v, const cpumask_t *hard,
const cpumask_t *soft);
+int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
void restore_vcpu_affinity(struct domain *d);
-int vcpu_pin_override(struct vcpu *v, int cpu);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);