v->async_exception_mask = 0;
memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
#endif
+ v->affinity_broken = 0;
clear_bit(_VPF_blocked, &v->pause_flags);
clear_bit(_VPF_in_reset, &v->pause_flags);
/* Undo a stuck SCHED_pin_override? */
if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
- vcpu_pin_override(v, -1);
+ vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
ret = 0;
kill_timer(&d->watchdog_timer[i]);
}
-int vcpu_pin_override(struct vcpu *v, int cpu)
+/*
+ * Pin a vcpu temporarily to a specific CPU (or restore old pinning state if
+ * cpu is NR_CPUS).
+ * Temporary pinning can be done for two reasons, which may be nested:
+ * - VCPU_AFFINITY_OVERRIDE (requested by the guest): is allowed to fail in
+ * case of a conflict (e.g. the cpupool doesn't include the requested CPU, or
+ * another conflicting temporary pinning is already in effect).
+ * - VCPU_AFFINITY_WAIT (requested by wait_event()): only used to pin the vcpu
+ * to the CPU it is currently running on. Can't fail if used properly.
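+ *
+ * Example (see common/wait.c in this patch): __prepare_to_wait() pins with
+ * vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT)
+ * and __finish_wait() undoes the pinning with
+ * vcpu_temporary_affinity(curr, NR_CPUS, VCPU_AFFINITY_WAIT).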
+ */
+int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason)
{
spinlock_t *lock;
int ret = -EINVAL;
+ bool migrate;
lock = vcpu_schedule_lock_irq(v);
- if ( cpu < 0 )
+ if ( cpu == NR_CPUS )
{
- if ( v->affinity_broken )
+ if ( v->affinity_broken & reason )
{
- sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
- v->affinity_broken = 0;
ret = 0;
+ v->affinity_broken &= ~reason;
}
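+ /* Restore the saved hard affinity only when no pinning reason remains. */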
+ if ( !ret && !v->affinity_broken )
+ sched_set_affinity(v, v->cpu_hard_affinity_saved, NULL);
}
else if ( cpu < nr_cpu_ids )
{
- if ( v->affinity_broken )
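+ /*
+ * Fail if this reason is already active, or if another pinning is in
+ * effect and the requested CPU differs from the one the vcpu runs on.
+ */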
+ if ( (v->affinity_broken & reason) ||
+ (v->affinity_broken && v->processor != cpu) )
ret = -EBUSY;
else if ( cpumask_test_cpu(cpu, VCPU2ONLINE(v)) )
{
- cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
- v->affinity_broken = 1;
- sched_set_affinity(v, cpumask_of(cpu), NULL);
+ if ( !v->affinity_broken )
+ {
+ cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
+ sched_set_affinity(v, cpumask_of(cpu), NULL);
+ }
+ v->affinity_broken |= reason;
ret = 0;
}
}
- if ( ret == 0 )
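+ /* On success, migrate only if the current CPU is outside the new hard affinity. */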
+ migrate = !ret && !cpumask_test_cpu(v->processor, v->cpu_hard_affinity);
+ if ( migrate )
vcpu_migrate_start(v);
vcpu_schedule_unlock_irq(lock, v);
- domain_update_node_affinity(v->domain);
-
- vcpu_migrate_finish(v);
+ if ( migrate )
+ vcpu_migrate_finish(v);
return ret;
}
case SCHEDOP_pin_override:
{
struct sched_pin_override sched_pin_override;
+ unsigned int cpu;
ret = -EPERM;
if ( !is_hardware_domain(current->domain) )
break;
if ( copy_from_guest(&sched_pin_override, arg, 1) )
break;
- ret = vcpu_pin_override(current, sched_pin_override.pcpu);
+ ret = -EINVAL;
+ if ( sched_pin_override.pcpu >= NR_CPUS )
+ break;
+
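+ /* A negative pcpu requests undoing the override (mapped to NR_CPUS). */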
+ cpu = sched_pin_override.pcpu < 0 ? NR_CPUS : sched_pin_override.pcpu;
+ ret = vcpu_temporary_affinity(current, cpu, VCPU_AFFINITY_OVERRIDE);
break;
}
*/
void *esp;
char *stack;
- cpumask_t saved_affinity;
- unsigned int wakeup_cpu;
#endif
};
ASSERT(wqv->esp == 0);
/* Save current VCPU affinity; force wakeup on *this* CPU only. */
- wqv->wakeup_cpu = smp_processor_id();
- cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
- if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
+ if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
{
gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
- domain_crash(current->domain);
+ domain_crash(curr->domain);
for ( ; ; )
do_softirq();
if ( unlikely(wqv->esp == 0) )
{
gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
- domain_crash(current->domain);
+ domain_crash(curr->domain);
for ( ; ; )
do_softirq();
static void __finish_wait(struct waitqueue_vcpu *wqv)
{
wqv->esp = NULL;
- (void)vcpu_set_hard_affinity(current, &wqv->saved_affinity);
+ vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
}
void check_wakeup_from_wait(void)
{
- struct waitqueue_vcpu *wqv = current->waitqueue_vcpu;
+ struct vcpu *curr = current;
+ struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
ASSERT(list_empty(&wqv->list));
if ( likely(wqv->esp == NULL) )
return;
- /* Check if we woke up on the wrong CPU. */
- if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
+ /* Check if we are still pinned. */
+ if ( unlikely(!(curr->affinity_broken & VCPU_AFFINITY_WAIT)) )
{
- /* Re-set VCPU affinity and re-enter the scheduler. */
- struct vcpu *curr = current;
- cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
- if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
- {
- gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
- domain_crash(current->domain);
- }
- wait(); /* takes us back into the scheduler */
+ gdprintk(XENLOG_ERR, "vcpu affinity lost\n");
+ domain_crash(curr->domain);
}
/*
/* VCPU is paused following shutdown request (d->is_shutting_down)? */
bool paused_for_shutdown;
/* VCPU need affinity restored */
- bool affinity_broken;
+ uint8_t affinity_broken;
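+/* Reasons for a temporary affinity change; multiple may be active at once. */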
+#define VCPU_AFFINITY_OVERRIDE 0x01
+#define VCPU_AFFINITY_WAIT 0x02
/* A hypercall has been preempted. */
bool hcall_preempted;
/* Bitmask of CPUs on which this VCPU may run. */
cpumask_var_t cpu_hard_affinity;
- /* Used to restore affinity across S3. */
+ /* Used to save affinity during temporary pinning. */
cpumask_var_t cpu_hard_affinity_saved;
/* Bitmask of CPUs on which this VCPU prefers to run. */
/* We need it in dom0_setup_vcpu */
void sched_set_affinity(struct vcpu *v, const cpumask_t *hard,
const cpumask_t *soft);
+int vcpu_temporary_affinity(struct vcpu *v, unsigned int cpu, uint8_t reason);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
void restore_vcpu_affinity(struct domain *d);
-int vcpu_pin_override(struct vcpu *v, int cpu);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);