.switch_sched = sched_idle_switch_sched,
};
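+/*
+ * Translate a sched unit and a cpu into the domain vcpu covering that cpu:
+ * the unit's first vcpu id plus the cpu's index within the scheduling
+ * resource. Returns NULL if the domain has no vcpu at that index.
+ */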
+static inline struct vcpu *unit2vcpu_cpu(const struct sched_unit *unit,
+ unsigned int cpu)
+{
+ unsigned int idx = unit->unit_id + per_cpu(sched_res_idx, cpu);
+ const struct domain *d = unit->domain;
+
+ return (idx < d->max_vcpus) ? d->vcpu[idx] : NULL;
+}
+
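+/*
+ * Return the unit's vcpu meant to run on the given cpu, or the cpu's idle
+ * vcpu in case that vcpu isn't about to enter the running state.
+ */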
static inline struct vcpu *sched_unit2vcpu_cpu(const struct sched_unit *unit,
unsigned int cpu)
{
- return unit->domain->vcpu[unit->unit_id + per_cpu(sched_res_idx, cpu)];
+ struct vcpu *v = unit2vcpu_cpu(unit, cpu);
+
+ return (v && v->new_state == RUNSTATE_running) ? v : idle_vcpu[cpu];
}
static inline struct scheduler *dom_scheduler(const struct domain *d)
trace_runstate_change(v, new_state);
- unit->runstate_cnt[v->runstate.state]--;
- unit->runstate_cnt[new_state]++;
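+ /* Idle vcpus are not accounted in the unit's runstate counters. */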
+ if ( !is_idle_vcpu(v) )
+ {
+ unit->runstate_cnt[v->runstate.state]--;
+ unit->runstate_cnt[new_state]++;
+ }
delta = new_entry_time - v->runstate.state_entry_time;
if ( delta > 0 )
v->runstate.state = new_state;
}
-static inline void sched_unit_runstate_change(struct sched_unit *unit,
- bool running, s_time_t new_entry_time)
+void sched_guest_idle(void (*idle) (void), unsigned int cpu)
{
- struct vcpu *v;
-
- for_each_sched_unit_vcpu ( unit, v )
- {
- if ( running )
- vcpu_runstate_change(v, v->new_state, new_entry_time);
- else
- vcpu_runstate_change(v,
- ((v->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
- (vcpu_runnable(v) ? RUNSTATE_runnable : RUNSTATE_offline)),
- new_entry_time);
- }
+ /*
+ * Another vcpu of the unit is active in guest context while this one is
+ * idle. In case of a scheduling event we don't want to suffer high latencies
+ * caused by a cpu having to wake up from a deep C-state in order to join the
+ * rendezvous, so avoid those deep C-states by incrementing the urgent count
+ * of the cpu.
+ */
+ atomic_inc(&per_cpu(sched_urgent_count, cpu));
+ idle();
+ atomic_dec(&per_cpu(sched_urgent_count, cpu));
}
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
if ( is_idle_domain(d) )
{
get_sched_res(v->processor)->curr = unit;
+ get_sched_res(v->processor)->sched_unit_idle = unit;
v->is_running = 1;
unit->is_running = true;
unit->state_entry_time = NOW();
*
* sched_unit_migrate_finish() will do the work now if it can, or simply
* return if it can't (because the unit is still running); in that case
- * sched_unit_migrate_finish() will be called by context_saved().
+ * sched_unit_migrate_finish() will be called by unit_context_saved().
*/
static void sched_unit_migrate_start(struct sched_unit *unit)
{
/*
* If the unit is currently running, this will be handled by
- * context_saved(); and in any case, if the bit is cleared, then
+ * unit_context_saved(); and in any case, if the bit is cleared, then
* someone else has already done the work so we don't need to.
*/
if ( unit->is_running )
struct sched_unit *next, struct sched_unit *prev,
s_time_t now)
{
- sr->curr = next;
-
- TRACE_3D(TRC_SCHED_SWITCH_INFPREV, prev->domain->domain_id, prev->unit_id,
- now - prev->state_entry_time);
- TRACE_4D(TRC_SCHED_SWITCH_INFNEXT, next->domain->domain_id, next->unit_id,
- (next->vcpu_list->runstate.state == RUNSTATE_runnable) ?
- (now - next->state_entry_time) : 0, prev->next_time);
+ unsigned int cpu;
ASSERT(unit_running(prev));
- TRACE_4D(TRC_SCHED_SWITCH, prev->domain->domain_id, prev->unit_id,
- next->domain->domain_id, next->unit_id);
+ if ( prev != next )
+ {
+ sr->curr = next;
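+ /* Remember the outgoing unit for unit_context_saved(). */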
+ sr->prev = prev;
- sched_unit_runstate_change(prev, false, now);
+ TRACE_3D(TRC_SCHED_SWITCH_INFPREV, prev->domain->domain_id,
+ prev->unit_id, now - prev->state_entry_time);
+ TRACE_4D(TRC_SCHED_SWITCH_INFNEXT, next->domain->domain_id,
+ next->unit_id,
+ (next->vcpu_list->runstate.state == RUNSTATE_runnable) ?
+ (now - next->state_entry_time) : 0, prev->next_time);
+ TRACE_4D(TRC_SCHED_SWITCH, prev->domain->domain_id, prev->unit_id,
+ next->domain->domain_id, next->unit_id);
- ASSERT(!unit_running(next));
- sched_unit_runstate_change(next, true, now);
+ ASSERT(!unit_running(next));
- /*
- * NB. Don't add any trace records from here until the actual context
- * switch, else lost_records resume will not work properly.
- */
+ /*
+ * NB. Don't add any trace records from here until the actual context
+ * switch, else lost_records resume will not work properly.
+ */
+
+ ASSERT(!next->is_running);
+ next->is_running = true;
+ next->state_entry_time = now;
+
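+ /*
+ * vcpu_runstate_change() doesn't account idle vcpus, so set the runstate
+ * counters of idle units directly: all vcpus of an idle unit count as
+ * running while it is scheduled in and as runnable while it is scheduled
+ * out.
+ */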
+ if ( is_idle_unit(prev) )
+ {
+ prev->runstate_cnt[RUNSTATE_running] = 0;
+ prev->runstate_cnt[RUNSTATE_runnable] = sched_granularity;
+ }
+ if ( is_idle_unit(next) )
+ {
+ next->runstate_cnt[RUNSTATE_running] = sched_granularity;
+ next->runstate_cnt[RUNSTATE_runnable] = 0;
+ }
+ }
+
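+ /*
+ * Update the vcpus on each cpu of the scheduling resource: account the
+ * runstate changes of the outgoing and incoming vcpus and mark the
+ * incoming vcpus as running. An idle vcpu running on behalf of a guest
+ * unit (because the guest's own vcpu isn't about to run) is linked to
+ * that unit via its sched_unit pointer.
+ */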
+ for_each_cpu ( cpu, sr->cpus )
+ {
+ struct vcpu *vprev = get_cpu_current(cpu);
+ struct vcpu *vnext = sched_unit2vcpu_cpu(next, cpu);
+
+ if ( vprev != vnext || vprev->runstate.state != vnext->new_state )
+ {
+ vcpu_runstate_change(vprev,
+ ((vprev->pause_flags & VPF_blocked) ? RUNSTATE_blocked :
+ (vcpu_runnable(vprev) ? RUNSTATE_runnable : RUNSTATE_offline)),
+ now);
+ vcpu_runstate_change(vnext, vnext->new_state, now);
+ }
- ASSERT(!next->is_running);
- next->vcpu_list->is_running = 1;
- next->is_running = true;
- next->state_entry_time = now;
+ vnext->is_running = 1;
+
+ if ( is_idle_vcpu(vnext) )
+ vnext->sched_unit = next;
+ }
}
static bool sched_tasklet_check_cpu(unsigned int cpu)
if ( prev->next_time >= 0 ) /* -ve means no limit */
set_timer(&sr->s_timer, now + prev->next_time);
- if ( likely(prev != next) )
- sched_switch_units(sr, next, prev, now);
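+ /*
+ * Needed even if prev == next, as individual vcpus of the unit might
+ * require switching (e.g. to the idle vcpu when a guest vcpu has blocked).
+ */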
+ sched_switch_units(sr, next, prev, now);
return next;
}
-static void context_saved(struct vcpu *prev)
+static void vcpu_context_saved(struct vcpu *vprev, struct vcpu *vnext)
{
- struct sched_unit *unit = prev->sched_unit;
-
/* Clear running flag /after/ writing context to memory. */
smp_wmb();
- prev->is_running = 0;
+ if ( vprev != vnext )
+ vprev->is_running = 0;
+}
+
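+/*
+ * sr->prev is set by sched_switch_units() when a unit switch has taken
+ * place; a NULL value means no unit switch happened, so there is nothing
+ * to do.
+ */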
+static void unit_context_saved(struct sched_resource *sr)
+{
+ struct sched_unit *unit = sr->prev;
+
+ if ( !unit )
+ return;
+
unit->is_running = false;
unit->state_entry_time = NOW();
+ sr->prev = NULL;
/* Check for migration request /after/ clearing running flag. */
smp_mb();
- sched_context_saved(vcpu_scheduler(prev), unit);
+ sched_context_saved(unit_scheduler(unit), unit);
- sched_unit_migrate_finish(unit);
+ /* Idle never migrates and idle vcpus might belong to other units. */
+ if ( !is_idle_unit(unit) )
+ sched_unit_migrate_finish(unit);
}
/*
* The counter will be 0 in case no rendezvous is needed. For the rendezvous
* case it is initialised to the number of cpus to rendezvous plus 1. Each
* member entering decrements the counter. The last one will decrement it to
- * 1 and perform the final needed action in that case (call of context_saved()
- * if vcpu was switched), and then set the counter to zero. The other members
+ * 1 and perform the final needed action in that case (call of
+ * unit_context_saved()), and then set the counter to zero. The other members
* will wait until the counter becomes zero before they proceed.
*/
void sched_context_switched(struct vcpu *vprev, struct vcpu *vnext)
{
struct sched_unit *next = vnext->sched_unit;
+ struct sched_resource *sr = get_sched_res(smp_processor_id());
if ( atomic_read(&next->rendezvous_out_cnt) )
{
int cnt = atomic_dec_return(&next->rendezvous_out_cnt);
- /* Call context_saved() before releasing other waiters. */
+ vcpu_context_saved(vprev, vnext);
+
+ /* Call unit_context_saved() before releasing other waiters. */
if ( cnt == 1 )
{
- if ( vprev != vnext )
- context_saved(vprev);
+ unit_context_saved(sr);
atomic_set(&next->rendezvous_out_cnt, 0);
}
else
while ( atomic_read(&next->rendezvous_out_cnt) )
cpu_relax();
}
- else if ( vprev != vnext && sched_granularity == 1 )
- context_saved(vprev);
+ else
+ {
+ vcpu_context_saved(vprev, vnext);
+ if ( sched_granularity == 1 )
+ unit_context_saved(sr);
+ }
+
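+ /*
+ * If the previous vcpu was an idle vcpu running on behalf of a guest
+ * unit, restore its own idle unit.
+ */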
+ if ( is_idle_vcpu(vprev) && vprev != vnext )
+ vprev->sched_unit = sr->sched_unit_idle;
}
static void sched_context_switch(struct vcpu *vprev, struct vcpu *vnext,
- s_time_t now)
+ bool reset_idle_unit, s_time_t now)
{
if ( unlikely(vprev == vnext) )
{
now - vprev->runstate.state_entry_time,
vprev->sched_unit->next_time);
sched_context_switched(vprev, vnext);
+
+ /*
+ * We are switching from a non-idle to an idle unit.
+ * A vcpu of the idle unit might have been running before due to
+ * the guest vcpu being blocked. We must adjust the unit of the idle
+ * vcpu which might have been set to the guest's one.
+ */
+ if ( reset_idle_unit )
+ vnext->sched_unit =
+ get_sched_res(smp_processor_id())->sched_unit_idle;
+
trace_continue_running(vnext);
return continue_running(vprev);
}
pcpu_schedule_unlock_irq(*lock, cpu);
raise_softirq(SCHED_SLAVE_SOFTIRQ);
- sched_context_switch(vprev, vprev, now);
+ sched_context_switch(vprev, vprev, false, now);
return NULL; /* ARM only. */
}
pcpu_schedule_unlock_irq(lock, cpu);
- sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu), now);
+ sched_context_switch(vprev, sched_unit2vcpu_cpu(next, cpu),
+ is_idle_unit(next) && !is_idle_unit(prev), now);
}
/*
pcpu_schedule_unlock_irq(lock, cpu);
vnext = sched_unit2vcpu_cpu(next, cpu);
- sched_context_switch(vprev, vnext, now);
+ sched_context_switch(vprev, vnext,
+ !is_idle_unit(prev) && is_idle_unit(next), now);
}
/* The scheduler timer: force a run through the scheduler */
*/
sr->curr = idle_vcpu[cpu]->sched_unit;
+ sr->sched_unit_idle = idle_vcpu[cpu]->sched_unit;
sr->sched_priv = NULL;
if ( vcpu_create(idle_domain, 0) == NULL )
BUG();
get_sched_res(0)->curr = idle_vcpu[0]->sched_unit;
+ get_sched_res(0)->sched_unit_idle = idle_vcpu[0]->sched_unit;
}
/*