From: Vincent Hanquez Date: Wed, 17 Dec 2008 17:32:02 +0000 (+0000) Subject: patch bcredit X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=4b89226e420ffd004455e1b707043745e24fb42e;p=xenclient%2Fxen.git patch bcredit --- diff --git a/tools/libxc/xc_csched.c b/tools/libxc/xc_csched.c index 4ea986f..e8f590f 100644 --- a/tools/libxc/xc_csched.c +++ b/tools/libxc/xc_csched.c @@ -48,3 +48,41 @@ xc_sched_credit_domain_get( return err; } + +int +xc_sched_bcredit_domain_set( + int xc_handle, + uint32_t domid, + struct xen_domctl_sched_bcredit *sdom) +{ + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_BCREDIT; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo; + domctl.u.scheduler_op.u.bcredit = *sdom; + + return do_domctl(xc_handle, &domctl); +} + +int +xc_sched_bcredit_domain_get( + int xc_handle, + uint32_t domid, + struct xen_domctl_sched_bcredit *sdom) +{ + DECLARE_DOMCTL; + int err; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_BCREDIT; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo; + + err = do_domctl(xc_handle, &domctl); + if ( err == 0 ) + *sdom = domctl.u.scheduler_op.u.bcredit; + + return err; +} diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index ef68689..f648de8 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -452,6 +452,14 @@ int xc_sched_credit_domain_get(int xc_handle, uint32_t domid, struct xen_domctl_sched_credit *sdom); +int xc_sched_bcredit_domain_set(int xc_handle, + uint32_t domind, + struct xen_domctl_sched_bcredit *sdom); + +int xc_sched_bcredit_domain_get(int xc_handle, + uint32_t domid, + struct xen_domctl_sched_bcredit *sdom); + /** * This function sends a trigger to a domain. 
* diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index ee51a7e..77b5f21 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -1317,6 +1317,59 @@ static PyObject *pyxc_sched_credit_domain_get(XcObject *self, PyObject *args) "cap", sdom.cap); } +static PyObject *pyxc_sched_bcredit_domain_set(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t domid; + uint16_t weight; + uint16_t cap; + uint16_t max_boost_period; + uint16_t boost_ratio; + static char *kwd_list[] = { "domid", "bc_weight", "bc_cap", + "bc_max_boost_period", "bc_ratio", NULL }; + static char kwd_type[] = "I|HHhh"; + struct xen_domctl_sched_bcredit sdom; + + weight = 0; + cap = (uint16_t)~0U; + max_boost_period = (uint16_t)~0U; + boost_ratio = (uint16_t)~0U; + if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, + &domid, &weight, &cap, + &max_boost_period, &boost_ratio) ) + return NULL; + + sdom.weight = weight; + sdom.cap = cap; + sdom.max_boost_period = max_boost_period; + sdom.boost_ratio = boost_ratio; + + if ( xc_sched_bcredit_domain_set(self->xc_handle, domid, &sdom) != 0 ) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_sched_bcredit_domain_get(XcObject *self, PyObject *args) +{ + uint32_t domid; + struct xen_domctl_sched_bcredit sdom; + + if( !PyArg_ParseTuple(args, "I", &domid) ) + return NULL; + + if ( xc_sched_bcredit_domain_get(self->xc_handle, domid, &sdom) != 0 ) + return pyxc_error_to_exception(); + + return Py_BuildValue("{s:H,s:H,s:i,s:i}", + "bc_weight", sdom.weight, + "bc_cap", sdom.cap, + "bc_max_boost_period", sdom.max_boost_period, + "bc_ratio", sdom.boost_ratio); +} + static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args) { uint32_t dom; @@ -1733,6 +1786,30 @@ static PyMethodDef pyxc_methods[] = { "Returns: [dict]\n" " weight [short]: domain's scheduling weight\n"}, + { "sched_bcredit_domain_set", + 
(PyCFunction)pyxc_sched_bcredit_domain_set, + METH_KEYWORDS, "\n" + "Set the scheduling parameters for a domain when running with the\n" + "SMP credit scheduler for client.\n" + " domid [int]: domain id to set\n" + " bc_weight [short]: domain's scheduling weight\n" + " bc_cap [short]: cap\n" + " bc_max_boost_period [short]: upper limit in BOOST priority\n" + " bc_ratio [short]: domain's boost ratio per a CPU\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "sched_bcredit_domain_get", + (PyCFunction)pyxc_sched_bcredit_domain_get, + METH_VARARGS, "\n" + "Get the scheduling parameters for a domain when running with the\n" + "SMP credit scheduler for client.\n" + " domid [int]: domain id to get\n" + "Returns: [dict]\n" + " bc_weight [short]: domain's scheduling weight\n" + " bc_cap [short]: cap\n" + " bc_max_boost_period [short]: upper limit in BOOST priority\n" + " bc_ratio [short]: domain's boost ratio per a CPU\n"}, + { "evtchn_alloc_unbound", (PyCFunction)pyxc_evtchn_alloc_unbound, METH_VARARGS | METH_KEYWORDS, "\n" @@ -2048,6 +2125,7 @@ PyMODINIT_FUNC initxc(void) /* Expose some libxc constants to Python */ PyModule_AddIntConstant(m, "XEN_SCHEDULER_SEDF", XEN_SCHEDULER_SEDF); PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT", XEN_SCHEDULER_CREDIT); + PyModule_AddIntConstant(m, "XEN_SCHEDULER_BCREDIT", XEN_SCHEDULER_BCREDIT); } diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py index 42e131b..9124a17 100644 --- a/tools/python/xen/xend/XendAPI.py +++ b/tools/python/xen/xend/XendAPI.py @@ -1510,6 +1510,16 @@ class XendAPI(object): cap = xeninfo.info['vcpus_params']['cap'] xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap) + if 'bc_weight' in xeninfo.info['vcpus_params'] \ + and 'bc_cap' in xeninfo.info['vcpus_params'] \ + and 'bc_max_boost_period' in xeninfo.info['vcpus_params'] \ + and 'bc_ratio' in xeninfo.info['vcpus_params']: + bc_weight = xeninfo.info['vcpus_params']['bc_weight'] + bc_cap = 
xeninfo.info['vcpus_params']['bc_cap'] + bc_max_boost_period = xeninfo.info['vcpus_params']['bc_max_boost_period'] + bc_ratio = xeninfo.info['vcpus_params']['bc_ratio'] + xendom.domain_sched_bcredit_set(xeninfo.getDomid(), bc_weight, bc_cap, bc_max_boost_period, bc_ratio) + def VM_set_VCPUs_number_live(self, _, vm_ref, num): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) dom.setVCpuCount(int(num)) diff --git a/tools/python/xen/xend/XendConfig.py b/tools/python/xen/xend/XendConfig.py index 86b15d5..cc92b9a 100644 --- a/tools/python/xen/xend/XendConfig.py +++ b/tools/python/xen/xend/XendConfig.py @@ -589,6 +589,15 @@ class XendConfig(dict): int(sxp.child_value(sxp_cfg, "cpu_weight", 256)) cfg["vcpus_params"]["cap"] = \ int(sxp.child_value(sxp_cfg, "cpu_cap", 0)) + # For boost credit scheduler + cfg["vcpus_params"]["bc_weight"] = \ + int(sxp.child_value(sxp_cfg, "cpu_bc_weight", 256)) + cfg["vcpus_params"]["bc_cap"] = \ + int(sxp.child_value(sxp_cfg, "cpu_bc_cap", 0)) + cfg["vcpus_params"]["bc_max_boost_period"] = \ + int(sxp.child_value(sxp_cfg, "cpu_bc_max_boost_period", 0)) + cfg["vcpus_params"]["bc_ratio"] = \ + int(sxp.child_value(sxp_cfg, "cpu_bc_ratio", 0)) # Only extract options we know about. extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \ diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py index 9faebe9..aa6506c 100644 --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -1591,6 +1591,99 @@ class XendDomain: log.exception(ex) raise XendError(str(ex)) + def domain_sched_bcredit_get(self, domid): + """Get boost credit scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. 
+ @rtype: dict with keys 'bc_weight' and 'bc_cap' and 'bc_max_boost_period' and 'bc_ratio' + @return: boost credit scheduler parameters + """ + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + try: + return xc.sched_bcredit_domain_get(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + else: + return {'bc_weight' : dominfo.getBCWeight(), + 'bc_cap' : dominfo.getBCCap(), + 'bc_max_boost_period' : dominfo.getBCMaxBoostPeriod(), + 'bc_ratio' : dominfo.getBCRatio()} + + def domain_sched_bcredit_set(self, domid, bc_weight = None, bc_cap = None, bc_max_boost_period = None, bc_ratio = None): + """Set boost credit scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @type bc_weight: int + @type bc_cap: int + @type bc_max_boost_period: int + @type bc_ratio: int + @rtype: 0 + """ + set_weight = False + set_cap = False + set_max_boost_period = False + set_ratio = False + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + if bc_weight is None: + bc_weight = int(0) + elif bc_weight < 1 or bc_weight > 65535: + raise XendError("bc_weight is out of range") + else: + set_weight = True + + if bc_cap is None: + bc_cap = int(~0) + elif bc_cap < 0 or bc_cap > dominfo.getVCpuCount() * 100: + raise XendError("bc_cap is out of range") + else: + set_cap = True + + if bc_max_boost_period is None: + bc_max_boost_period = int(~0) + elif bc_max_boost_period < 0: + raise XendError("bc_max_boost_period is out of range") + else: + set_max_boost_period = True + + if bc_ratio is None: + bc_ratio = int(~0) + elif bc_ratio < 0: + raise XendError("bc_ratio is out of range") + else: + set_ratio = True + + assert type(bc_weight) == int + assert type(bc_cap) == int + assert type(bc_max_boost_period) == int + assert type(bc_ratio) == int + + rc = 0 + if 
dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + rc = xc.sched_bcredit_domain_set(dominfo.getDomid(), bc_weight, bc_cap, bc_max_boost_period, bc_ratio) + if rc == 0: + if set_weight: + dominfo.setBCWeight(bc_weight) + if set_cap: + dominfo.setBCCap(bc_cap) + if set_max_boost_period: + dominfo.setBCMaxBoostPeriod(bc_max_boost_period) + if set_ratio: + dominfo.setBCRatio(bc_ratio) + self.managed_config_save(dominfo) + return rc + except Exception, ex: + log.exception(ex) + raise XendError(str(ex)) + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index 9784f47..a51edd1 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -465,6 +465,14 @@ class XendDomainInfo: xendomains.domain_sched_credit_set(self.getDomid(), self.getWeight(), self.getCap()) + + if xennode.xenschedinfo() == 'bcredit': + xendomains.domain_sched_bcredit_set(self.getDomid(), + self.getBCWeight(), + self.getBCCap(), + self.getBCMaxBoostPeriod(), + self.getBCRatio()) + except: log.exception('VM start failed') self.destroy() @@ -1569,6 +1577,30 @@ class XendDomainInfo: def setWeight(self, cpu_weight): self.info['vcpus_params']['weight'] = cpu_weight + def getBCCap(self): + return self.info['vcpus_params']['bc_cap'] + + def setBCCap(self, cpu_bc_cap): + self.info['vcpus_params']['bc_cap'] = cpu_bc_cap + + def getBCWeight(self): + return self.info['vcpus_params']['bc_weight'] + + def setBCWeight(self, cpu_bc_weight): + self.info['vcpus_params']['bc_weight'] = cpu_bc_weight + + def getBCMaxBoostPeriod(self): + return self.info['vcpus_params']['bc_max_boost_period'] + + def setBCMaxBoostPeriod(self, cpu_bc_max_boost_period): + self.info['vcpus_params']['bc_max_boost_period'] = cpu_bc_max_boost_period + + def getBCRatio(self): + return self.info['vcpus_params']['bc_ratio'] + + def setBCRatio(self, cpu_bc_ratio): + 
self.info['vcpus_params']['bc_ratio'] = cpu_bc_ratio + def getRestartCount(self): return self._readVm('xend/restart_count') diff --git a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py index 72db297..3ef4221 100644 --- a/tools/python/xen/xend/XendNode.py +++ b/tools/python/xen/xend/XendNode.py @@ -555,6 +555,8 @@ class XendNode: return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_BCREDIT: + return 'bcredit' else: return 'unknown' @@ -714,6 +716,8 @@ class XendNode: return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_BCREDIT: + return 'bcredit' else: return 'unknown' diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py index 8ed8b1b..90d93e5 100644 --- a/tools/python/xen/xm/main.py +++ b/tools/python/xen/xm/main.py @@ -151,6 +151,8 @@ SUBCOMMAND_HELP = { 'sched-sedf' : (' [options]', 'Get/set EDF parameters.'), 'sched-credit': ('[-d [-w[=WEIGHT]|-c[=CAP]]]', 'Get/set credit scheduler parameters.'), + 'sched-bcredit': ('[-d [-w[=WEIGHT]|-c[=CAP]|-m[=MAXBOOSTPERIOD]|-r[=RATIO]]]', + ''), 'sysrq' : (' ', 'Send a sysrq to a domain.'), 'debug-keys' : ('', 'Send debug keys to Xen.'), 'trigger' : (' []', @@ -240,6 +242,13 @@ SUBCOMMAND_OPTIONS = { ('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'), ('-c CAP', '--cap=CAP', 'Cap (int)'), ), + 'sched-bcredit': ( + ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'), + ('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'), + ('-c CAP', '--cap=CAP', 'Cap (int)'), + ('-m PERIOD', '--maxboostperiod=PERIOD', 'Upper limit of boost period (ms)'), + ('-r RATIO', '--ratio=RATIO', 'Boost ratio per a CPU (int)'), + ), 'list': ( ('-l', '--long', 'Output all VM details in SXP'), ('', '--label', 'Include security labels'), @@ -1657,6 +1666,116 @@ def xm_sched_credit(args): if result != 0: err(str(result)) +def xm_sched_bcredit(args): + 
"""Get/Set options for Boost Credit Scheduler.""" + + check_sched_type('bcredit') + + try: + opts, params = getopt.getopt(args, "d:w:c:m:r:", + ["domain=", "weight=", "cap=", "maxboostperiod=", "ratio="]) + except getopt.GetoptError, opterr: + err(opterr) + usage('sched-bcredit') + + domid = None + weight = None + cap = None + max_boost_period = None + boost_ratio = None + + for o, a in opts: + if o in ["-d", "--domain"]: + domid = a + elif o in ["-w", "--weight"]: + weight = int(a) + elif o in ["-c", "--cap"]: + cap = int(a) + elif o in ["-m", "--maxboostperiod"]: + max_boost_period = int(a) + elif o in ["-r", "--ratio"]: + boost_ratio = int(a) + + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) + for dom in getDomains(None, 'all')]) + + if weight is None and cap is None and max_boost_period is None and boost_ratio is None: + if domid is not None and doms == []: + err("Domain '%s' does not exist." % domid) + usage('sched-bcredit') + # print header if we aren't setting any parameters + print '%-33s %4s %6s %4s %8s %5s' % ('Name','ID','Weight','Cap','Max(ms)','Ratio') + + for d in doms: + try: + if serverType == SERVER_XEN_API: + info = server.xenapi.VM_metrics.get_VCPUs_params( + server.xenapi.VM.get_metrics( + get_single_vm(d['name']))) + else: + info = server.xend.domain.sched_bcredit_get(d['name']) + except xmlrpclib.Fault: + pass + + if 'bc_weight' not in info or 'bc_cap' not in info or 'bc_max_boost_period' not in info or 'bc_ratio' not in info: + # domain does not support sched-bcredit? 
+ info = {'bc_weight': -1, 'bc_cap': -1, 'bc_max_boost_period': -1, 'bc_ratio': -1} + + info['bc_weight'] = int(info['bc_weight']) + info['bc_cap'] = int(info['bc_cap']) + info['bc_max_boost_period'] = int(info['bc_max_boost_period']) + info['bc_ratio'] = int(info['bc_ratio']) + + info['name'] = d['name'] + info['domid'] = str(d['domid']) + print( ("%(name)-32s %(domid)5s %(bc_weight)6d %(bc_cap)4d %(bc_max_boost_period)8d %(bc_ratio)5d") % info) + else: + if domid is None: + # place holder for system-wide scheduler parameters + err("No domain given.") + usage('sched-bcredit') + + if serverType == SERVER_XEN_API: + if doms[0]['domid']: + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "bc_weight", + weight) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "bc_cap", + cap) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "bc_max_boost_period", + max_boost_period) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "bc_ratio", + boost_ratio) + else: + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "bc_weight", + weight) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "bc_cap", + cap) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "bc_max_boost_period", + max_boost_period) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "bc_ratio", + boost_ratio) + else: + result = server.xend.domain.sched_bcredit_set(domid, weight, cap, max_boost_period, boost_ratio) + if result != 0: + err(str(result)) + def xm_info(args): arg_check(args, "info", 0, 1) @@ -2824,6 +2943,7 @@ commands = { # scheduler "sched-sedf": xm_sched_sedf, "sched-credit": xm_sched_credit, + "sched-bcredit": xm_sched_bcredit, # block "block-attach": xm_block_attach, "block-detach": xm_block_detach, diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c index 3ba7d3e..a4530a8 100644 --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ 
-1401,3 +1401,1003 @@ struct scheduler sched_credit_def = { .dump_settings = csched_dump, .init = csched_init, }; + + +/* + * Boost Credit Scheduler (bcredit) + * Alternative Credit Scheduler optimized for client hypervisor + */ + +/* + * Basic constants + */ +#define BCSCHED_DEFAULT_WEIGHT CSCHED_DEFAULT_WEIGHT +#define BCSCHED_TICKS_PER_TSLICE CSCHED_TICKS_PER_TSLICE +#define BCSCHED_TICKS_PER_ACCT CSCHED_TICKS_PER_ACCT +#define BCSCHED_MSECS_PER_TICK CSCHED_MSECS_PER_TICK +#define BCSCHED_MSECS_PER_TSLICE \ + (BCSCHED_MSECS_PER_TICK * BCSCHED_TICKS_PER_TSLICE) +#define BCSCHED_CREDITS_PER_TICK 10000 +#define BCSCHED_CREDITS_PER_TSLICE \ + (BCSCHED_CREDITS_PER_TICK * BCSCHED_TICKS_PER_TSLICE) +#define BCSCHED_CREDITS_PER_ACCT \ + (BCSCHED_CREDITS_PER_TICK * BCSCHED_TICKS_PER_ACCT) +#define BCSCHED_MSECS_BOOSTTSLICE_PER_CPU 2 +#define BCSCHED_NSECS_MIN_BOOST_TSLICE 500000 + +/* + * Macros + */ +#define svc_sbvc(_v) (container_of((_v), struct bcsched_vcpu, svc)) +#define sdom_sbdom(_d) (container_of((_d), struct bcsched_dom, sdom)) + +/* + * Virtual CPU + */ +struct bcsched_vcpu { + struct csched_vcpu svc; + struct list_head inactive_vcpu_elem; + s_time_t start_time; + atomic_t boost_credit; +}; + +/* + * Domain + */ +struct bcsched_dom { + struct csched_dom sdom; + uint16_t boost_ratio; + uint16_t max_boost_period; +}; + +/* + * System-wide private data + */ +struct bcsched_private { + struct list_head inactive_vcpu; + uint32_t nvcpus; + s_time_t boost_tslice; + uint32_t boost_credit; + uint16_t total_boost_ratio; +}; + +/* + * Global variables + */ +static struct bcsched_private bcsched_priv; + +/* opt_bcsched_tslice: time slice for BOOST priority */ +static unsigned int opt_bcsched_tslice = BCSCHED_MSECS_BOOSTTSLICE_PER_CPU; +integer_param("bcsched_tslice", opt_bcsched_tslice); + +static void bcsched_tick(void *_cpu); + +static int +bcsched_pcpu_init(int cpu) +{ + struct csched_pcpu *spc; + unsigned long flags; + + /* Allocate per-PCPU info */ + spc = 
xmalloc(struct csched_pcpu); + if ( spc == NULL ) + return -1; + + spin_lock_irqsave(&csched_priv.lock, flags); + + /* Initialize/update system-wide config */ + csched_priv.credit += BCSCHED_CREDITS_PER_ACCT; + if ( csched_priv.ncpus <= cpu ) + csched_priv.ncpus = cpu + 1; + if ( csched_priv.master >= csched_priv.ncpus ) + csched_priv.master = cpu; + + init_timer(&spc->ticker, bcsched_tick, (void *)(unsigned long)cpu, cpu); + INIT_LIST_HEAD(&spc->runq); + spc->runq_sort_last = csched_priv.runq_sort; + per_cpu(schedule_data, cpu).sched_priv = spc; + + /* Start off idling... */ + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + cpu_set(cpu, csched_priv.idlers); + + spin_unlock_irqrestore(&csched_priv.lock, flags); + + return 0; +} + +static inline void +__bcsched_vcpu_acct_start_locked(struct csched_vcpu *svc) +{ + struct csched_dom * const sdom = svc->sdom; + struct bcsched_vcpu * const sbvc = svc_sbvc(svc); + struct bcsched_dom * const sbdom = sdom_sbdom(sdom); + + CSCHED_VCPU_STAT_CRANK(svc, state_active); + CSCHED_STAT_CRANK(acct_vcpu_active); + + sdom->active_vcpu_count++; + list_add(&svc->active_vcpu_elem, &sdom->active_vcpu); + list_del_init(&sbvc->inactive_vcpu_elem); + if ( list_empty(&sdom->active_sdom_elem) ) + { + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + csched_priv.weight += sdom->weight; + bcsched_priv.boost_credit += (sbdom->boost_ratio * + BCSCHED_CREDITS_PER_TSLICE) / 100; + } +} + +static inline void +__bcsched_vcpu_acct_stop_locked(struct csched_vcpu *svc) +{ + struct csched_dom * const sdom = svc->sdom; + struct bcsched_vcpu * const sbvc = svc_sbvc(svc); + struct bcsched_dom * const sbdom = sdom_sbdom(sdom); + + BUG_ON( list_empty(&svc->active_vcpu_elem) ); + + CSCHED_VCPU_STAT_CRANK(svc, state_idle); + CSCHED_STAT_CRANK(acct_vcpu_idle); + + sdom->active_vcpu_count--; + list_del_init(&svc->active_vcpu_elem); + list_add(&sbvc->inactive_vcpu_elem, &bcsched_priv.inactive_vcpu); + if ( list_empty(&sdom->active_vcpu) ) + 
{ + BUG_ON( csched_priv.weight < sdom->weight ); + list_del_init(&sdom->active_sdom_elem); + csched_priv.weight -= sdom->weight; + bcsched_priv.boost_credit -= (sbdom->boost_ratio * + BCSCHED_CREDITS_PER_TSLICE) / 100; + } +} + +static void +bcsched_vcpu_acct(unsigned int cpu) +{ + ASSERT( current->processor == cpu ); + ASSERT( CSCHED_VCPU(current)->sdom != NULL ); + + /* + * If it's been active a while, check if we'd be better off + * migrating it to run elsewhere (see multi-core and multi-thread + * support in csched_cpu_pick()). + */ + if ( csched_cpu_pick(current) != cpu ) + { + CSCHED_VCPU_STAT_CRANK(CSCHED_VCPU(current), migrate_r); + CSCHED_STAT_CRANK(migrate_running); + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +static int +bcsched_vcpu_init(struct vcpu *vc) +{ + struct domain * const dom = vc->domain; + struct csched_dom *sdom = CSCHED_DOM(dom); + struct bcsched_vcpu *sbvc; + struct csched_vcpu *svc; + unsigned long flags; + + CSCHED_STAT_CRANK(vcpu_init); + + /* Allocate per-VCPU info */ + sbvc = xmalloc(struct bcsched_vcpu); + if ( sbvc == NULL ) + return -1; + svc = &(sbvc->svc); + + INIT_LIST_HEAD(&svc->runq_elem); + INIT_LIST_HEAD(&svc->active_vcpu_elem); + INIT_LIST_HEAD(&sbvc->inactive_vcpu_elem); + svc->sdom = sdom; + svc->vcpu = vc; + atomic_set(&svc->credit, 0); + svc->flags = 0U; + svc->pri = is_idle_domain(dom) ? 
CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER; + CSCHED_VCPU_STATS_RESET(svc); + vc->sched_priv = svc; + atomic_set(&sbvc->boost_credit, 0); + + /* Allocate per-PCPU info */ + if ( unlikely(!CSCHED_PCPU(vc->processor)) ) + { + if ( bcsched_pcpu_init(vc->processor) != 0 ) + return -1; + } + + /* Add inactive queue in order to start acct */ + if ( !is_idle_vcpu(vc) ) + { + uint32_t vcpus_per_cpu; + + spin_lock_irqsave(&csched_priv.lock, flags); + + list_add(&sbvc->inactive_vcpu_elem, &bcsched_priv.inactive_vcpu); + + bcsched_priv.nvcpus++; + vcpus_per_cpu = ( (bcsched_priv.nvcpus + (csched_priv.ncpus-1)) / + csched_priv.ncpus + ) - 1; + if ( vcpus_per_cpu == 0 ) + bcsched_priv.boost_tslice = MILLISECS(BCSCHED_MSECS_PER_TSLICE); + else + { + bcsched_priv.boost_tslice = MILLISECS(opt_bcsched_tslice) / + vcpus_per_cpu; + if ( bcsched_priv.boost_tslice < BCSCHED_NSECS_MIN_BOOST_TSLICE ) + bcsched_priv.boost_tslice = BCSCHED_NSECS_MIN_BOOST_TSLICE; + } + + spin_unlock_irqrestore(&csched_priv.lock, flags); + } + + CSCHED_VCPU_CHECK(vc); + return 0; +} + +static void +bcsched_vcpu_destroy(struct vcpu *vc) +{ + struct csched_vcpu * const svc = CSCHED_VCPU(vc); + struct bcsched_vcpu * const sbvc = svc_sbvc(svc); + struct csched_dom * const sdom = svc->sdom; + unsigned long flags; + + CSCHED_STAT_CRANK(vcpu_destroy); + + BUG_ON( sdom == NULL ); + BUG_ON( !list_empty(&svc->runq_elem) ); + + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( !list_empty(&svc->active_vcpu_elem) ) + __bcsched_vcpu_acct_stop_locked(svc); + + if ( !list_empty(&sbvc->inactive_vcpu_elem) ) + list_del_init(&sbvc->inactive_vcpu_elem); + + if ( !is_idle_vcpu(vc) ) + { + uint32_t vcpus_per_cpu; + + bcsched_priv.nvcpus--; + vcpus_per_cpu = ( (bcsched_priv.nvcpus + (csched_priv.ncpus-1)) / + csched_priv.ncpus + ) - 1; + if ( vcpus_per_cpu == 0 ) + bcsched_priv.boost_tslice = MILLISECS(BCSCHED_MSECS_PER_TSLICE); + else + { + bcsched_priv.boost_tslice = MILLISECS(opt_bcsched_tslice) / + vcpus_per_cpu; + if ( 
bcsched_priv.boost_tslice < BCSCHED_NSECS_MIN_BOOST_TSLICE ) + bcsched_priv.boost_tslice = BCSCHED_NSECS_MIN_BOOST_TSLICE; + } + } + + spin_unlock_irqrestore(&csched_priv.lock, flags); + + xfree(sbvc); +} + +static int +bcsched_dom_cntl( + struct domain *d, + struct xen_domctl_scheduler_op *op) +{ + struct csched_dom * const sdom = CSCHED_DOM(d); + struct bcsched_dom * const sbdom = sdom_sbdom(sdom); + unsigned long flags; + + if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo ) + { + op->u.bcredit.weight = sdom->weight; + op->u.bcredit.cap = sdom->cap; + op->u.bcredit.max_boost_period = sbdom->max_boost_period; + op->u.bcredit.boost_ratio = sbdom->boost_ratio; + } + else + { + uint16_t weight = (uint16_t)~0U; + + ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&csched_priv.lock, flags); + + if ( (op->u.bcredit.weight != 0) && + (sbdom->boost_ratio == 0 || op->u.bcredit.boost_ratio == 0) ) + { + weight = op->u.bcredit.weight; + } + + if ( op->u.bcredit.cap != (uint16_t)~0U ) + sdom->cap = op->u.bcredit.cap; + + if ( (op->u.bcredit.max_boost_period != (uint16_t)~0U) && + (op->u.bcredit.max_boost_period >= BCSCHED_MSECS_PER_TSLICE || + op->u.bcredit.max_boost_period == 0) ) + { + sbdom->max_boost_period = op->u.bcredit.max_boost_period; + } + + if ( (op->u.bcredit.boost_ratio != (uint16_t)~0U) && + ((bcsched_priv.total_boost_ratio - sbdom->boost_ratio + + op->u.bcredit.boost_ratio) <= 100 * csched_priv.ncpus) && + (sbdom->max_boost_period || op->u.bcredit.boost_ratio == 0) ) + { + uint16_t new_bc, old_bc; + + new_bc = ( op->u.bcredit.boost_ratio * + BCSCHED_CREDITS_PER_TSLICE ) / 100; + old_bc = ( sbdom->boost_ratio * + BCSCHED_CREDITS_PER_TSLICE ) / 100; + + bcsched_priv.total_boost_ratio -= sbdom->boost_ratio; + bcsched_priv.total_boost_ratio += op->u.bcredit.boost_ratio; + + sbdom->boost_ratio = op->u.bcredit.boost_ratio; + + if ( !list_empty(&sdom->active_sdom_elem) ) + { + bcsched_priv.boost_credit -= old_bc; + bcsched_priv.boost_credit += new_bc; + 
} + if ( new_bc == 0 ) + { + if ( sdom->weight == 0 ) + weight = BCSCHED_DEFAULT_WEIGHT; + } + else + weight = 0; + } + + if ( weight != (uint16_t)~0U ) + { + if ( !list_empty(&sdom->active_sdom_elem) ) + { + csched_priv.weight -= sdom->weight; + csched_priv.weight += weight; + } + sdom->weight = weight; + } + + spin_unlock_irqrestore(&csched_priv.lock, flags); + } + + return 0; +} + +static int +bcsched_dom_init(struct domain *dom) +{ + struct csched_dom *sdom; + struct bcsched_dom *sbdom; + + CSCHED_STAT_CRANK(dom_init); + + if ( is_idle_domain(dom) ) + return 0; + + sbdom = xmalloc(struct bcsched_dom); + if ( sbdom == NULL ) + return -ENOMEM; + sdom = &(sbdom->sdom); + + /* Initialize credit and weight */ + INIT_LIST_HEAD(&sdom->active_vcpu); + sdom->active_vcpu_count = 0; + INIT_LIST_HEAD(&sdom->active_sdom_elem); + sdom->dom = dom; + sdom->weight = BCSCHED_DEFAULT_WEIGHT; + sdom->cap = 0U; + sbdom->boost_ratio = 0U; + sbdom->max_boost_period = 0; + dom->sched_priv = sdom; + + return 0; +} + +static void +bcsched_dom_destroy(struct domain *dom) +{ + CSCHED_STAT_CRANK(dom_destroy); + xfree(sdom_sbdom(CSCHED_DOM(dom))); +} + +/* + * This is an O(n) optimized sort of the runq. + * + * Time-share VCPUs can only be one of three priorities, BOOST, UNDER or OVER. + * We walk through the runq and move up any BOOSTs that are preceded by UNDERs + * or OVERs, and any UNDERs that are preceded by OVERs. We remember the last + * BOOST and UNDER to make the move up operation O(1). 
+ */ +static void +bcsched_runq_sort(unsigned int cpu) +{ + struct csched_pcpu * const spc = CSCHED_PCPU(cpu); + struct list_head *runq, *elem, *next, *last_boost, *last_under; + struct csched_vcpu *svc_elem; + unsigned long flags; + int sort_epoch; + + sort_epoch = csched_priv.runq_sort; + if ( sort_epoch == spc->runq_sort_last ) + return; + + spc->runq_sort_last = sort_epoch; + + spin_lock_irqsave(&per_cpu(schedule_data, cpu).schedule_lock, flags); + + runq = &spc->runq; + elem = runq->next; + last_boost = last_under = runq; + while ( elem != runq ) + { + next = elem->next; + svc_elem = __runq_elem(elem); + + if ( svc_elem->pri == CSCHED_PRI_TS_BOOST ) + { + /* does elem need to move up the runq? */ + if ( elem->prev != last_boost ) + { + list_del(elem); + list_add(elem, last_boost); + } + if ( last_boost == last_under ) + last_under = elem; + last_boost = elem; + } + else if ( svc_elem->pri == CSCHED_PRI_TS_UNDER ) + { + /* does elem need to move up the runq? */ + if ( elem->prev != last_under ) + { + list_del(elem); + list_add(elem, last_under); + } + last_under = elem; + } + + elem = next; + } + + spin_unlock_irqrestore(&per_cpu(schedule_data, cpu).schedule_lock, flags); +} + +static void +bcsched_acct(void) +{ + unsigned long flags; + struct list_head *iter_vcpu, *next_vcpu; + struct list_head *iter_sdom, *next_sdom; + struct bcsched_vcpu *sbvc; + struct bcsched_dom *sbdom; + struct csched_vcpu *svc; + struct csched_dom *sdom; + uint32_t credit_total; + uint32_t weight_total; + uint32_t bc_total; + uint32_t weight_left; + uint32_t credit_fair; + uint32_t credit_peak; + uint32_t credit_cap; + uint32_t bc_fair; + int credit_balance; + int credit_xtra; + int credit; + int boost_credit; + int max_boost_credit; + int64_t c_sum, bc_sum; + int c_average, bc_average; + + + spin_lock_irqsave(&csched_priv.lock, flags); + + /* Add vcpu to active list when its credits were consumed by one tick */ + list_for_each_safe( iter_vcpu, next_vcpu, &bcsched_priv.inactive_vcpu ) 
+ { + sbvc = list_entry(iter_vcpu, struct bcsched_vcpu, inactive_vcpu_elem); + svc = &(sbvc->svc); + sbdom = sdom_sbdom(svc->sdom); + + max_boost_credit = sbdom->max_boost_period * + (BCSCHED_CREDITS_PER_TSLICE/BCSCHED_MSECS_PER_TSLICE); + if ( (atomic_read(&sbvc->boost_credit) + <= (max_boost_credit-BCSCHED_CREDITS_PER_TICK)) || + (atomic_read(&svc->credit) + <= BCSCHED_CREDITS_PER_TICK*(BCSCHED_TICKS_PER_ACCT-1)) ) + { + __bcsched_vcpu_acct_start_locked(svc); + } + } + + weight_total = csched_priv.weight; + credit_total = csched_priv.credit; + bc_total = bcsched_priv.boost_credit; + + /* Converge balance towards 0 when it drops negative */ + if ( csched_priv.credit_balance < 0 ) + { + credit_total -= csched_priv.credit_balance; + CSCHED_STAT_CRANK(acct_balance); + } + + if ( unlikely(weight_total == 0 && bc_total == 0) ) + { + csched_priv.credit_balance = 0; + spin_unlock_irqrestore(&csched_priv.lock, flags); + CSCHED_STAT_CRANK(acct_no_work); + return; + } + + CSCHED_STAT_CRANK(acct_run); + + weight_left = weight_total; + credit_balance = 0; + credit_xtra = 0; + credit_cap = 0U; + + /* Firstly, subtract boost credits from credit_total. 
*/ + if ( bc_total != 0 ) + { + credit_total -= bc_total; + credit_balance += bc_total; + } + + /* Avoid 0 divide error */ + if ( weight_total == 0 ) + weight_total = 1; + + list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom ) + { + sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem); + sbdom = sdom_sbdom(sdom); + + BUG_ON( is_idle_domain(sdom->dom) ); + BUG_ON( sdom->active_vcpu_count == 0 ); + BUG_ON( sdom->weight > weight_left ); + + max_boost_credit = sbdom->max_boost_period * + (BCSCHED_CREDITS_PER_TSLICE/BCSCHED_MSECS_PER_TSLICE); + c_sum = bc_sum = 0; + list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu ) + { + svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem); + sbvc = svc_sbvc(svc); + + BUG_ON( sdom != svc->sdom ); + + c_sum += atomic_read(&svc->credit); + bc_sum += atomic_read(&sbvc->boost_credit); + } + c_average = ( c_sum + ( sdom->active_vcpu_count - 1 ) + ) / sdom->active_vcpu_count; + bc_average = ( bc_sum + ( sdom->active_vcpu_count - 1 ) + ) / sdom->active_vcpu_count; + + weight_left -= sdom->weight; + + /* + * A domain's fair share is computed using its weight in competition + * with that of all other active domains. + * + * At most, a domain can use credits to run all its active VCPUs + * for one full accounting period. We allow a domain to earn more + * only when the system-wide credit balance is negative. 
+ */ + credit_peak = sdom->active_vcpu_count * BCSCHED_CREDITS_PER_ACCT; + if ( csched_priv.credit_balance < 0 ) + { + credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) + + (weight_total - 1) + ) / weight_total; + } + + if ( sdom->cap != 0U ) + { + credit_cap = ((sdom->cap * BCSCHED_CREDITS_PER_ACCT) + 99) / 100; + if ( credit_cap < credit_peak ) + credit_peak = credit_cap; + + credit_cap = ( credit_cap + ( sdom->active_vcpu_count - 1 ) + ) / sdom->active_vcpu_count; + } + + credit_fair = ( ( credit_total * sdom->weight) + (weight_total - 1) + ) / weight_total; + + if ( credit_fair < credit_peak ) + { + /* credit_fair is 0 if weight is 0. */ + if ( sdom->weight != 0 ) + credit_xtra = 1; + } + else + { + if ( weight_left != 0U ) + { + /* Give other domains a chance at unused credits */ + credit_total += ( ( ( credit_fair - credit_peak + ) * weight_total + ) + ( weight_left - 1 ) + ) / weight_left; + } + + if ( credit_xtra ) + { + /* + * Lazily keep domains with extra credits at the head of + * the queue to give others a chance at them in future + * accounting periods. 
+ */ + CSCHED_STAT_CRANK(acct_reorder); + list_del(&sdom->active_sdom_elem); + list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom); + } + + credit_fair = credit_peak; + } + + /* Compute fair share per VCPU */ + credit_fair = ( credit_fair + ( sdom->active_vcpu_count - 1 ) + ) / sdom->active_vcpu_count; + + /* Compute fair share of boost_credit per VCPU */ + bc_fair = ( ((sbdom->boost_ratio * BCSCHED_CREDITS_PER_ACCT)/100) + + (sdom->active_vcpu_count - 1) + ) / sdom->active_vcpu_count; + + list_for_each_safe( iter_vcpu, next_vcpu, &sdom->active_vcpu ) + { + svc = list_entry(iter_vcpu, struct csched_vcpu, active_vcpu_elem); + sbvc = svc_sbvc(svc); + + BUG_ON( sdom != svc->sdom ); + + /* Balance two credits */ + credit = atomic_read(&svc->credit); + atomic_add(c_average - credit, &svc->credit); + boost_credit = atomic_read(&sbvc->boost_credit); + atomic_add(bc_average - boost_credit, &sbvc->boost_credit); + boost_credit = atomic_read(&sbvc->boost_credit); + if ( sbdom->boost_ratio != 0 ) + { + /* Increment boost credit */ + atomic_add(bc_fair, &sbvc->boost_credit); + boost_credit = atomic_read(&sbvc->boost_credit); + + /* + * Upper bound on boost credits. + * Add excess to credit. + */ + if ( boost_credit > max_boost_credit ) + { + atomic_add(boost_credit - max_boost_credit, &svc->credit); + atomic_set(&sbvc->boost_credit, max_boost_credit); + boost_credit = atomic_read(&sbvc->boost_credit); + } + /* + * If credit is negative, + * boost credits compensate credit. 
+ */ + credit = atomic_read(&svc->credit); + if ( credit < 0 && boost_credit > 0 ) + { + if ( boost_credit > -credit ) + { + atomic_sub(-credit, &sbvc->boost_credit); + atomic_add(-credit, &svc->credit); + } + else + { + atomic_sub(boost_credit, &sbvc->boost_credit); + atomic_add(boost_credit, &svc->credit); + } + boost_credit = atomic_read(&sbvc->boost_credit); + } + } + + /* Increment credit */ + atomic_add(credit_fair, &svc->credit); + credit = atomic_read(&svc->credit); + + /* + * Recompute priority or, if VCPU is idling, remove it from + * the active list. + */ + if ( credit < 0 ) + { + svc->pri = CSCHED_PRI_TS_OVER; + + /* Park running VCPUs of capped-out domains */ + if ( sdom->cap != 0U && + credit < -credit_cap && + !(svc->flags & CSCHED_FLAG_VCPU_PARKED) ) + { + CSCHED_STAT_CRANK(vcpu_park); + vcpu_pause_nosync(svc->vcpu); + svc->flags |= CSCHED_FLAG_VCPU_PARKED; + } + + /* Lower bound on credits */ + if ( credit < -BCSCHED_CREDITS_PER_TSLICE ) + { + CSCHED_STAT_CRANK(acct_min_credit); + credit = -BCSCHED_CREDITS_PER_TSLICE; + atomic_set(&svc->credit, credit); + } + } + else + { + if ( boost_credit <= 0 ) + svc->pri = CSCHED_PRI_TS_UNDER; + else + svc->pri = CSCHED_PRI_TS_BOOST; + + /* Unpark any capped domains whose credits go positive */ + if ( svc->flags & CSCHED_FLAG_VCPU_PARKED) + { + /* + * It's important to unset the flag AFTER the unpause() + * call to make sure the VCPU's priority is not boosted + * if it is woken up here. 
+ */ + CSCHED_STAT_CRANK(vcpu_unpark); + vcpu_unpause(svc->vcpu); + svc->flags &= ~CSCHED_FLAG_VCPU_PARKED; + } + + if ( credit > BCSCHED_CREDITS_PER_TSLICE ) + { + atomic_add(credit - BCSCHED_CREDITS_PER_TSLICE, + &sbvc->boost_credit); + boost_credit = atomic_read(&sbvc->boost_credit); + credit = BCSCHED_CREDITS_PER_TSLICE; + atomic_set(&svc->credit, credit); + + if ( boost_credit > max_boost_credit ) + { + atomic_set(&sbvc->boost_credit, max_boost_credit); + __bcsched_vcpu_acct_stop_locked(svc); + } + } + } + + if ( sbdom->boost_ratio == 0 ) + { + CSCHED_VCPU_STAT_SET(svc, credit_last, credit); + CSCHED_VCPU_STAT_SET(svc, credit_incr, credit_fair); + credit_balance += credit; + } + else + { + CSCHED_VCPU_STAT_SET(svc, credit_last, boost_credit); + CSCHED_VCPU_STAT_SET(svc, credit_incr, bc_fair); + } + } + } + + csched_priv.credit_balance = credit_balance; + + spin_unlock_irqrestore(&csched_priv.lock, flags); + + /* Inform each CPU that its runq needs to be sorted */ + csched_priv.runq_sort++; +} + +static void +bcsched_tick(void *_cpu) +{ + unsigned int cpu = (unsigned long)_cpu; + struct csched_pcpu *spc = CSCHED_PCPU(cpu); + + spc->tick++; + + /* + * Accounting for running VCPU + */ + if ( !is_idle_vcpu(current) ) + bcsched_vcpu_acct(cpu); + + /* + * Host-wide accounting duty + * + * Note: Currently, this is always done by the master boot CPU. Eventually, + * we could distribute or at the very least cycle the duty. + */ + if ( (csched_priv.master == cpu) && + (spc->tick % BCSCHED_TICKS_PER_ACCT) == 0 ) + { + bcsched_acct(); + } + + /* + * Check if runq needs to be sorted + * + * Every physical CPU resorts the runq after the accounting master has + * modified priorities. This is a special O(n) sort and runs at most + * once per accounting period (currently 30 milliseconds). 
+ */ + bcsched_runq_sort(cpu); + + set_timer(&spc->ticker, NOW() + MILLISECS(BCSCHED_MSECS_PER_TICK)); +} + +static struct task_slice +bcsched_schedule(s_time_t now) +{ + struct csched_vcpu *svc = CSCHED_VCPU(current); + struct bcsched_vcpu *sbvc = svc_sbvc(svc); + s_time_t passed = now - sbvc->start_time; + int consumed; + int boost_credit; + struct task_slice ret; + + /* + * Update credit + */ + consumed = ( passed + + (MILLISECS(BCSCHED_MSECS_PER_TSLICE) / + BCSCHED_CREDITS_PER_TSLICE - 1) + ) / (MILLISECS(BCSCHED_MSECS_PER_TSLICE) / + BCSCHED_CREDITS_PER_TSLICE); + if ( svc->pri == CSCHED_PRI_TS_BOOST ) + { + boost_credit = atomic_read(&sbvc->boost_credit); + if ( boost_credit > consumed ) + { + atomic_sub(consumed, &sbvc->boost_credit); + consumed = 0; + } + else + { + atomic_sub(boost_credit, &sbvc->boost_credit); + consumed -= boost_credit; + svc->pri = CSCHED_PRI_TS_UNDER; + } + } + if ( consumed > 0 && !is_idle_vcpu(current) ) + atomic_sub(consumed, &svc->credit); + + ret = csched_schedule(now); + + svc = CSCHED_VCPU(ret.task); + if ( svc->pri == CSCHED_PRI_TS_BOOST ) + ret.time = bcsched_priv.boost_tslice; + + sbvc = svc_sbvc(svc); + sbvc->start_time = now; + + return ret; +} + +static void +bcsched_dump_vcpu(struct csched_vcpu *svc) +{ + struct bcsched_vcpu * const sbvc = svc_sbvc(svc); + + csched_dump_vcpu(svc); + + if ( svc->sdom ) + { + struct bcsched_dom * const sbdom = sdom_sbdom(svc->sdom); + + printk("\t bc=%i [bc=%i]\n", + atomic_read(&sbvc->boost_credit), + sbdom->boost_ratio * BCSCHED_CREDITS_PER_TSLICE / 100); + } +} + +static void +bcsched_dump(void) +{ + struct list_head *iter_sdom, *iter_svc; + int loop; + char idlers_buf[100]; + + printk("info:\n" + "\tncpus = %u\n" + "\tmaster = %u\n" + "\tcredit = %u\n" + "\tcredit balance = %d\n" + "\tweight = %u\n" + "\trunq_sort = %u\n" + "\tboost_tslice = %"PRId64"\n" + "\tboost_credit = %u\n" + "\ttotal_boost_ratio = %u\n" + "\tdefault-weight = %d\n" + "\tmsecs per tick = %dms\n" + "\tcredits per 
tick = %d\n" + "\tticks per tslice = %d\n" + "\tticks per acct = %d\n", + csched_priv.ncpus, + csched_priv.master, + csched_priv.credit, + csched_priv.credit_balance, + csched_priv.weight, + csched_priv.runq_sort, + bcsched_priv.boost_tslice, + bcsched_priv.boost_credit, + bcsched_priv.total_boost_ratio, + CSCHED_DEFAULT_WEIGHT, + BCSCHED_MSECS_PER_TICK, + BCSCHED_CREDITS_PER_TICK, + BCSCHED_TICKS_PER_TSLICE, + BCSCHED_TICKS_PER_ACCT); + + cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers); + printk("idlers: %s\n", idlers_buf); + + CSCHED_STATS_PRINTK(); + + printk("active vcpus:\n"); + loop = 0; + list_for_each( iter_sdom, &csched_priv.active_sdom ) + { + struct csched_dom *sdom; + sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem); + + list_for_each( iter_svc, &sdom->active_vcpu ) + { + struct csched_vcpu *svc; + svc = list_entry(iter_svc, struct csched_vcpu, active_vcpu_elem); + + printk("\t%3d: ", ++loop); + bcsched_dump_vcpu(svc); + } + } + + printk("inactive vcpus:\n"); + loop = 0; + list_for_each( iter_svc, &bcsched_priv.inactive_vcpu ) + { + struct bcsched_vcpu *sbvc; + sbvc = list_entry(iter_svc, struct bcsched_vcpu, inactive_vcpu_elem); + + printk("\t%3d: ", ++loop); + bcsched_dump_vcpu(&sbvc->svc); + } +} + +static void +bcsched_init(void) +{ + csched_init(); + + INIT_LIST_HEAD(&bcsched_priv.inactive_vcpu); + bcsched_priv.boost_tslice = MILLISECS(BCSCHED_MSECS_PER_TSLICE); + bcsched_priv.boost_credit = 0; + bcsched_priv.total_boost_ratio = 0; +} + + +struct scheduler sched_bcredit_def = { + .name = "SMP Credit Scheduler for client side", + .opt_name = "bcredit", + .sched_id = XEN_SCHEDULER_BCREDIT, + + .init_domain = bcsched_dom_init, + .destroy_domain = bcsched_dom_destroy, + + .init_vcpu = bcsched_vcpu_init, + .destroy_vcpu = bcsched_vcpu_destroy, + + .sleep = csched_vcpu_sleep, + .wake = csched_vcpu_wake, + + .adjust = bcsched_dom_cntl, + + .pick_cpu = csched_cpu_pick, + .do_schedule = bcsched_schedule, + + 
.dump_cpu_state = csched_dump_pcpu, + .dump_settings = bcsched_dump, + .init = bcsched_init, +}; + diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 04b09e2..ffcb432 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -51,9 +51,11 @@ DEFINE_PER_CPU(struct schedule_data, schedule_data); extern struct scheduler sched_sedf_def; extern struct scheduler sched_credit_def; +extern struct scheduler sched_bcredit_def; static struct scheduler *schedulers[] = { &sched_sedf_def, &sched_credit_def, + &sched_bcredit_def, NULL }; diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c index cc0ec14..084e86b 100644 --- a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -243,6 +243,7 @@ static int iommu_setup(void) if ( iommu_enabled ) printk("I/O virtualisation for PV guests %sabled\n", iommu_pv_enabled ? "en" : "dis"); + return rc; } __initcall(iommu_setup); diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index b7075ac..8465198 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -294,6 +294,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); /* Scheduler types. */ #define XEN_SCHEDULER_SEDF 4 #define XEN_SCHEDULER_CREDIT 5 +#define XEN_SCHEDULER_BCREDIT 6 /* Set or get info? */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 @@ -312,6 +313,12 @@ struct xen_domctl_scheduler_op { uint16_t weight; uint16_t cap; } credit; + struct xen_domctl_sched_bcredit { + uint16_t weight; + uint16_t cap; + uint16_t max_boost_period; + uint16_t boost_ratio; + } bcredit; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;