ia64/xen-unstable

changeset 1704:083178f6cdfa

bitkeeper revision 1.1044 (40ec19236iHRt47R5UsW46zQ42dHIw)

Implementation of a new scheduler. It is based on BVT (Borrowed Virtual Time) but tries to give a fairer allocation of CPU in diverse environments (CPU-bound domains running against I/O-bound ones). For that reason I called it Fair BVT (or FBVT for short). The existing BVT implementation provided the starting point. This changeset also contains the interface to control the scheduler. Unfortunately the context switch allowance cannot be changed yet (a bug). The parameters introduced in the scheduler are likely to change in the near future (after running tests).
author gm281@boulderdash.cl.cam.ac.uk
date Wed Jul 07 15:39:15 2004 +0000 (2004-07-07)
parents 7ee821f4caea
children 0e23f01219c6
files .rootkeys tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_fbvtsched.c tools/python/xen/lowlevel/xc/xc.c tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendNode.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xend/server/SrvNode.py tools/python/xen/xm/main.py xen/common/sched_bvt.c xen/common/sched_fair_bvt.c xen/common/schedule.c xen/include/hypervisor-ifs/sched_ctl.h xen/include/xen/sched-if.h xen/include/xen/sched.h
line diff
     1.1 --- a/.rootkeys	Wed Jun 30 17:10:07 2004 +0000
     1.2 +++ b/.rootkeys	Wed Jul 07 15:39:15 2004 +0000
     1.3 @@ -167,6 +167,7 @@ 3fbba6dbEVkVMX0JuDFzap9jeaucGA tools/lib
     1.4  3fbba6dbasJQV-MVElDC0DGSHMiL5w tools/libxc/xc_domain.c
     1.5  40278d99BLsfUv3qxv0I8C1sClZ0ow tools/libxc/xc_elf.h
     1.6  403e0977Bjsm_e82pwvl9VvaJxh8Gg tools/libxc/xc_evtchn.c
     1.7 +40ec1922Nq_Rur5KUH0MvRNKczPGxg tools/libxc/xc_fbvtsched.c
     1.8  40e03333Eegw8czSWvHsbKxrRZJjRA tools/libxc/xc_io.c
     1.9  40e03333vrWGbLAhyJjXlqCHaJt7eA tools/libxc/xc_io.h
    1.10  3fbba6dbNCU7U6nsMYiXzKkp3ztaJg tools/libxc/xc_linux_build.c
    1.11 @@ -351,6 +352,7 @@ 4051bcecFeq4DE70p4zGO5setf47CA xen/commo
    1.12  3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
    1.13  4064773cJ31vZt-zhbSoxqft1Jaw0w xen/common/sched_atropos.c
    1.14  40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
    1.15 +40ec1922He_dRhVJdOicTcHvT8v1NQ xen/common/sched_fair_bvt.c
    1.16  40589968be_t_n0-w6ggceW7h-sx0w xen/common/sched_rrobin.c
    1.17  3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
    1.18  405b8599xI_PoEr3zZoJ2on-jdn7iw xen/common/shadow.c
     2.1 --- a/tools/libxc/Makefile	Wed Jun 30 17:10:07 2004 +0000
     2.2 +++ b/tools/libxc/Makefile	Wed Jul 07 15:39:15 2004 +0000
     2.3 @@ -23,6 +23,7 @@ INCLUDES += -I $(XEN_LIBXUTIL)
     2.4  SRCS     :=
     2.5  SRCS     += xc_atropos.c
     2.6  SRCS     += xc_bvtsched.c
     2.7 +SRCS     += xc_fbvtsched.c
     2.8  SRCS     += xc_domain.c
     2.9  SRCS     += xc_evtchn.c
    2.10  SRCS     += xc_io.c
     3.1 --- a/tools/libxc/xc.h	Wed Jun 30 17:10:07 2004 +0000
     3.2 +++ b/tools/libxc/xc.h	Wed Jul 07 15:39:15 2004 +0000
     3.3 @@ -113,6 +113,26 @@ int xc_bvtsched_domain_get(int xc_handle
     3.4                             unsigned long *warpl,
     3.5                             unsigned long *warpu);
     3.6  
     3.7 +int xc_fbvtsched_global_set(int xc_handle,
     3.8 +                           unsigned long ctx_allow);
     3.9 +
    3.10 +int xc_fbvtsched_domain_set(int xc_handle,
    3.11 +                           u32 domid,
    3.12 +                           unsigned long mcuadv,
    3.13 +                           unsigned long warp,
    3.14 +                           unsigned long warpl,
    3.15 +                           unsigned long warpu);
    3.16 +
    3.17 +int xc_fbvtsched_global_get(int xc_handle,
    3.18 +                           unsigned long *ctx_allow);
    3.19 +
    3.20 +int xc_fbvtsched_domain_get(int xc_handle,
    3.21 +                           u32 domid,
    3.22 +                           unsigned long *mcuadv,
    3.23 +                           unsigned long *warp,
    3.24 +                           unsigned long *warpl,
    3.25 +                           unsigned long *warpu);
    3.26 +
    3.27  int xc_atropos_domain_set(int xc_handle,
    3.28                            u32 domid,
    3.29                            u64 period, u64 slice, u64 latency,
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/tools/libxc/xc_fbvtsched.c	Wed Jul 07 15:39:15 2004 +0000
     4.3 @@ -0,0 +1,89 @@
     4.4 +/******************************************************************************
     4.5 + * xc_fbvtsched.c
     4.6 + * 
     4.7 + * API for manipulating parameters of the Fair Borrowed Virtual Time scheduler.
     4.8 + * 
     4.9 + * Copyright (c) 2004, G. Milos
    4.10 + * Based on K. Fraser's xc_bvtsched.c
    4.11 + */
    4.12 +
    4.13 +#include "xc_private.h"
    4.14 +
    4.15 +int xc_fbvtsched_global_set(int xc_handle,
    4.16 +                           unsigned long ctx_allow)
    4.17 +{
    4.18 +    dom0_op_t op;
    4.19 +
    4.20 +    op.cmd = DOM0_SCHEDCTL;
    4.21 +    op.u.schedctl.sched_id = SCHED_FBVT;
    4.22 +    op.u.schedctl.direction = SCHED_INFO_PUT;
    4.23 +    op.u.schedctl.u.fbvt.ctx_allow = ctx_allow;
    4.24 +
    4.25 +    return do_dom0_op(xc_handle, &op);
    4.26 +}
    4.27 +
    4.28 +int xc_fbvtsched_global_get(int xc_handle,
    4.29 +                           unsigned long *ctx_allow)
    4.30 +{
    4.31 +    dom0_op_t op;
    4.32 +    int ret;
    4.33 +    
    4.34 +    op.cmd = DOM0_SCHEDCTL;
    4.35 +    op.u.schedctl.sched_id = SCHED_FBVT;
    4.36 +    op.u.schedctl.direction = SCHED_INFO_GET;
    4.37 +
    4.38 +    ret = do_dom0_op(xc_handle, &op);
    4.39 +
    4.40 +    *ctx_allow = op.u.schedctl.u.fbvt.ctx_allow;
    4.41 +
    4.42 +    return ret;
    4.43 +}
    4.44 +
    4.45 +int xc_fbvtsched_domain_set(int xc_handle,
    4.46 +                           u32 domid,
    4.47 +                           unsigned long mcuadv,
    4.48 +                           unsigned long warp,
    4.49 +                           unsigned long warpl,
    4.50 +                           unsigned long warpu)
    4.51 +{
    4.52 +    dom0_op_t op;
    4.53 +    struct fbvt_adjdom *fbvtadj = &op.u.adjustdom.u.fbvt;
    4.54 +
    4.55 +    op.cmd = DOM0_ADJUSTDOM;
    4.56 +    op.u.adjustdom.domain  = (domid_t)domid;
    4.57 +    op.u.adjustdom.sched_id = SCHED_FBVT;
    4.58 +    op.u.adjustdom.direction = SCHED_INFO_PUT;
    4.59 +
    4.60 +    fbvtadj->mcu_adv = mcuadv;
    4.61 +    fbvtadj->warp    = warp;
    4.62 +    fbvtadj->warpl   = warpl;
    4.63 +    fbvtadj->warpu   = warpu;
    4.64 +    return do_dom0_op(xc_handle, &op);
    4.65 +}
    4.66 +
    4.67 +
    4.68 +int xc_fbvtsched_domain_get(int xc_handle,
    4.69 +                           u32 domid,
    4.70 +                           unsigned long *mcuadv,
    4.71 +                           unsigned long *warp,
    4.72 +                           unsigned long *warpl,
    4.73 +                           unsigned long *warpu)
    4.74 +{
    4.75 +    
    4.76 +    dom0_op_t op;
    4.77 +    int ret;
    4.78 +    struct fbvt_adjdom *adjptr = &op.u.adjustdom.u.fbvt;
    4.79 +
    4.80 +    op.cmd = DOM0_ADJUSTDOM;
    4.81 +    op.u.adjustdom.domain  = (domid_t)domid;
    4.82 +    op.u.adjustdom.sched_id = SCHED_FBVT;
    4.83 +    op.u.adjustdom.direction = SCHED_INFO_GET;
    4.84 +
    4.85 +    ret = do_dom0_op(xc_handle, &op);
    4.86 +
    4.87 +    *mcuadv = adjptr->mcu_adv;
    4.88 +    *warp   = adjptr->warp;
    4.89 +    *warpl  = adjptr->warpl;
    4.90 +    *warpu  = adjptr->warpu;
    4.91 +    return ret;
    4.92 +}
     5.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Wed Jun 30 17:10:07 2004 +0000
     5.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Wed Jul 07 15:39:15 2004 +0000
     5.3 @@ -479,6 +479,92 @@ static PyObject *pyxc_bvtsched_domain_ge
     5.4                           "warpu",  warpu);
     5.5  }
     5.6  
     5.7 +static PyObject *pyxc_fbvtsched_global_set(PyObject *self,
     5.8 +                                          PyObject *args,
     5.9 +                                          PyObject *kwds)
    5.10 +{
    5.11 +    XcObject *xc = (XcObject *)self;
    5.12 +
    5.13 +    unsigned long ctx_allow;
    5.14 +
    5.15 +    static char *kwd_list[] = { "ctx_allow", NULL };
    5.16 +
    5.17 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "l", kwd_list, &ctx_allow) )
    5.18 +        return NULL;
    5.19 +
    5.20 +    if ( xc_fbvtsched_global_set(xc->xc_handle, ctx_allow) != 0 )
    5.21 +        return PyErr_SetFromErrno(xc_error);
    5.22 +    
    5.23 +    Py_INCREF(zero);
    5.24 +    return zero;
    5.25 +}
    5.26 +
    5.27 +static PyObject *pyxc_fbvtsched_global_get(PyObject *self,
    5.28 +                                          PyObject *args,
    5.29 +                                          PyObject *kwds)
    5.30 +{
    5.31 +    XcObject *xc = (XcObject *)self;
    5.32 +    
    5.33 +    unsigned long ctx_allow;
    5.34 +    
    5.35 +    if ( !PyArg_ParseTuple(args, "") )
    5.36 +        return NULL;
    5.37 +    
    5.38 +    if ( xc_fbvtsched_global_get(xc->xc_handle, &ctx_allow) != 0 )
    5.39 +        return PyErr_SetFromErrno(xc_error);
    5.40 +    
    5.41 +    return Py_BuildValue("s:l", "ctx_allow", ctx_allow);
    5.42 +}
    5.43 +
    5.44 +static PyObject *pyxc_fbvtsched_domain_set(PyObject *self,
    5.45 +                                          PyObject *args,
    5.46 +                                          PyObject *kwds)
    5.47 +{
    5.48 +    XcObject *xc = (XcObject *)self;
    5.49 +
    5.50 +    u32           dom;
    5.51 +    unsigned long mcuadv, warp, warpl, warpu;
    5.52 +
    5.53 +    static char *kwd_list[] = { "dom", "mcuadv", "warp", "warpl",
    5.54 +                                "warpu", NULL };
    5.55 +
    5.56 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "illll", kwd_list,
    5.57 +                                      &dom, &mcuadv, &warp, &warpl, &warpu) )
    5.58 +        return NULL;
    5.59 +
    5.60 +    if ( xc_fbvtsched_domain_set(xc->xc_handle, dom, mcuadv, 
    5.61 +                                warp, warpl, warpu) != 0 )
    5.62 +        return PyErr_SetFromErrno(xc_error);
    5.63 +    
    5.64 +    Py_INCREF(zero);
    5.65 +    return zero;
    5.66 +}
    5.67 +
    5.68 +static PyObject *pyxc_fbvtsched_domain_get(PyObject *self,
    5.69 +                                          PyObject *args,
    5.70 +                                          PyObject *kwds)
    5.71 +{
    5.72 +    XcObject *xc = (XcObject *)self;
    5.73 +    u32 dom;
    5.74 +    unsigned long mcuadv, warp, warpl, warpu;
    5.75 +    
    5.76 +    static char *kwd_list[] = { "dom", NULL };
    5.77 +
    5.78 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i", kwd_list, &dom) )
    5.79 +        return NULL;
    5.80 +    
    5.81 +    if ( xc_fbvtsched_domain_get(xc->xc_handle, dom, &mcuadv, &warp,
    5.82 +                                &warpl, &warpu) != 0 )
    5.83 +        return PyErr_SetFromErrno(xc_error);
    5.84 +
    5.85 +    return Py_BuildValue("{s:i,s:l,s:l,s:l,s:l}",
    5.86 +                         "domain", dom,
    5.87 +                         "mcuadv", mcuadv,
    5.88 +                         "warp",   warp,
    5.89 +                         "warpl",  warpl,
    5.90 +                         "warpu",  warpu);
    5.91 +}
    5.92 +
    5.93  static PyObject *pyxc_evtchn_bind_interdomain(PyObject *self,
    5.94                                                PyObject *args,
    5.95                                                PyObject *kwds)
    5.96 @@ -973,6 +1059,44 @@ static PyMethodDef pyxc_methods[] = {
    5.97        " warpl  [long]: Warp limit,\n"
    5.98      },
    5.99  
   5.100 +    { "fbvtsched_global_set",
   5.101 +      (PyCFunction)pyxc_fbvtsched_global_set,
   5.102 +      METH_VARARGS | METH_KEYWORDS, "\n"
   5.103 +      "Set global tuning parameters for Fair Borrowed Virtual Time scheduler.\n"
   5.104 +      " ctx_allow [int]: Minimal guaranteed quantum.\n\n"
   5.105 +      "Returns: [int] 0 on success; -1 on error.\n" },
   5.106 +
   5.107 +    { "fbvtsched_global_get",
   5.108 +      (PyCFunction)pyxc_fbvtsched_global_get,
   5.109 +      METH_KEYWORDS, "\n"
   5.110 +      "Get global tuning parameters for FBVT scheduler.\n"
   5.111 +      "Returns: [dict]:\n"
   5.112 +      " ctx_allow [int]: context switch allowance\n" },
   5.113 +
   5.114 +    { "fbvtsched_domain_set",
   5.115 +      (PyCFunction)pyxc_fbvtsched_domain_set,
   5.116 +      METH_VARARGS | METH_KEYWORDS, "\n"
   5.117 +      "Set per-domain tuning parameters for Fair Borrowed Virtual Time scheduler.\n"
   5.118 +      " dom    [int]: Identifier of domain to be tuned.\n"
   5.119 +      " mcuadv [int]: Proportional to the inverse of the domain's weight.\n"
   5.120 +      " warp   [int]: How far to warp domain's EVT on unblock.\n"
   5.121 +      " warpl  [int]: How long the domain can run warped.\n"
   5.122 +      " warpu  [int]: How long before the domain can warp again.\n\n"
   5.123 +      "Returns: [int] 0 on success; -1 on error.\n" },
   5.124 +
   5.125 +    { "fbvtsched_domain_get",
   5.126 +      (PyCFunction)pyxc_fbvtsched_domain_get,
   5.127 +      METH_KEYWORDS, "\n"
   5.128 +      "Get per-domain tuning parameters under the FBVT scheduler.\n"
   5.129 +      " dom [int]: Identifier of domain to be queried.\n"
   5.130 +      "Returns [dict]:\n"
   5.131 +      " domain [int]:  Domain ID.\n"
   5.132 +      " mcuadv [long]: MCU Advance.\n"
   5.133 +      " warp   [long]: Warp.\n"
   5.134 +      " warpu  [long]: Unwarp requirement.\n"
   5.135 +      " warpl  [long]: Warp limit,\n"
   5.136 +    },
   5.137 +
   5.138      { "atropos_domain_set",
   5.139        (PyCFunction)pyxc_atropos_domain_set,
   5.140        METH_KEYWORDS, "\n"
     6.1 --- a/tools/python/xen/xend/XendClient.py	Wed Jun 30 17:10:07 2004 +0000
     6.2 +++ b/tools/python/xen/xend/XendClient.py	Wed Jul 07 15:39:15 2004 +0000
     6.3 @@ -171,10 +171,15 @@ class Xend:
     6.4                           {'op'      : 'cpu_rrobin_slice_set',
     6.5                            'slice'   : slice })
     6.6      
     6.7 -    def xend_node_cpu_bvt_slice_set(self, slice):
     6.8 +    def xend_node_cpu_bvt_slice_set(self, ctx_allow):
     6.9          return xend_call(self.nodeurl(),
    6.10                           {'op'      : 'cpu_bvt_slice_set',
    6.11 -                          'slice'   : slice })
    6.12 +                          'ctx_allow'   : ctx_allow })
    6.13 +    
    6.14 +    def xend_node_cpu_fbvt_slice_set(self, ctx_allow):
    6.15 +        return xend_call(self.nodeurl(),
    6.16 +                         {'op'      : 'cpu_fbvt_slice_set',
    6.17 +                          'ctx_allow'   : ctx_allow })
    6.18  
    6.19      def xend_domains(self):
    6.20          return xend_get(self.domainurl())
    6.21 @@ -226,10 +231,19 @@ class Xend:
    6.22      def xend_domain_cpu_bvt_set(self, id, mcuadv, warp, warpl, warpu):
    6.23          return xend_call(self.domainurl(id),
    6.24                           {'op'      : 'cpu_bvt_set',
    6.25 -                          'mcuadv'  : mvuadv,
    6.26 +                          'mcuadv'  : mcuadv,
    6.27                            'warp'    : warp,
    6.28                            'warpl'   : warpl,
    6.29                            'warpu'   : warpu })
    6.30 +    
    6.31 +    def xend_domain_cpu_fbvt_set(self, id, mcuadv, warp, warpl, warpu):
    6.32 +        return xend_call(self.domainurl(id),
    6.33 +                         {'op'      : 'cpu_fbvt_set',
    6.34 +                          'mcuadv'  : mcuadv,
    6.35 +                          'warp'    : warp,
    6.36 +                          'warpl'   : warpl,
    6.37 +                          'warpu'   : warpu })
    6.38 +
    6.39  
    6.40      def xend_domain_cpu_atropos_set(self, id, period, slice, latency, xtratime):
    6.41          return xend_call(self.domainurl(id),
     7.1 --- a/tools/python/xen/xend/XendDomain.py	Wed Jun 30 17:10:07 2004 +0000
     7.2 +++ b/tools/python/xen/xend/XendDomain.py	Wed Jul 07 15:39:15 2004 +0000
     7.3 @@ -363,6 +363,19 @@ class XendDomain:
     7.4          dom = int(dom)
     7.5          return xc.bvtsched_domain_get(dom)
     7.6      
     7.7 +    def domain_cpu_fbvt_set(self, dom, mcuadv, warp, warpl, warpu):
     7.8 +        """Set FBVT (Fair Borrowed Virtual Time) scheduler parameters for a domain.
     7.9 +        """
    7.10 +        dom = int(dom)
    7.11 +        return xc.fbvtsched_domain_set(dom=dom, mcuadv=mcuadv,
    7.12 +                                      warp=warp, warpl=warpl, warpu=warpu)
    7.13 +
    7.14 +    def domain_cpu_fbvt_get(self, dom):
    7.15 +        """Get FBVT (Fair Borrowed Virtual Time) scheduler parameters for a domain.
    7.16 +        """
    7.17 +        dom = int(dom)
    7.18 +        return xc.fbvtsched_domain_get(dom)
    7.19 +        
    7.20      def domain_cpu_atropos_set(self, dom, period, slice, latency, xtratime):
    7.21          """Set Atropos scheduler parameters for a domain.
    7.22          """
     8.1 --- a/tools/python/xen/xend/XendNode.py	Wed Jun 30 17:10:07 2004 +0000
     8.2 +++ b/tools/python/xen/xend/XendNode.py	Wed Jul 07 15:39:15 2004 +0000
     8.3 @@ -24,12 +24,22 @@ class XendNode:
     8.4      def notify(self, uri):
     8.5          return 0
     8.6      
     8.7 -    def cpu_bvt_slice_set(self, slice):
     8.8 +    def cpu_bvt_slice_set(self, ctx_allow):
     8.9          ret = 0
    8.10          #ret = self.xc.bvtsched_global_set(ctx_allow=slice)
    8.11          return ret
    8.12  
    8.13 -    def cpu_bvt_slice_get(self, slice):
    8.14 +    def cpu_bvt_slice_get(self, ctx_allow):
    8.15 +        ret = 0
    8.16 +        #ret = self.xc.bvtsched_global_get()
    8.17 +        return ret
    8.18 +    
    8.19 +    def cpu_fbvt_slice_set(self, ctx_allow):
    8.20 +        ret = 0
    8.21 +        #ret = self.xc.bvtsched_global_set(ctx_allow=slice)
    8.22 +        return ret
    8.23 +
    8.24 +    def cpu_fbvt_slice_get(self, ctx_allow):
    8.25          ret = 0
    8.26          #ret = self.xc.bvtsched_global_get()
    8.27          return ret
     9.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Wed Jun 30 17:10:07 2004 +0000
     9.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Wed Jul 07 15:39:15 2004 +0000
     9.3 @@ -70,6 +70,16 @@ class SrvDomain(SrvDir):
     9.4                       ['warpu', 'int']])
     9.5          val = fn(req.args, {'dom': self.dom.id})
     9.6          return val
     9.7 +    
     9.8 +    def op_cpu_fbvt_set(self, op, req):
     9.9 +        fn = FormFn(self.xd.domain_cpu_fbvt_set,
    9.10 +                    [['dom', 'int'],
    9.11 +                     ['mcuadv', 'int'],
    9.12 +                     ['warp', 'int'],
    9.13 +                     ['warpl', 'int'],
    9.14 +                     ['warpu', 'int']])
    9.15 +        val = fn(req.args, {'dom': self.dom.id})
    9.16 +        return val
    9.17  
    9.18      def op_cpu_atropos_set(self, op, req):
    9.19          fn = FormFn(self.xd.domain_cpu_atropos_set,
    10.1 --- a/tools/python/xen/xend/server/SrvNode.py	Wed Jun 30 17:10:07 2004 +0000
    10.2 +++ b/tools/python/xen/xend/server/SrvNode.py	Wed Jul 07 15:39:15 2004 +0000
    10.3 @@ -4,6 +4,7 @@ import os
    10.4  from SrvDir import SrvDir
    10.5  from xen.xend import sxp
    10.6  from xen.xend import XendNode
    10.7 +from xen.xend.Args import FormFn
    10.8  
    10.9  class SrvNode(SrvDir):
   10.10      """Information about the node.
   10.11 @@ -29,7 +30,13 @@ class SrvNode(SrvDir):
   10.12  
   10.13      def op_cpu_bvt_slice_set(self, op, req):
   10.14          fn = FormFn(self.xn.cpu_bvt_slice_set,
   10.15 -                    [['slice', 'int']])
   10.16 +                    [['ctx_allow', 'int']])
   10.17 +        val = fn(req.args, {})
   10.18 +        return val
   10.19 +    
   10.20 +    def op_cpu_fbvt_slice_set(self, op, req):
   10.21 +        fn = FormFn(self.xn.cpu_fbvt_slice_set,
   10.22 +                    [['ctx_allow', 'int']])
   10.23          val = fn(req.args, {})
   10.24          return val
   10.25  
    11.1 --- a/tools/python/xen/xm/main.py	Wed Jun 30 17:10:07 2004 +0000
    11.2 +++ b/tools/python/xen/xm/main.py	Wed Jul 07 15:39:15 2004 +0000
    11.3 @@ -347,19 +347,53 @@ xm.prog(ProgBvt)
    11.4  
    11.5  class ProgBvtslice(Prog):
    11.6      group = 'scheduler'
    11.7 -    name = "bvtslice"
    11.8 -    info = """Set the BVT scheduler slice."""
    11.9 +    name = "bvt_ctxallow"
   11.10 +    info = """Set the BVT scheduler context switch allowance."""
   11.11  
   11.12      def help(self, args):
   11.13 -        print args[0], 'SLICE'
   11.14 -        print '\nSet Borrowed Virtual Time scheduler slice.'
   11.15 +        print args[0], 'CTX_ALLOW'
   11.16 +        print '\nSet Borrowed Virtual Time scheduler context switch allowance.'
   11.17  
   11.18      def main(self, args):
   11.19 -        if len(args) < 2: self.err('%s: Missing slice' % args[0])
   11.20 +        if len(args) < 2: self.err('%s: Missing context switch allowance'
   11.21 +                                                            % args[0])
   11.22          server.xend_node_cpu_bvt_slice_set(slice)
   11.23  
   11.24  xm.prog(ProgBvtslice)
   11.25  
   11.26 +class ProgFbvt(Prog):
   11.27 +    group = 'scheduler'
   11.28 +    name = "fbvt"
   11.29 +    info = """Set FBVT scheduler parameters."""
   11.30 +    
   11.31 +    def help(self, args):
   11.32 +        print args[0], "DOM MCUADV WARP WARPL WARPU"
   11.33 +        print '\nSet Fair Borrowed Virtual Time scheduler parameters.'
   11.34 +
   11.35 +    def main(self, args):
   11.36 +        if len(args) != 6: self.err("%s: Invalid argument(s)" % args[0])
   11.37 +        v = map(int, args[1:6])
   11.38 +        server.xend_domain_cpu_fbvt_set(*v)
   11.39 +
   11.40 +xm.prog(ProgFbvt)
   11.41 +
   11.42 +class ProgFbvtslice(Prog):
   11.43 +    group = 'scheduler'
   11.44 +    name = "fbvt_ctxallow"
   11.45 +    info = """Set the FBVT scheduler context switch allowance."""
   11.46 +
   11.47 +    def help(self, args):
   11.48 +        print args[0], 'CTX_ALLOW'
   11.49 +        print '\nSet Fair Borrowed Virtual Time scheduler context switch allowance.'
   11.50 +
   11.51 +    def main(self, args):
   11.52 +        if len(args) < 2: self.err('%s: Missing context switch allowance.' 
   11.53 +                                                                % args[0])
   11.54 +        server.xend_node_cpu_fbvt_slice_set(slice)
   11.55 +
   11.56 +xm.prog(ProgFbvtslice)
   11.57 +
   11.58 +
   11.59  class ProgAtropos(Prog):
   11.60      group = 'scheduler'
   11.61      name= "atropos"
    12.1 --- a/xen/common/sched_bvt.c	Wed Jun 30 17:10:07 2004 +0000
    12.2 +++ b/xen/common/sched_bvt.c	Wed Jul 07 15:39:15 2004 +0000
    12.3 @@ -380,7 +380,6 @@ static task_slice_t bvt_do_schedule(s_ti
    12.4      next->min_slice = ctx_allow;
    12.5      ret.task = next;
    12.6      ret.time = r_time;
    12.7 -
    12.8      return ret;
    12.9  }
   12.10  
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen/common/sched_fair_bvt.c	Wed Jul 07 15:39:15 2004 +0000
    13.3 @@ -0,0 +1,538 @@
    13.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    13.5 + ****************************************************************************
    13.6 + * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
    13.7 + * (C) 2002-2003 University of Cambridge
    13.8 + * (C) 2004      - Mark Williamson - Intel Research Cambridge
    13.9 + ****************************************************************************
   13.10 + *
   13.11 + *        File: common/sched_fair_bvt.c
   13.12 + *      Author: Rolf Neugebauer & Keir Fraser
   13.13 + *              Updated for generic API by Mark Williamson
   13.14 + *
   13.15 + * Description: CPU scheduling
   13.16 + *              implements a Fair Borrowed Virtual Time (FBVT) scheduler based on BVT.
   13.17 + *              (see Duda & Cheriton SOSP'99)
   13.18 + */
   13.19 +
   13.20 +#include <xen/config.h>
   13.21 +#include <xen/init.h>
   13.22 +#include <xen/lib.h>
   13.23 +#include <xen/sched.h>
   13.24 +#include <xen/delay.h>
   13.25 +#include <xen/event.h>
   13.26 +#include <xen/time.h>
   13.27 +#include <xen/ac_timer.h>
   13.28 +#include <xen/perfc.h>
   13.29 +#include <xen/sched-if.h>
   13.30 +#include <xen/slab.h>
   13.31 +
   13.32 +
   13.33 +/* all per-domain FBVT-specific scheduling info is stored here */
   13.34 +struct fbvt_dom_info
   13.35 +{
   13.36 +    unsigned long mcu_advance;      /* inverse of weight */
   13.37 +    u32           avt;              /* actual virtual time */
   13.38 +    u32           evt;              /* effective virtual time */
   13.39 +    u32		      time_slept;	    /* records amount of time slept, used for scheduling */
   13.40 +    u32		      vtb;	    	    /* virtual time bonus */
   13.41 +    int           warpback;         /* warp?  */
   13.42 +    long          warp;             /* virtual time warp */
   13.43 +    long          warpl;            /* warp limit */
   13.44 +    long          warpu;            /* unwarp time requirement */
   13.45 +    s_time_t      warped;           /* time it ran warped last time */
   13.46 +    s_time_t      uwarped;          /* time it ran unwarped last time */
   13.47 +};
   13.48 +
   13.49 +struct fbvt_cpu_info
   13.50 +{
   13.51 +    unsigned long svt; /* XXX check this is unsigned long! */
   13.52 +};
   13.53 +
   13.54 +
   13.55 +#define FBVT_INFO(p)   ((struct fbvt_dom_info *)(p)->sched_priv)
   13.56 +#define CPU_INFO(cpu) ((struct fbvt_cpu_info *)(schedule_data[cpu]).sched_priv)
   13.57 +#define CPU_SVT(cpu)  (CPU_INFO(cpu)->svt)
   13.58 +
   13.59 +#define MCU            (s32)MICROSECS(100)    /* Minimum unit */
   13.60 +#define MCU_ADVANCE    10                     /* default weight */
   13.61 +#define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
   13.62 +static s32 ctx_allow = (s32)MILLISECS(5);     /* context switch allowance */
   13.63 +
   13.64 +/* SLAB cache for struct fbvt_dom_info objects */
   13.65 +static kmem_cache_t *dom_info_cache;
   13.66 +
   13.67 +/*
   13.68 + * Calculate the effective virtual time for a domain. Take into account 
   13.69 + * warping limits
   13.70 + */
   13.71 +static void __calc_evt(struct fbvt_dom_info *inf)
   13.72 +{
   13.73 +    s_time_t now = NOW();
   13.74 +
   13.75 +    if ( inf->warpback ) 
   13.76 +    {
   13.77 +        if ( ((now - inf->warped) < inf->warpl) &&
   13.78 +             ((now - inf->uwarped) > inf->warpu) )
   13.79 +        {
   13.80 +            /* allowed to warp */
   13.81 +            inf->evt = inf->avt - inf->warp;
   13.82 +        } 
   13.83 +        else 
   13.84 +        {
   13.85 +            /* warped for too long -> unwarp */
   13.86 +            inf->evt      = inf->avt;
   13.87 +            inf->uwarped  = now;
   13.88 +            inf->warpback = 0;
   13.89 +        }
   13.90 +    } 
   13.91 +    else 
   13.92 +    {
   13.93 +        inf->evt = inf->avt;
   13.94 +    }
   13.95 +}
   13.96 +
   13.97 +/**
   13.98 + * fbvt_alloc_task - allocate FBVT private structures for a task
   13.99 + * @p:              task to allocate private structures for
  13.100 + *
  13.101 + * Returns non-zero on failure.
  13.102 + */
  13.103 +int fbvt_alloc_task(struct domain *p)
  13.104 +{
  13.105 +    p->sched_priv = kmem_cache_alloc(dom_info_cache);
  13.106 +    if ( p->sched_priv == NULL )
  13.107 +        return -1;
  13.108 +    
  13.109 +    return 0;
  13.110 +}
  13.111 +
  13.112 +/*
  13.113 + * Add and remove a domain
  13.114 + */
  13.115 +void fbvt_add_task(struct domain *p) 
  13.116 +{
  13.117 +    struct fbvt_dom_info *inf = FBVT_INFO(p);
  13.118 +
  13.119 +    ASSERT(inf != NULL);
  13.120 +    ASSERT(p   != NULL);
  13.121 +
  13.122 +    inf->mcu_advance = MCU_ADVANCE;
  13.123 +    if ( p->domain == IDLE_DOMAIN_ID )
  13.124 +    {
  13.125 +        inf->avt = inf->evt = ~0U;
  13.126 +    } 
  13.127 +    else 
  13.128 +    {
  13.129 +        /* Set avt and evt to system virtual time. */
  13.130 +        inf->avt         = CPU_SVT(p->processor);
  13.131 +        inf->evt         = CPU_SVT(p->processor);
  13.132 +        /* Set some default values here. */
  13.133 +		inf->vtb	     = 0;
  13.134 +		inf->time_slept  = 0;
  13.135 +        inf->warpback    = 0;
  13.136 +        inf->warp        = 0;
  13.137 +        inf->warpl       = 0;
  13.138 +        inf->warpu       = 0;
  13.139 +    }
  13.140 +
  13.141 +    return;
  13.142 +}
  13.143 +
  13.144 +/**
  13.145 + * fbvt_free_task - free FBVT private structures for a task
  13.146 + * @p:             task
  13.147 + */
  13.148 +void fbvt_free_task(struct domain *p)
  13.149 +{
  13.150 +    ASSERT( p->sched_priv != NULL );
  13.151 +    kmem_cache_free( dom_info_cache, p->sched_priv );
  13.152 +}
  13.153 +
  13.154 +
  13.155 +void fbvt_wake_up(struct domain *p)
  13.156 +{
  13.157 +    struct fbvt_dom_info *inf = FBVT_INFO(p);
  13.158 +    s32 io_warp;
  13.159 +
  13.160 +    ASSERT(inf != NULL);
  13.161 +    
  13.162 +
  13.163 +    /* set the BVT parameters */
  13.164 +    if (inf->avt < CPU_SVT(p->processor))
  13.165 +    {
  13.166 +		/*
  13.167 +		 * We want I/O-bound domains to gain dispatch
  13.168 +		 * precedence; this matters especially for
  13.169 +		 * device driver domains. Therefore AVT should not be updated
  13.170 +		 * to SVT but to a value marginally smaller.
  13.171 +		 * Since frequently sleeping domains have high time_slept
  13.172 +		 * values, the virtual time can be determined as:
  13.173 +		 * SVT - const * TIME_SLEPT
  13.174 +		 */
  13.175 +	
  13.176 +		io_warp = (int)(0.5 * inf->time_slept);
  13.177 +		if(io_warp > 10000) io_warp = 10000;
  13.178 +
  13.179 +		ASSERT(inf->time_slept + CPU_SVT(p->processor) > inf->avt + io_warp);
  13.180 +		inf->time_slept += CPU_SVT(p->processor) - inf->avt - io_warp;
  13.181 +        inf->avt = CPU_SVT(p->processor) - io_warp;
  13.182 +    }
  13.183 +
  13.184 +    /* deal with warping here */
  13.185 +    inf->warpback  = 1;
  13.186 +    inf->warped    = NOW();
  13.187 +    __calc_evt(inf);
  13.188 +    __add_to_runqueue_head(p);
  13.189 +}
  13.190 +
  13.191 +/* 
  13.192 + * Block the currently-executing domain until a pertinent event occurs.
  13.193 + */
  13.194 +static void fbvt_do_block(struct domain *p)
  13.195 +{
  13.196 +    FBVT_INFO(p)->warpback = 0; 
  13.197 +}
  13.198 +
  13.199 +/* Control the scheduler. */
  13.200 +int fbvt_ctl(struct sched_ctl_cmd *cmd)
  13.201 +{
  13.202 +    struct fbvt_ctl *params = &cmd->u.fbvt;
  13.203 +
  13.204 +    if ( cmd->direction == SCHED_INFO_PUT )
  13.205 +    { 
  13.206 +        ctx_allow = params->ctx_allow;
  13.207 +    }
  13.208 +    else
  13.209 +    {
  13.210 +        params->ctx_allow = ctx_allow;
  13.211 +    }
  13.212 +    
  13.213 +    return 0;
  13.214 +}
  13.215 +
  13.216 +/* Set (SCHED_INFO_PUT) or read back (SCHED_INFO_GET) a domain's FBVT params. */
  13.217 +int fbvt_adjdom(struct domain *p,
  13.218 +               struct sched_adjdom_cmd *cmd)
  13.219 +{
  13.220 +    struct fbvt_adjdom *params = &cmd->u.fbvt;
  13.221 +    unsigned long flags;
  13.222 +
  13.223 +    if ( cmd->direction == SCHED_INFO_PUT )
  13.224 +    {
  13.225 +        unsigned long mcu_adv = params->mcu_adv,
  13.226 +            warp  = params->warp,
  13.227 +            warpl = params->warpl,
  13.228 +            warpu = params->warpu;
  13.229 +        
  13.230 +        struct fbvt_dom_info *inf = FBVT_INFO(p);
  13.231 +        /* Log the current values before they are overwritten. */
  13.232 +        DPRINTK("Get domain %u fbvt mcu_adv=%ld, warp=%ld, "
  13.233 +                "warpl=%ld, warpu=%ld\n",
  13.234 +                p->domain, inf->mcu_advance, inf->warp,
  13.235 +                inf->warpl, inf->warpu );
  13.236 +
  13.237 +        /* Sanity -- mcu_advance is used as a divisor when scheduling; reject 0. */
  13.238 +        if ( mcu_adv == 0 )
  13.239 +            return -EINVAL;
  13.240 +        
  13.241 +        spin_lock_irqsave(&schedule_lock[p->processor], flags);   
  13.242 +        inf->mcu_advance = mcu_adv;
  13.243 +        inf->warp = warp;
  13.244 +        inf->warpl = warpl;
  13.245 +        inf->warpu = warpu;
  13.246 +        /* Log the values just stored. */
  13.247 +        DPRINTK("Set domain %u fbvt mcu_adv=%ld, warp=%ld, "
  13.248 +                "warpl=%ld, warpu=%ld\n",
  13.249 +                p->domain, inf->mcu_advance, inf->warp,
  13.250 +                inf->warpl, inf->warpu );
  13.251 +
  13.252 +        spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
  13.253 +    }
  13.254 +    else if ( cmd->direction == SCHED_INFO_GET )
  13.255 +    {
  13.256 +        struct fbvt_dom_info *inf = FBVT_INFO(p);
  13.257 +
  13.258 +        spin_lock_irqsave(&schedule_lock[p->processor], flags);   
  13.259 +        params->mcu_adv = inf->mcu_advance;
  13.260 +        params->warp    = inf->warp;
  13.261 +        params->warpl   = inf->warpl;
  13.262 +        params->warpu   = inf->warpu;
  13.263 +        spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
  13.264 +    }
  13.265 +    
  13.266 +    return 0;
  13.267 +}
  13.268 +
  13.269 +
  13.270 +/*
  13.271 + * The main function
  13.272 + * - deschedule the current domain, charging it for the time it ran.
  13.273 + * - pick a new domain, i.e. the domain with the lowest EVT, found by
  13.274 + *   a linear scan of this CPU's runqueue.
  13.275 + * Returns the chosen domain and the time it may run before rescheduling.
  13.276 + */
  13.277 +static task_slice_t fbvt_do_schedule(s_time_t now)
  13.278 +{
  13.279 +    struct domain *prev = current, *next = NULL, *next_prime, *p;
  13.280 +    struct list_head   *tmp;
  13.281 +    int                 cpu = prev->processor;
  13.282 +    s32                 r_time;     /* time for new dom to run */
  13.283 +    s32                 ranfor;     /* assume we never run longer than 2.1s! */
  13.284 +    s32                 mcus;
  13.285 +    u32                 next_evt, next_prime_evt, min_avt;
  13.286 +    struct fbvt_dom_info *prev_inf       = FBVT_INFO(prev),
  13.287 +                        *p_inf          = NULL,
  13.288 +                        *next_inf       = NULL,
  13.289 +                        *next_prime_inf = NULL;
  13.290 +    task_slice_t        ret;
  13.291 +
  13.292 +    ASSERT(prev->sched_priv != NULL);
  13.293 +    ASSERT(prev_inf != NULL);
  13.294 +
  13.295 +    if ( likely(!is_idle_task(prev)) )
  13.296 +    {
  13.297 +        ranfor = (s32)(now - prev->lastschd);
  13.298 +        /* Calculate mcu; avt only advances by usage beyond the bonus (vtb). */
  13.299 +        mcus = (ranfor + MCU - 1) / MCU;
  13.300 +        if ( mcus * prev_inf->mcu_advance < prev_inf->vtb )
  13.301 +        {
  13.302 +            ASSERT(prev_inf->time_slept >= mcus * prev_inf->mcu_advance);
  13.303 +            prev_inf->time_slept -= mcus * prev_inf->mcu_advance;
  13.304 +        }
  13.305 +        else
  13.306 +        {
  13.307 +            prev_inf->avt += mcus * prev_inf->mcu_advance - prev_inf->vtb;
  13.308 +
  13.309 +            ASSERT(prev_inf->time_slept >= prev_inf->vtb);
  13.310 +            prev_inf->time_slept -= prev_inf->vtb;
  13.311 +        }
  13.312 +
  13.313 +        __calc_evt(prev_inf);
  13.314 +
  13.315 +        __del_from_runqueue(prev);
  13.316 +
  13.317 +        if ( domain_runnable(prev) )
  13.318 +            __add_to_runqueue_tail(prev);
  13.319 +    }
  13.320 +
  13.321 +    /* We should at least have the idle task */
  13.322 +    ASSERT(!list_empty(&schedule_data[cpu].runqueue));
  13.323 +
  13.324 +    /*
  13.325 +     * scan through the run queue and pick the task with the lowest evt
  13.326 +     * *and* the task the second lowest evt.
  13.327 +     * this code is O(n) but we expect n to be small.
  13.328 +     */
  13.329 +    next       = schedule_data[cpu].idle;
  13.330 +    next_prime = NULL;
  13.331 +
  13.332 +    next_evt       = ~0U;
  13.333 +    next_prime_evt = ~0U;
  13.334 +    min_avt        = ~0U;
  13.335 +
  13.336 +    list_for_each ( tmp, &schedule_data[cpu].runqueue )
  13.337 +    {
  13.338 +        p     = list_entry(tmp, struct domain, run_list);
  13.339 +        p_inf = FBVT_INFO(p);
  13.340 +
  13.341 +        if ( p_inf->evt < next_evt )
  13.342 +        {
  13.343 +            next_prime     = next;
  13.344 +            next_prime_evt = next_evt;
  13.345 +            next = p;
  13.346 +            next_evt = p_inf->evt;
  13.347 +        }
  13.348 +        else if ( next_prime_evt == ~0U )
  13.349 +        {
  13.350 +            next_prime_evt = p_inf->evt;
  13.351 +            next_prime     = p;
  13.352 +        }
  13.353 +        else if ( p_inf->evt < next_prime_evt )
  13.354 +        {
  13.355 +            next_prime_evt = p_inf->evt;
  13.356 +            next_prime     = p;
  13.357 +        }
  13.358 +
  13.359 +        /* Determine system virtual time. */
  13.360 +        if ( p_inf->avt < min_avt )
  13.361 +            min_avt = p_inf->avt;
  13.362 +    }
  13.363 +
  13.364 +    /* Update system virtual time. */
  13.365 +    if ( min_avt != ~0U )
  13.366 +        CPU_SVT(cpu) = min_avt;
  13.367 +
  13.368 +    /* check for virtual time overrun on this cpu */
  13.369 +    if ( CPU_SVT(cpu) >= 0xf0000000 )
  13.370 +    {
  13.371 +        u_long t_flags;
  13.372 +        write_lock_irqsave(&tasklist_lock, t_flags);
  13.373 +        for_each_domain ( p )
  13.374 +        {
  13.375 +            if ( p->processor == cpu )
  13.376 +            {
  13.377 +                p_inf = FBVT_INFO(p);
  13.378 +                p_inf->evt -= 0xe0000000;
  13.379 +                p_inf->avt -= 0xe0000000;
  13.380 +            }
  13.381 +        }
  13.382 +        write_unlock_irqrestore(&tasklist_lock, t_flags);
  13.383 +        CPU_SVT(cpu) -= 0xe0000000;
  13.384 +    }
  13.385 +    /* next_prime is still NULL if the scan saw no entry: do not dereference. */
  13.386 +    next_prime_inf = next_prime ? FBVT_INFO(next_prime) : NULL;
  13.387 +    next_inf       = FBVT_INFO(next);
  13.388 +
  13.389 +    /* check for time_slept overrun for the domain we schedule to run*/
  13.390 +    if(next_inf->time_slept >= 0xf0000000)
  13.391 +    {
  13.392 +        printk("Domain %u is assigned more CPU than it is able to use.\n"
  13.393 +               "FBVT slept_time=%u, halving. Mcu_advance=%ld\n",next->domain,
  13.394 +               next_inf->time_slept, next_inf->mcu_advance);
  13.395 +
  13.396 +        next_inf->time_slept /= 2;
  13.397 +    }
  13.398 +
  13.399 +    /*
  13.400 +     * In here we decide on Virtual Time Bonus. The idea is, for the
  13.401 +     * domains that have large time_slept values to be allowed to run
  13.402 +     * for longer. Thus regaining the share of CPU originally allocated.
  13.403 +     * This is accompanied by the warp mechanism (which moves IO-bound
  13.404 +     * domains earlier in virtual time). Together this should give quite
  13.405 +     * good control both for CPU and IO-bound domains.
  13.406 +     * The bonus is time_slept / 5, capped at 1000 (integer maths only).
  13.407 +     */
  13.408 +    next_inf->vtb = next_inf->time_slept / 5;
  13.409 +    if(next_inf->vtb > 1000) next_inf->vtb = 1000;
  13.410 +
  13.411 +
  13.412 +    /* work out time for next run through scheduler */
  13.413 +    if ( is_idle_task(next) )
  13.414 +    {
  13.415 +        r_time = ctx_allow;
  13.416 +        goto sched_done;
  13.417 +    }
  13.418 +
  13.419 +    if ( (next_prime == NULL) || is_idle_task(next_prime) )
  13.420 +    {
  13.421 +        /* We have only one runnable task besides the idle task. */
  13.422 +        r_time = 10 * ctx_allow;     /* RN: random constant */
  13.423 +        goto sched_done;
  13.424 +    }
  13.425 +
  13.426 +    /*
  13.427 +     * If we are here then we have two runnable tasks.
  13.428 +     * Work out how long 'next' can run till its evt is greater than
  13.429 +     * 'next_prime's evt. Take context switch allowance into account.
  13.430 +     */
  13.431 +    ASSERT(next_prime_inf->evt >= next_inf->evt);
  13.432 +
  13.433 +    r_time = ((next_prime_inf->evt + next_inf->vtb - next_inf->evt)/next_inf->mcu_advance)
  13.434 +        + ctx_allow;
  13.435 +
  13.436 +    ASSERT(r_time >= ctx_allow);
  13.437 +
  13.438 + sched_done:
  13.439 +    next->min_slice = ctx_allow;
  13.440 +    ret.task = next;
  13.441 +    ret.time = r_time;
  13.442 +    return ret;
  13.443 +}
  13.444 +
  13.445 +/* Print one domain's FBVT state (mcu_advance, evt, avt, time_slept, vtb). */
  13.446 +static void fbvt_dump_runq_el(struct domain *p)
  13.447 +{
  13.448 +    struct fbvt_dom_info *inf = FBVT_INFO(p);
  13.449 +    
  13.450 +    printk("mcua=0x%04lX ev=0x%08X av=0x%08X sl=0x%08X vtb=0x%08X ",
  13.451 +           inf->mcu_advance, inf->evt, inf->avt, inf->time_slept, inf->vtb);
  13.452 +}
  13.453 +/* Print the scheduler-wide settings: MCU and ctx_allow, in hex. */
  13.454 +static void fbvt_dump_settings(void)
  13.455 +{
  13.456 +    printk("FBVT: mcu=0x%08Xns ctx_allow=0x%08Xns ", (u32)MCU, (s32)ctx_allow );
  13.457 +}
  13.458 +/* Print the system virtual time (SVT) of CPU i in hex. */
  13.459 +static void fbvt_dump_cpu_state(int i)
  13.460 +{
  13.461 +    printk("svt=0x%08lX ", CPU_SVT(i));
  13.462 +}
  13.463 +
  13.464 +/* Initialise the data structures: per-CPU FBVT state and the slab cache
  13.465 + * for per-domain info. Returns 0 on success, -1 if an allocation fails. */
  13.466 +int fbvt_init_scheduler(void)
  13.467 +{
  13.468 +    int i;
  13.469 +
  13.470 +    for ( i = 0; i < NR_CPUS; i++ )
  13.471 +    {
  13.472 +        schedule_data[i].sched_priv = kmalloc(sizeof(struct fbvt_cpu_info));
  13.473 +        if ( schedule_data[i].sched_priv == NULL )
  13.474 +        {
  13.475 +            printk("Failed to allocate FBVT scheduler per-CPU memory!\n");
  13.476 +            return -1;
  13.477 +        }
  13.478 +
  13.479 +        CPU_SVT(i) = 0; /* XXX do I really need to do this? */
  13.480 +    }
  13.481 +
  13.482 +    dom_info_cache = kmem_cache_create("FBVT dom info",
  13.483 +                                       sizeof(struct fbvt_dom_info),
  13.484 +                                       0, 0, NULL, NULL);
  13.485 +
  13.486 +    if ( dom_info_cache == NULL )
  13.487 +    {
  13.488 +        printk("FBVT: Failed to allocate domain info SLAB cache\n");
  13.489 +        return -1;
  13.490 +    }
  13.491 +
  13.492 +    return 0;
  13.493 +}
  13.494 +/* pause hook: take p off the runqueue if it is currently queued. */
  13.495 +static void fbvt_pause(struct domain *p)
  13.496 +{
  13.497 +    if( __task_on_runqueue(p) )
  13.498 +    {
  13.499 +        __del_from_runqueue(p);
  13.500 +    }
  13.501 +}
  13.502 +/* unpause hook: reset p's virtual times; unpause_domain() wakes it afterwards. */
  13.503 +static void fbvt_unpause(struct domain *p)
  13.504 +{
  13.505 +	struct fbvt_dom_info *inf = FBVT_INFO(p);
  13.506 +
  13.507 +	if ( p->domain == IDLE_DOMAIN_ID )
  13.508 +    {
  13.509 +        inf->avt = inf->evt = ~0U; /* max values: never below a real domain's */
  13.510 +    } 
  13.511 +    else 
  13.512 +    {
  13.513 +        /* Set avt to system virtual time. */
  13.514 +        inf->avt         = CPU_SVT(p->processor);
  13.515 +        /* Start with no virtual time bonus. */
  13.516 +		inf->vtb	 = 0;
  13.517 +		__calc_evt(inf);
  13.518 +    }
  13.519 +}
  13.520 +/* FBVT hook table; listed in the schedulers[] array in schedule.c. */
  13.521 +struct scheduler sched_fbvt_def = {
  13.522 +    .name     = "Fair Borrowed Virtual Time",
  13.523 +    .opt_name = "fbvt",
  13.524 +    .sched_id = SCHED_FBVT,
  13.525 +    
  13.526 +    .init_scheduler = fbvt_init_scheduler,
  13.527 +    .alloc_task     = fbvt_alloc_task,
  13.528 +    .add_task       = fbvt_add_task,
  13.529 +    .free_task      = fbvt_free_task,
  13.530 +    .wake_up        = fbvt_wake_up,
  13.531 +    .do_block       = fbvt_do_block,
  13.532 +    .do_schedule    = fbvt_do_schedule,
  13.533 +    .control        = fbvt_ctl,
  13.534 +    .adjdom         = fbvt_adjdom,
  13.535 +    .dump_settings  = fbvt_dump_settings,
  13.536 +    .dump_cpu_state = fbvt_dump_cpu_state,
  13.537 +    .dump_runq_el   = fbvt_dump_runq_el,
  13.538 +    .pause          = fbvt_pause,
  13.539 +    .unpause	    = fbvt_unpause,
  13.540 +};
  13.541 +
    14.1 --- a/xen/common/schedule.c	Wed Jun 30 17:10:07 2004 +0000
    14.2 +++ b/xen/common/schedule.c	Wed Jul 07 15:39:15 2004 +0000
    14.3 @@ -71,8 +71,9 @@ schedule_data_t schedule_data[NR_CPUS];
    14.4   * TODO: It would be nice if the schedulers array could get populated
    14.5   * automagically without having to hack the code in here.
    14.6   */
    14.7 -extern struct scheduler sched_bvt_def, sched_rrobin_def, sched_atropos_def;
    14.8 +extern struct scheduler sched_bvt_def, sched_fbvt_def, sched_rrobin_def, sched_atropos_def;
    14.9  static struct scheduler *schedulers[] = { &sched_bvt_def,
   14.10 +					  &sched_fbvt_def,
   14.11                                            &sched_rrobin_def,
   14.12                                            &sched_atropos_def,
   14.13                                            NULL};
   14.14 @@ -225,6 +226,25 @@ void domain_wake(struct domain *d)
   14.15      spin_unlock_irqrestore(&schedule_lock[cpu], flags);
   14.16  }
   14.17  
   14.18 +/*
   14.19 + * Pause a domain: put it to sleep, then call the scheduler's pause hook.
   14.20 + */
   14.21 +void pause_domain(struct domain *domain)
   14.22 +{
   14.23 +	domain_sleep(domain);
   14.24 +	SCHED_OP(pause, domain);	
   14.25 +}
   14.26 +
   14.27 +
   14.28 +/*
   14.29 + * Unpause a domain: call the scheduler's unpause hook, then wake it.
   14.30 + */
   14.31 +void unpause_domain(struct domain *domain)
   14.32 +{
   14.33 +	SCHED_OP(unpause, domain);
   14.34 +	domain_wake(domain);
   14.35 +}
   14.36 +
   14.37  /* Block the currently-executing domain until a pertinent event occurs. */
   14.38  long do_block(void)
   14.39  {
   14.40 @@ -361,6 +381,7 @@ void __enter_scheduler(void)
   14.41      rem_ac_timer(&schedule_data[cpu].s_timer);
   14.42      
   14.43      ASSERT(!in_irq());
   14.44 +if(!__task_on_runqueue(prev)) printk("Domain %d not on runqueue\n",prev->domain);
   14.45      ASSERT(__task_on_runqueue(prev));
   14.46  
   14.47      if ( test_bit(DF_BLOCKED, &prev->flags) )
    15.1 --- a/xen/include/hypervisor-ifs/sched_ctl.h	Wed Jun 30 17:10:07 2004 +0000
    15.2 +++ b/xen/include/hypervisor-ifs/sched_ctl.h	Wed Jul 07 15:39:15 2004 +0000
    15.3 @@ -9,8 +9,9 @@
    15.4  
    15.5  /* Scheduler types */
    15.6  #define SCHED_BVT      0
    15.7 -#define SCHED_ATROPOS  1
    15.8 -#define SCHED_RROBIN   2
    15.9 +#define SCHED_FBVT     1
   15.10 +#define SCHED_ATROPOS  2
   15.11 +#define SCHED_RROBIN   3
   15.12  
   15.13  /* these describe the intended direction used for a scheduler control or domain
   15.14   * command */
   15.15 @@ -32,6 +33,12 @@ struct sched_ctl_cmd
   15.16              u32 ctx_allow;            /*  8: context switch allowance */
   15.17          } PACKED bvt;
   15.18  
   15.19 +        struct fbvt_ctl
   15.20 +        {
   15.21 +            /* IN on SCHED_INFO_PUT; otherwise filled in by the scheduler. */
   15.22 +            u32 ctx_allow;            /*  8: context switch allowance */
   15.23 +        } PACKED fbvt;
   15.24 +
   15.25          struct rrobin_ctl
   15.26          {
   15.27              /* IN variables */
   15.28 @@ -55,6 +62,14 @@ struct sched_adjdom_cmd
   15.29              u32 warpu;      /* 28: unwarp time requirement */
   15.30          } PACKED bvt;
   15.31  
   15.32 +        struct fbvt_adjdom
   15.33 +        {
   15.34 +            u32 mcu_adv;    /* 16: mcu advance: inverse of weight; 0 is rejected */
   15.35 +            u32 warp;       /* 20: time warp */
   15.36 +            u32 warpl;      /* 24: warp limit */
   15.37 +            u32 warpu;      /* 28: unwarp time requirement */
   15.38 +        } PACKED fbvt;
   15.39 +
   15.40          struct atropos_adjdom
   15.41          {
   15.42              u64 nat_period; /* 16 */
    16.1 --- a/xen/include/xen/sched-if.h	Wed Jun 30 17:10:07 2004 +0000
    16.2 +++ b/xen/include/xen/sched-if.h	Wed Jul 07 15:39:15 2004 +0000
    16.3 @@ -50,6 +50,7 @@ struct scheduler
    16.4      void         (*dump_runq_el)   (struct domain *);
    16.5      int          (*prn_state)      (int);
    16.6      void         (*pause)          (struct domain *);
    16.7 +	void		 (*unpause)		   (struct domain *);
    16.8  };
    16.9  
   16.10  /* per CPU scheduler information */
    17.1 --- a/xen/include/xen/sched.h	Wed Jun 30 17:10:07 2004 +0000
    17.2 +++ b/xen/include/xen/sched.h	Wed Jul 07 15:39:15 2004 +0000
    17.3 @@ -218,6 +218,8 @@ int  sched_id();
    17.4  void init_idle_task(void);
    17.5  void domain_wake(struct domain *d);
    17.6  void domain_sleep(struct domain *d);
    17.7 +void pause_domain(struct domain *d);
    17.8 +void unpause_domain(struct domain *d);
    17.9  
   17.10  void __enter_scheduler(void);
   17.11  
   17.12 @@ -268,14 +270,14 @@ static inline void domain_pause(struct d
   17.13  {
   17.14      ASSERT(d != current);
   17.15      atomic_inc(&d->pausecnt);
   17.16 -    domain_sleep(d);
   17.17 +    pause_domain(d);
   17.18  }
   17.19  
   17.20  static inline void domain_unpause(struct domain *d)
   17.21  {
   17.22      ASSERT(d != current);
   17.23      if ( atomic_dec_and_test(&d->pausecnt) )
   17.24 -        domain_wake(d);
   17.25 +        unpause_domain(d);
   17.26  }
   17.27  
   17.28  static inline void domain_unblock(struct domain *d)
   17.29 @@ -288,13 +290,13 @@ static inline void domain_pause_by_syste
   17.30  {
   17.31      ASSERT(d != current);
   17.32      if ( !test_and_set_bit(DF_CTRLPAUSE, &d->flags) )
   17.33 -        domain_sleep(d);
   17.34 +        pause_domain(d);
   17.35  }
   17.36  
   17.37  static inline void domain_unpause_by_systemcontroller(struct domain *d)
   17.38  {
   17.39      if ( test_and_clear_bit(DF_CTRLPAUSE, &d->flags) )
   17.40 -        domain_wake(d);
   17.41 +        unpause_domain(d);
   17.42  }
   17.43  
   17.44