diff options
author | mwilli2@equilibrium.research.intel-research.net <mwilli2@equilibrium.research.intel-research.net> | 2004-03-17 18:31:06 +0000 |
---|---|---|
committer | mwilli2@equilibrium.research.intel-research.net <mwilli2@equilibrium.research.intel-research.net> | 2004-03-17 18:31:06 +0000 |
commit | bee5b0bb130f42dabd8cbdcd035d8f737e725dbc (patch) | |
tree | 99ac0cc05ceea17ead1d618190f88dfa33ea7f86 | |
parent | 8306baac6f817aea60eb6e7acfac96cbb007ed5a (diff) | |
download | xen-bee5b0bb130f42dabd8cbdcd035d8f737e725dbc.tar.gz xen-bee5b0bb130f42dabd8cbdcd035d8f737e725dbc.tar.bz2 xen-bee5b0bb130f42dabd8cbdcd035d8f737e725dbc.zip |
bitkeeper revision 1.808 (4058996anVCLQRr3o_Adf9GqJybYSg)
Various updates related to the new generic scheduler API.
The BVT scheduler has been ported to this API and a simple Round Robin
scheduler has been added. There's a new generic control interface for
setting scheduling parameters from userspace.
Use the sched=xxx option at boot time to choose the scheduler. Default
is BVT. The possibilities are "bvt" and "rrobin".
-rw-r--r-- | .rootkeys | 6 | ||||
-rw-r--r-- | docs/interface.tex | 303 | ||||
-rw-r--r-- | tools/xc/lib/xc.h | 7 | ||||
-rw-r--r-- | tools/xc/lib/xc_atropos.c | 38 | ||||
-rw-r--r-- | tools/xc/lib/xc_bvtsched.c | 19 | ||||
-rw-r--r-- | tools/xc/lib/xc_private.h | 1 | ||||
-rw-r--r-- | tools/xc/lib/xc_rrobin.c | 20 | ||||
-rw-r--r-- | tools/xc/py/Xc.c | 71 | ||||
-rw-r--r-- | xen/common/dom0_ops.c | 16 | ||||
-rw-r--r-- | xen/common/domain.c | 3 | ||||
-rw-r--r-- | xen/common/kernel.c | 3 | ||||
-rw-r--r-- | xen/common/keyhandler.c | 10 | ||||
-rw-r--r-- | xen/common/sched_bvt.c | 427 | ||||
-rw-r--r-- | xen/common/sched_rrobin.c | 56 | ||||
-rw-r--r-- | xen/common/schedule.c | 450 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/dom0_ops.h | 25 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/sched-ctl.h | 68 | ||||
-rw-r--r-- | xen/include/xeno/sched-if.h | 90 | ||||
-rw-r--r-- | xen/include/xeno/sched.h | 33 |
19 files changed, 1339 insertions, 307 deletions
@@ -71,6 +71,7 @@ 3fbba6dbDfYvJSsw9500b4SZyUhxjQ tools/xc/lib/Makefile 3fbba6dc1uU7U3IFeF6A-XEOYF2MkQ tools/xc/lib/rpm.spec 3fbba6dcrNxtygEcgJYAJJ1gCQqfsA tools/xc/lib/xc.h +40589968oCfoUlXd460CjVAkBE8IBA tools/xc/lib/xc_atropos.c 3fbba6dbEVkVMX0JuDFzap9jeaucGA tools/xc/lib/xc_bvtsched.c 3fbba6dbasJQV-MVElDC0DGSHMiL5w tools/xc/lib/xc_domain.c 40278d99BLsfUv3qxv0I8C1sClZ0ow tools/xc/lib/xc_elf.h @@ -83,6 +84,7 @@ 4051bce6CHAsYh8P5t2OHDtRWOP9og tools/xc/lib/xc_physdev.c 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/xc/lib/xc_private.c 3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/xc/lib/xc_private.h +40589968UQFnJeOMn8UIFLbXBuwXjw tools/xc/lib/xc_rrobin.c 3fbba6dcoGq9hQlksrBUfC2P5F6sGg tools/xc/lib/xc_vbd.c 3fbba6dc38q-ioRlwSR_quw4G3qUeQ tools/xc/lib/xc_vif.c 3fbd0a3dTwnDcfdw0-v46dPbX98zDw tools/xc/py/Makefile @@ -169,6 +171,8 @@ 4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c 4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c +40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c +40589968be_t_n0-w6ggceW7h-sx0w xen/common/sched_rrobin.c 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c 3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c 3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c @@ -497,6 +501,7 @@ 3ead095dE_VF-QA88rl_5cWYRWtRVQ xen/include/hypervisor-ifs/kbd.h 3ddb79c2oRPrzClk3zbTkRHlpumzKA xen/include/hypervisor-ifs/network.h 4051db79512nOCGweabrFWO2M2h5ng xen/include/hypervisor-ifs/physdev.h +40589968wmhPmV5-ENbBYmMjnedgKw xen/include/hypervisor-ifs/sched-ctl.h 404f3d2eR2Owk-ZcGOx9ULGHg3nrww xen/include/hypervisor-ifs/trace.h 3f0d22cbroqp_BkoDPwkfRJhaw1LiQ xen/include/hypervisor-ifs/vbd.h 3ddb79c4qbCoOFHrv9sCGshbWzBVlQ xen/include/scsi/scsi.h @@ -562,6 +567,7 @@ 3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xeno/prefetch.h 4006e65fWMwLqcocgik6wbF0Eeh0Og xen/include/xeno/rbtree.h 3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xeno/reboot.h +40589969nPq3DMzv24RDb5LXE9brHw xen/include/xeno/sched-if.h 
3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xeno/sched.h 403a06a7H0hpHcKpAiDe5BPnaXWTlA xen/include/xeno/serial.h 3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen/include/xeno/skbuff.h diff --git a/docs/interface.tex b/docs/interface.tex index 2736a0412d..84003de1b6 100644 --- a/docs/interface.tex +++ b/docs/interface.tex @@ -353,7 +353,7 @@ create ``virtual disks'' on demand. \subsection{Virtual Disk Management} The VD management code consists of a set of python libraries. It can therefore be accessed by custom scripts as well as the convenience scripts provided. The -VD database is a SQLite database in /var/db/xen\_vdisk.sqlite. +VD database is a SQLite database in /var/db/xen\_vdisks.sqlite. The VD scripts and general VD usage are documented in the VBD-HOWTO.txt. @@ -379,6 +379,307 @@ giving the page back to the hypervisor, or to use them for storing page tables. and providing control interfaces for managing scheduling, networking, and blocks. +\chapter{CPU Scheduler} + +Xen offers a uniform API for CPU schedulers. It is possible to choose +from a number of schedulers at boot and it should be easy to add more. + +\paragraph*{Note: SMP host support} +Xen has always supported SMP host systems. Domains are statically assigned to +CPUs, either at creation time or when manually pinning to a particular CPU. +The current schedulers then run locally on each CPU to decide which of the +assigned domains should be run there. + +\section{Standard Schedulers} + +These BVT and Round Robin schedulers are part of the normal Xen +distribution. A port of the Atropos scheduler from the Nemesis +operating system is almost complete and will be added shortly. + +\subsection{Borrowed Virtual Time (BVT)} + +This was the original Xen scheduler. BVT is designed for general-purpose +environments but also provides support for latency-sensitive threads. It +provides long-term weighted sharing but allows tasks a limited ability to +``warp back'' in virtual time so that they are dispatched earlier. 
+ +BVT can be activated by specifying {\tt sched=bvt} as a boot argument to Xen. + +\subsection{Round Robin} + +The round robin scheduler is a very simple example of some of the basic parts +of the scheduler API. + +Round robin can be activated by specifying {\tt sched=rrobin} as a boot +argument to Xen. + +\section{Scheduling API} + +The scheduling API is used by both the schedulers described above and should +also be used by any new schedulers. It provides a generic interface and also +implements much of the ``boilerplate'' code. + +\paragraph*{Note:} the scheduler API is currently undergoing active development, +so there may be some changes to this API, although they are expected to be small. + +Schedulers conforming to this API are described by the following +structure: + +\begin{verbatim} +struct scheduler +{ + char *name; /* full name for this scheduler */ + char *opt_name; /* option name for this scheduler */ + unsigned int sched_id; /* ID for this scheduler */ + + int (*init_scheduler) (); + int (*alloc_task) (struct task_struct *); + void (*add_task) (struct task_struct *); + void (*free_task) (struct task_struct *); + void (*rem_task) (struct task_struct *); + void (*wake_up) (struct task_struct *); + long (*do_block) (struct task_struct *); + task_slice_t (*do_schedule) (s_time_t); + int (*control) (struct sched_ctl_cmd *); + int (*adjdom) (struct task_struct *, + struct sched_adjdom_cmd *); + s32 (*reschedule) (struct task_struct *); + void (*dump_settings) (void); + void (*dump_cpu_state) (int); + void (*dump_runq_el) (struct task_struct *); +}; +\end{verbatim} + +The only method that {\em must} be implemented is +{\tt do\_schedule()}. However, if there is not some implementation for the +{\tt wake\_up()} method then waking tasks will not get put on the runqueue! + +The fields of the above structure are described in more detail below. + +\subsubsection{name} + +The name field is an arbitrary descriptive ASCII string. 
+ +\subsubsection{opt\_name} + +This field is the value of the {\tt sched=} boot-time option that will select +this scheduler. + +\subsubsection{sched\_id} + +This is an integer that uniquely identifies this scheduler. There should be a +macro corresponding to this scheduler ID in {\tt <hypervisor-ifs/sched-if.h>}. + +\subsubsection{init\_scheduler} + +\paragraph*{Purpose} + +This is a function for performing any scheduler-specific initialisation. For +instance, it might allocate memory for per-CPU scheduler data and initialise it +appropriately. + +\paragraph*{Call environment} + +This function is called after the initialisation performed by the generic +layer. The function is called exactly once, for the scheduler that has been +selected. + +\paragraph*{Return values} + +This should return negative on failure --- failure to initialise the scheduler +will cause an immediate panic. + +\subsubsection{alloc\_task} + +\paragraph*{Purpose} +This is called when a {\tt task\_struct} is allocated by the generic scheduler +layer. A particular scheduler implementation may use this method to allocate +per-task data for this task. It may use the {\tt sched\_priv} pointer in the +{\tt task\_struct} to point to this data. + +\paragraph*{Call environment} +The generic layer guarantees that the {\tt sched\_priv} field will +remain intact from the time this method is called until the task is +deallocated (so long as the scheduler implementation does not change +it!). + +\paragraph*{Return values} +Negative on failure. + +\subsubsection{add\_task} + +\paragraph*{Purpose} + +Called when a task is initially added by the generic layer. + +\paragraph*{Call environment} + +The fields in the {\tt task\_struct} are now filled out and available for use. +Schedulers should implement appropriate initialisation of any per-task private +information in this method. 
+ +\subsubsection{free\_task} + +\paragraph*{Purpose} + +Schedulers should free the space used by any associated private data +structures. + +\paragraph*{Call environment} + +This is called when a {\tt task\_struct} is about to be deallocated. +The generic layer will have done generic task removal operations and +(if implemented) called the scheduler's {\tt rem\_task} method before +this method is called. + +\subsubsection{rem\_task} + +\paragraph*{Purpose} + +This is called when a task is being removed from scheduling. + +\subsubsection{wake\_up} + +\paragraph*{Purpose} + +Called when a task is woken up, this method should put the task on the runqueue +(or do the scheduler-specific equivalent action). + +\paragraph*{Call environment} + +The generic layer guarantees that the task is already in state +RUNNING. + +\subsubsection{do\_block} + +\paragraph*{Purpose} + +This function is called when a task is blocked. This function should +not remove the task from the runqueue. + +\paragraph*{Call environment} + +The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to +TASK\_INTERRUPTIBLE on entry to this method. + +\subsubsection{do\_schedule} + +This method must be implemented. + +\paragraph*{Purpose} + +The method is called each time a new task must be chosen for scheduling on the +current CPU. The current time is passed as the single argument (the current +task can be found using the {\tt current} variable). + +This method should select the next task to run on this CPU and set its minimum +time to run as well as returning the data described below. + +This method should also take the appropriate action if the previous +task has blocked, e.g. removing it from the runqueue. + +\paragraph*{Call environment} + +The other fields in the {\tt task\_struct} are updated by the generic layer, +which also performs all Xen-specific tasks and performs the actual task switch +(unless the previous task has been chosen again). 
+ +This method is called with the {\tt schedule\_lock} held for the current CPU +and with interrupts disabled. + +\paragraph*{Return values} + +Must return a {\tt struct task\_slice} describing what task to run and how long +for (at maximum). + +\subsubsection{control} + +\paragraph*{Purpose} + +This method is called for global scheduler control operations. It takes a +pointer to a {\tt struct sched\_ctl\_cmd}, from which it should select the +appropriate command data. + +\paragraph*{Call environment} + +The generic layer guarantees that when this method is called, the caller was +using the same control interface version and that the caller selected the +correct scheduler ID, hence the scheduler's implementation does not need to +sanity-check these parts of the call. + +\paragraph*{Return values} + +This function should return the value to be passed back to user space, hence it +should either be 0 or an appropriate errno value. + +\subsubsection{sched\_adjdom} + +\paragraph*{Purpose} + +This method is called to adjust the scheduling parameters of a particular +domain. + +\paragraph*{Call environment} + +The generic layer guarantees that the caller has specified the correct +control interface version and scheduler ID and that the supplied {\tt +task\_struct} will not be deallocated during the call (hence it is not +necessary to {\tt get\_task\_struct}). + +\paragraph*{Return values} + +This function should return the value to be passed back to user space, hence it +should either be 0 or an appropriate errno value. + +\subsubsection{reschedule} + +\paragraph*{Purpose} + +This method is called to determine if a reschedule is required as a result of a +particular task. + +\paragraph*{Call environment} +The generic layer will cause a reschedule if the current domain is the idle +task or it has exceeded its minimum time slice before a reschedule. The +generic layer guarantees that the task passed is not currently running but is +on the runqueue. 
+ +\paragraph*{Return values} + +Should return a mask of CPUs to cause a reschedule on. + +\subsubsection{dump\_settings} + +\paragraph*{Purpose} + +If implemented, this should dump any private global settings for this +scheduler to the console. + +\paragraph*{Call environment} + +This function is called with interrupts enabled. + +\subsubsection{dump\_cpu\_state} + +\paragraph*{Purpose} + +This method should dump any private settings for the specified CPU. + +\paragraph*{Call environment} + +This function is called with interrupts disabled and the {\tt schedule\_lock} +for the specified CPU held. + +\subsubsection{dump\_runq\_el} + +\paragraph*{Purpose} + +This method should dump any private settings for the specified task. + +\paragraph*{Call environment} + +This function is called with interrupts disabled and the {\tt schedule\_lock} +for the task's CPU held. \chapter{Debugging} diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h index fd1494dc13..aba2906842 100644 --- a/tools/xc/lib/xc.h +++ b/tools/xc/lib/xc.h @@ -81,6 +81,13 @@ int xc_bvtsched_domain_set(int xc_handle, unsigned long warpl, unsigned long warpu); +int xc_atropos_domain_set(int xc_handle, + u64 domid, + int xtratime); + +int xc_rrobin_global_set(int xc_handle, + u64 slice); + typedef struct { unsigned long credit_bytes; unsigned long credit_usec; diff --git a/tools/xc/lib/xc_atropos.c b/tools/xc/lib/xc_atropos.c new file mode 100644 index 0000000000..b9ee719b0f --- /dev/null +++ b/tools/xc/lib/xc_atropos.c @@ -0,0 +1,38 @@ +/****************************************************************************** + * xc_atropos.c + * + * API for manipulating parameters of the Atropos scheduler. + * + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. 
+ */ + +#include "xc_private.h" + +int xc_atropos_global_set(int xc_handle, + unsigned long ctx_allow) +{ + dom0_op_t op; + op.cmd = DOM0_SCHEDCTL; + op.u.schedctl.if_ver = SCHED_CTL_IF_VER; + op.u.schedctl.sched_id = SCHED_BVT; + + op.u.schedctl.u.bvt.ctx_allow = ctx_allow; + return do_dom0_op(xc_handle, &op); +} + +int xc_atropos_domain_set(int xc_handle, + u64 domid, int xtratime) +{ + dom0_op_t op; + op.cmd = DOM0_ADJUSTDOM; + + op.u.adjustdom.domain = (domid_t)domid; + op.u.adjustdom.if_ver = SCHED_CTL_IF_VER; + op.u.adjustdom.sched_id = SCHED_ATROPOS; + + op.u.adjustdom.u.atropos.xtratime = xtratime; + + printf("Doing dom0 op!\n"); + + return do_dom0_op(xc_handle, &op); +} diff --git a/tools/xc/lib/xc_bvtsched.c b/tools/xc/lib/xc_bvtsched.c index 57554cd2e6..e38f9cf09e 100644 --- a/tools/xc/lib/xc_bvtsched.c +++ b/tools/xc/lib/xc_bvtsched.c @@ -12,8 +12,10 @@ int xc_bvtsched_global_set(int xc_handle, unsigned long ctx_allow) { dom0_op_t op; - op.cmd = DOM0_BVTCTL; - op.u.bvtctl.ctx_allow = ctx_allow; + op.cmd = DOM0_SCHEDCTL; + op.u.schedctl.sched_id = SCHED_BVT; + op.u.schedctl.if_ver = SCHED_CTL_IF_VER; + op.u.schedctl.u.bvt.ctx_allow = ctx_allow; return do_dom0_op(xc_handle, &op); } @@ -25,11 +27,16 @@ int xc_bvtsched_domain_set(int xc_handle, unsigned long warpu) { dom0_op_t op; + struct bvt_adjdom *adjptr = &op.u.adjustdom.u.bvt; + op.cmd = DOM0_ADJUSTDOM; + op.u.adjustdom.sched_id = SCHED_BVT; + op.u.adjustdom.if_ver = SCHED_CTL_IF_VER; op.u.adjustdom.domain = (domid_t)domid; - op.u.adjustdom.mcu_adv = mcuadv; - op.u.adjustdom.warp = warp; - op.u.adjustdom.warpl = warpl; - op.u.adjustdom.warpu = warpu; + + adjptr->mcu_adv = mcuadv; + adjptr->warp = warp; + adjptr->warpl = warpl; + adjptr->warpu = warpu; return do_dom0_op(xc_handle, &op); } diff --git a/tools/xc/lib/xc_private.h b/tools/xc/lib/xc_private.h index d5ce8947ea..859d3a7740 100644 --- a/tools/xc/lib/xc_private.h +++ b/tools/xc/lib/xc_private.h @@ -23,6 +23,7 @@ #include <dom0_ops.h> #include 
<vbd.h> #include <event_channel.h> +#include <sched-ctl.h> #define _PAGE_PRESENT 0x001 #define _PAGE_RW 0x002 diff --git a/tools/xc/lib/xc_rrobin.c b/tools/xc/lib/xc_rrobin.c new file mode 100644 index 0000000000..4d986cee83 --- /dev/null +++ b/tools/xc/lib/xc_rrobin.c @@ -0,0 +1,20 @@ +/****************************************************************************** + * xc_rrobin.c + * + * API for manipulating parameters of the Round Robin scheduler + * + * by Mark Williamson, Copyright (c) 2004 Intel Research Cambridge. + */ + +#include "xc_private.h" + +int xc_rrobin_global_set(int xc_handle, u64 slice) +{ + dom0_op_t op; + op.cmd = DOM0_SCHEDCTL; + op.u.schedctl.if_ver = SCHED_CTL_IF_VER; + op.u.schedctl.sched_id = SCHED_RROBIN; + + op.u.schedctl.u.rrobin.slice = slice; + return do_dom0_op(xc_handle, &op); +} diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c index 8ed16ee505..cef2a046ef 100644 --- a/tools/xc/py/Xc.c +++ b/tools/xc/py/Xc.c @@ -290,10 +290,10 @@ static PyObject *pyxc_bvtsched_domain_set(PyObject *self, u64 dom; unsigned long mcuadv, warp, warpl, warpu; - static char *kwd_list[] = { "dom", "mcuadv", "warp", "warpl", + static char *kwd_list[] = { "dom", "mcuadv", "warp", "warpl", "warpu", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lllll", kwd_list, + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lllll", kwd_list, &dom, &mcuadv, &warp, &warpl, &warpu) ) return NULL; @@ -862,6 +862,49 @@ static PyObject *pyxc_physinfo(PyObject *self, "cpu_khz", info.cpu_khz); } +static PyObject *pyxc_atropos_domain_set(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + PyObject *ret_obj; + int xtratime; + u64 domid; + + static char *kwd_list[] = { "dom", "xtratime", NULL }; + + if( !PyArg_ParseTupleAndKeywords(args, kwds, "Li", kwd_list, &domid, + &xtratime) ) + return NULL; + + if ( xc_atropos_domain_set(xc->xc_handle, domid, xtratime) != 0 ) + return PyErr_SetFromErrno(xc_error); + + Py_INCREF(zero); + return 
zero; +} + +static PyObject *pyxc_rrobin_global_set(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + PyObject *ret_obj; + u64 slice; + + static char *kwd_list[] = { "slice", NULL }; + + if( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &slice) ) + return NULL; + + if ( xc_rrobin_global_set(xc->xc_handle, slice) != 0 ) + return PyErr_SetFromErrno(xc_error); + + Py_INCREF(zero); + return zero; +} + + static PyMethodDef pyxc_methods[] = { { "domain_create", (PyCFunction)pyxc_domain_create, @@ -955,15 +998,15 @@ static PyMethodDef pyxc_methods[] = { " cmdline [str, n/a]: Kernel parameters, if any.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, - { "bvtsched_global_set", - (PyCFunction)pyxc_bvtsched_global_set, + { "bvtsched_global_set", + (PyCFunction)pyxc_bvtsched_global_set, METH_VARARGS | METH_KEYWORDS, "\n" "Set global tuning parameters for Borrowed Virtual Time scheduler.\n" " ctx_allow [int]: Minimal guaranteed quantum (I think!).\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, - { "bvtsched_domain_set", - (PyCFunction)pyxc_bvtsched_domain_set, + { "bvtsched_domain_set", + (PyCFunction)pyxc_bvtsched_domain_set, METH_VARARGS | METH_KEYWORDS, "\n" "Set per-domain tuning parameters for Borrowed Virtual Time scheduler.\n" " dom [long]: Identifier of domain to be tuned.\n" @@ -973,6 +1016,22 @@ static PyMethodDef pyxc_methods[] = { " warpu [int]: Internal BVT parameter.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "atropos_domain_set", + (PyCFunction)pyxc_atropos_domain_set, + METH_VARARGS | METH_KEYWORDS, "\n" + "Set the extra time flag for a domain when running with Atropos.\n" + " dom [long]: domain to set\n" + " xtratime [int]: boolean\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "rrobin_global_set", + (PyCFunction)pyxc_rrobin_global_set, + METH_KEYWORDS, "\n" + "Set Round Robin scheduler slice.\n" + " slice [long]: Round Robin scheduler slice\n" + "Returns: [int] 0 on 
success, throws an exception on failure\n" + }, + { "vif_scheduler_set", (PyCFunction)pyxc_vif_scheduler_set, METH_VARARGS | METH_KEYWORDS, "\n" diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index b39ead491c..1d69f35bf3 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -18,6 +18,7 @@ #include <asm/pdb.h> #include <xeno/trace.h> #include <xeno/console.h> +#include <hypervisor-ifs/sched-ctl.h> extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int); @@ -196,22 +197,15 @@ long do_dom0_op(dom0_op_t *u_dom0_op) } break; - case DOM0_BVTCTL: + case DOM0_SCHEDCTL: { - unsigned long ctx_allow = op->u.bvtctl.ctx_allow; - ret = sched_bvtctl(ctx_allow); + ret = sched_ctl(&op->u.schedctl); } break; case DOM0_ADJUSTDOM: { - domid_t dom = op->u.adjustdom.domain; - unsigned long mcu_adv = op->u.adjustdom.mcu_adv; - unsigned long warp = op->u.adjustdom.warp; - unsigned long warpl = op->u.adjustdom.warpl; - unsigned long warpu = op->u.adjustdom.warpu; - - ret = sched_adjdom(dom, mcu_adv, warp, warpl, warpu); + ret = sched_adjdom(&op->u.adjustdom); } break; @@ -281,7 +275,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( (p->state == TASK_STOPPED) || (p->state == TASK_DYING) ) op->u.getdomaininfo.state = DOMSTATE_STOPPED; op->u.getdomaininfo.hyp_events = p->hyp_events; - op->u.getdomaininfo.mcu_advance = p->mcu_advance; +// op->u.getdomaininfo.mcu_advance = p->mcu_advance; op->u.getdomaininfo.tot_pages = p->tot_pages; op->u.getdomaininfo.cpu_time = p->cpu_time; op->u.getdomaininfo.shared_info_frame = diff --git a/xen/common/domain.c b/xen/common/domain.c index 8921ee246d..e61f02a26a 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -43,7 +43,6 @@ struct task_struct *do_createdomain(domid_t dom_id, unsigned int cpu) if ( (p = alloc_task_struct()) == NULL ) return NULL; - memset(p, 0, sizeof(*p)); atomic_set(&p->refcnt, 1); @@ -496,7 +495,7 @@ void release_task(struct task_struct *p) UNSHARE_PFN(virt_to_page(p->shared_info)); 
free_all_dom_mem(p); - kmem_cache_free(task_struct_cachep, p); + free_task_struct(p); } diff --git a/xen/common/kernel.c b/xen/common/kernel.c index f99f3fac32..5f2f27728f 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -71,6 +71,8 @@ int opt_watchdog=0; unsigned char opt_pdb[10] = "none"; /* opt_tbuf_size: trace buffer size (in pages) */ unsigned int opt_tbuf_size = 1; +/* opt_sched: scheduler - default to Borrowed Virtual Time */ +char opt_sched[10] = "bvt"; static struct { unsigned char *name; @@ -91,6 +93,7 @@ static struct { { "watchdog", OPT_BOOL, &opt_watchdog }, { "pdb", OPT_STR, &opt_pdb }, { "tbuf_size", OPT_UINT, &opt_tbuf_size }, + { "sched", OPT_STR, &opt_sched }, { NULL, 0, NULL } }; diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c index 32786fdfd1..75f1e38c56 100644 --- a/xen/common/keyhandler.c +++ b/xen/common/keyhandler.c @@ -86,7 +86,15 @@ static char *task_states[] = NULL, NULL, NULL, - "Dying ", + "Dying ", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Sched priv" }; void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c new file mode 100644 index 0000000000..f473e3f760 --- /dev/null +++ b/xen/common/sched_bvt.c @@ -0,0 +1,427 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- + **************************************************************************** + * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge + * (C) 2002-2003 University of Cambridge + * (C) 2004 - Mark Williamson - Intel Research Cambridge + **************************************************************************** + * + * File: common/schedule.c + * Author: Rolf Neugebauer & Keir Fraser + * Updated for generic API by Mark Williamson + * + * Description: CPU scheduling + * implements A Borrowed Virtual Time scheduler. 
+ * (see Duda & Cheriton SOSP'99) + */ + +#include <xeno/config.h> +#include <xeno/init.h> +#include <xeno/lib.h> +#include <xeno/sched.h> +#include <xeno/delay.h> +#include <xeno/event.h> +#include <xeno/time.h> +#include <xeno/ac_timer.h> +#include <xeno/interrupt.h> +#include <xeno/timer.h> +#include <xeno/perfc.h> +#include <xeno/sched-if.h> +#include <xeno/slab.h> + +/* all per-domain BVT-specific scheduling info is stored here */ +struct bvt_dom_info +{ + unsigned long mcu_advance; /* inverse of weight */ + u32 avt; /* actual virtual time */ + u32 evt; /* effective virtual time */ + int warpback; /* warp? */ + long warp; /* virtual time warp */ + long warpl; /* warp limit */ + long warpu; /* unwarp time requirement */ + s_time_t warped; /* time it ran warped last time */ + s_time_t uwarped; /* time it ran unwarped last time */ +}; + +struct bvt_cpu_info +{ + unsigned long svt; /* XXX check this is unsigned long! */ +}; + + +#define DOM_INF(p) ((struct bvt_dom_info *)(p)->sched_priv) +#define CPU_INF(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv) +#define CPU_SVT(cpu) (CPU_INF(cpu)->svt) + +#define MCU (s32)MICROSECS(100) /* Minimum unit */ +#define MCU_ADVANCE 10 /* default weight */ +#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ +static s32 ctx_allow = (s32)MILLISECS(5); /* context switch allowance */ + +/* SLAB cache for struct bvt_dom_info objects */ +static kmem_cache_t *dom_info_cache; + +/* + * Calculate the effective virtual time for a domain. 
Take into account + * warping limits + */ +static void __calc_evt(struct bvt_dom_info *inf) +{ + s_time_t now = NOW(); + + if ( inf->warpback ) + { + if ( ((now - inf->warped) < inf->warpl) && + ((now - inf->uwarped) > inf->warpu) ) + { + /* allowed to warp */ + inf->evt = inf->avt - inf->warp; + } + else + { + /* warped for too long -> unwarp */ + inf->evt = inf->avt; + inf->uwarped = now; + inf->warpback = 0; + } + } + else + { + inf->evt = inf->avt; + } +} + +/** + * bvt_alloc_task - allocate BVT private structures for a task + * @p: task to allocate private structures for + * + * Returns non-zero on failure. + */ +int bvt_alloc_task(struct task_struct *p) +{ + DOM_INF(p) + = (struct bvt_dom_info *)kmem_cache_alloc(dom_info_cache,GFP_KERNEL); + + if ( DOM_INF(p) == NULL ) + return -1; + + return 0; +} + +/* + * Add and remove a domain + */ +void bvt_add_task(struct task_struct *p) +{ + struct bvt_dom_info *inf = DOM_INF(p); + + ASSERT(inf != NULL); + ASSERT(p != NULL); + + inf->mcu_advance = MCU_ADVANCE; + + if ( p->domain == IDLE_DOMAIN_ID ) + { + inf->avt = inf->evt = ~0U; + } + else + { + /* Set avt and evt to system virtual time. */ + inf->avt = CPU_SVT(p->processor); + inf->evt = CPU_SVT(p->processor); + /* Set some default values here. 
*/ + inf->warpback = 0; + inf->warp = 0; + inf->warpl = 0; + inf->warpu = 0; + } + + return; +} + +/** + * bvt_free_task - free BVT private structures for a task + * @p: task + */ +void bvt_free_task(struct task_struct *p) +{ + ASSERT( p->sched_priv != NULL ); + kmem_cache_free( dom_info_cache, p->sched_priv ); +} + + +void bvt_wake_up(struct task_struct *p) +{ + struct bvt_dom_info *inf = DOM_INF(p); + + ASSERT(inf != NULL); + + + /* set the BVT parameters */ + if (inf->avt < CPU_SVT(p->processor)) + inf->avt = CPU_SVT(p->processor); + + /* deal with warping here */ + inf->warpback = 1; + inf->warped = NOW(); + __calc_evt(inf); + __add_to_runqueue_head(p); +} + +/* + * Block the currently-executing domain until a pertinent event occurs. + */ +static long bvt_do_block(struct task_struct *p) +{ + DOM_INF(p)->warpback = 0; + return 0; +} + +/* Control the scheduler. */ +int bvt_ctl(struct sched_ctl_cmd *cmd) +{ + struct bvt_ctl *params = &cmd->u.bvt; + + ctx_allow = params->ctx_allow; + + return 0; +} + +/* Adjust scheduling parameter for a given domain. */ +int bvt_adjdom(struct task_struct *p, + struct sched_adjdom_cmd *cmd) +{ + struct bvt_adjdom *params = &cmd->u.bvt; + unsigned long mcu_adv = params->mcu_adv, + warp = params->warp, + warpl = params->warpl, + warpu = params->warpu; + + struct bvt_dom_info *inf = DOM_INF(p); + + /* Sanity -- this can avoid divide-by-zero. */ + if ( mcu_adv == 0 ) + return -EINVAL; + + spin_lock_irq(&schedule_lock[p->processor]); + inf->mcu_advance = mcu_adv; + inf->warp = warp; + inf->warpl = warpl; + inf->warpu = warpu; + spin_unlock_irq(&schedule_lock[p->processor]); + + return 0; +} + + +/* + * The main function + * - deschedule the current domain. + * - pick a new domain. + * i.e., the domain with lowest EVT. + * The runqueue should be ordered by EVT so that is easy. 
+ */ +static task_slice_t bvt_do_schedule(s_time_t now) +{ + struct task_struct *prev = current, *next = NULL, *next_prime, *p; + struct list_head *tmp; + int cpu = prev->processor; + s32 r_time; /* time for new dom to run */ + s32 ranfor; /* assume we never run longer than 2.1s! */ + s32 mcus; + u32 next_evt, next_prime_evt, min_avt; + struct bvt_dom_info *prev_inf = DOM_INF(prev), + *p_inf = NULL, + *next_inf = NULL, + *next_prime_inf = NULL; + task_slice_t ret; + + ASSERT(prev->sched_priv != NULL); + ASSERT(prev_inf != NULL); + + if ( likely(!is_idle_task(prev)) ) + { + ranfor = (s32)(now - prev->lastschd); + /* Calculate mcu and update avt. */ + mcus = (ranfor + MCU - 1) / MCU; + prev_inf->avt += mcus * prev_inf->mcu_advance; + + __calc_evt(prev_inf); + + __del_from_runqueue(prev); + + if ( likely(prev->state == TASK_RUNNING) ) + __add_to_runqueue_tail(prev); + } + + /* We should at least have the idle task */ + ASSERT(!list_empty(&schedule_data[cpu].runqueue)); + + /* + * scan through the run queue and pick the task with the lowest evt + * *and* the task the second lowest evt. + * this code is O(n) but we expect n to be small. + */ + next = schedule_data[cpu].idle; + next_prime = NULL; + + next_evt = ~0U; + next_prime_evt = ~0U; + min_avt = ~0U; + + list_for_each ( tmp, &schedule_data[cpu].runqueue ) + { + p = list_entry(tmp, struct task_struct, run_list); + p_inf = DOM_INF(p); + + if ( p_inf->evt < next_evt ) + { + next_prime = next; + next_prime_evt = next_evt; + next = p; + next_evt = p_inf->evt; + } + else if ( next_prime_evt == ~0U ) + { + next_prime_evt = p_inf->evt; + next_prime = p; + } + else if ( p_inf->evt < next_prime_evt ) + { + next_prime_evt = p_inf->evt; + next_prime = p; + } + + /* Determine system virtual time. */ + if ( p_inf->avt < min_avt ) + min_avt = p_inf->avt; + } + + /* Update system virtual time. 
*/ + if ( min_avt != ~0U ) + CPU_SVT(cpu) = min_avt; + + /* check for virtual time overrun on this cpu */ + if ( CPU_SVT(cpu) >= 0xf0000000 ) + { + u_long t_flags; + write_lock_irqsave(&tasklist_lock, t_flags); + for_each_domain ( p ) + { + if ( p->processor == cpu ) + { + p_inf->evt -= 0xe0000000; + p_inf->avt -= 0xe0000000; + } + } + write_unlock_irqrestore(&tasklist_lock, t_flags); + CPU_SVT(cpu) -= 0xe0000000; + } + + /* work out time for next run through scheduler */ + if ( is_idle_task(next) ) + { + r_time = ctx_allow; + goto sched_done; + } + + if ( (next_prime == NULL) || is_idle_task(next_prime) ) + { + /* We have only one runnable task besides the idle task. */ + r_time = 10 * ctx_allow; /* RN: random constant */ + goto sched_done; + } + + next_prime_inf = DOM_INF(next_prime); + next_inf = DOM_INF(next); + + /* + * If we are here then we have two runnable tasks. + * Work out how long 'next' can run till its evt is greater than + * 'next_prime's evt. Take context switch allowance into account. + */ + ASSERT(next_prime_inf->evt >= next_inf->evt); + + r_time = ((next_prime_inf->evt - next_inf->evt)/next_inf->mcu_advance) + + ctx_allow; + + ASSERT(r_time >= ctx_allow); + + sched_done: + next->min_slice = ctx_allow; + ret.task = next; + ret.time = r_time; + + return ret; +} + + +static void bvt_dump_runq_el(struct task_struct *p) +{ + struct bvt_dom_info *inf = DOM_INF(p); + + printk("mcua=0x%04lX ev=0x%08X av=0x%08X ", + inf->mcu_advance, inf->evt, inf->avt); +} + +static void bvt_dump_settings(void) +{ + printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns ", (u32)MCU, (s32)ctx_allow ); +} + +static void bvt_dump_cpu_state(int i) +{ + printk("svt=0x%08lX ", CPU_SVT(i)); +} + + +/* Initialise the data structures. 
*/ +int bvt_init_scheduler() +{ + int i; + + for ( i = 0; i < NR_CPUS; i++ ) + { + CPU_INF(i) = kmalloc(sizeof(struct bvt_cpu_info), GFP_KERNEL); + + if ( CPU_INF(i) == NULL ) + { + printk("Failed to allocate BVT scheduler private per-CPU memory!\n"); + return -1; + } + + CPU_SVT(i) = 0; /* XXX do I really need to do this? */ + } + + dom_info_cache = kmem_cache_create("BVT dom info", + sizeof(struct bvt_dom_info), + 0, 0, NULL, NULL); + + if ( dom_info_cache == NULL ) + { + printk("BVT: Failed to allocate domain info SLAB cache"); + return -1; + } + + return 0; +} + + +struct scheduler sched_bvt_def = { + .name = "Borrowed Virtual Time", + .opt_name = "bvt", + .sched_id = SCHED_BVT, + + .init_scheduler = bvt_init_scheduler, + .alloc_task = bvt_alloc_task, + .add_task = bvt_add_task, + .free_task = bvt_free_task, + .wake_up = bvt_wake_up, + .do_block = bvt_do_block, + .do_schedule = bvt_do_schedule, + .control = bvt_ctl, + .adjdom = bvt_adjdom, + .dump_settings = bvt_dump_settings, + .dump_cpu_state = bvt_dump_cpu_state, + .dump_runq_el = bvt_dump_runq_el, +}; + diff --git a/xen/common/sched_rrobin.c b/xen/common/sched_rrobin.c new file mode 100644 index 0000000000..ef4db60066 --- /dev/null +++ b/xen/common/sched_rrobin.c @@ -0,0 +1,56 @@ +/**************************************************************************** + * Very stupid Round Robin Scheduler for Xen + * + * by Mark Williamson (C) 2004 Intel Research Cambridge + */ + +#include <xeno/sched.h> +#include <xeno/sched-if.h> +#include <hypervisor-ifs/sched-ctl.h> +#include <xeno/ac_timer.h> +#include <xeno/time.h> + +static s_time_t rr_slice = MILLISECS(10); + +static task_slice_t rr_do_schedule(s_time_t now) +{ + struct task_struct *prev = current; + int cpu = current->processor; + task_slice_t ret; + + __del_from_runqueue(prev); + + if ( prev->state == TASK_RUNNING ) + __add_to_runqueue_tail(prev); + + ret.task = list_entry(schedule_data[cpu].runqueue.next, + struct task_struct, run_list); + + ret.time = 
rr_slice; + + return ret; +} + +static int rr_ctl(struct sched_ctl_cmd *cmd) +{ + rr_slice = cmd->u.rrobin.slice; + return 0; +} + +static void rr_dump_settings() +{ + printk("rr_slice = %llu ", rr_slice); +} + +struct scheduler sched_rrobin_def = { + .name = "Stupid Round Robin Scheduler", + .opt_name = "rrobin", + .sched_id = SCHED_RROBIN, + + .wake_up = __add_to_runqueue_head, + .do_schedule = rr_do_schedule, + .control = rr_ctl, + .dump_settings = rr_dump_settings, +}; + + diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 5487f15e5a..054123077c 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -2,14 +2,16 @@ **************************************************************************** * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge * (C) 2002-2003 University of Cambridge + * (C) 2004 - Mark Williamson - Intel Research Cambridge **************************************************************************** * * File: common/schedule.c * Author: Rolf Neugebauer & Keir Fraser + * Updated for generic API by Mark Williamson * - * Description: CPU scheduling - * implements A Borrowed Virtual Time scheduler. - * (see Duda & Cheriton SOSP'99) + * Description: Generic CPU scheduling code + * implements support functionality for the Xen scheduler API. 
+ * */ #include <xeno/config.h> @@ -23,6 +25,9 @@ #include <xeno/interrupt.h> #include <xeno/timer.h> #include <xeno/perfc.h> +#include <xeno/sched-if.h> +#include <hypervisor-ifs/sched-ctl.h> +#include <xeno/trace.h> /*#define WAKEUP_HISTO*/ /*#define BLOCKTIME_HISTO*/ @@ -33,23 +38,54 @@ #define BUCKETS 200 #endif -#define MCU (s32)MICROSECS(100) /* Minimum unit */ -#define MCU_ADVANCE 10 /* default weight */ #define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */ -static s32 ctx_allow = (s32)MILLISECS(5); /* context switch allowance */ -typedef struct schedule_data_st -{ - struct list_head runqueue; /* runqueue */ - struct task_struct *curr; /* current task */ - struct task_struct *idle; /* idle task for this cpu */ - u32 svt; /* system virtual time. per CPU??? */ - struct ac_timer s_timer; /* scheduling timer */ -#ifdef BUCKETS - u32 hist[BUCKETS]; /* for scheduler latency histogram */ -#endif -} __cacheline_aligned schedule_data_t; -static schedule_data_t schedule_data[NR_CPUS]; +/* XXX MAW pull trace-related #defines out of here and into an auto-generated + * header file later on! */ +#define TRC_SCHED_DOM_ADD 0x00010000 +#define TRC_SCHED_DOM_REM 0x00010001 +#define TRC_SCHED_WAKE 0x00010002 +#define TRC_SCHED_BLOCK 0x00010003 +#define TRC_SCHED_YIELD 0x00010004 +#define TRC_SCHED_SET_TIMER 0x00010005 +#define TRC_SCHED_CTL 0x00010006 +#define TRC_SCHED_ADJDOM 0x00010007 +#define TRC_SCHED_RESCHED 0x00010008 +#define TRC_SCHED_SWITCH 0x00010009 +#define TRC_SCHED_S_TIMER_FN 0x0001000A +#define TRC_SCHED_T_TIMER_FN 0x0001000B +#define TRC_SCHED_DOM_TIMER_FN 0x0001000C +#define TRC_SCHED_FALLBACK_TIMER_FN 0x0001000D + +#define _HIGH32(_x) (_x >> 32) +#define _LOW32(_x) ((u32)_x ) + +/* Various timer handlers. 
*/ +static void s_timer_fn(unsigned long unused); +static void t_timer_fn(unsigned long unused); +static void dom_timer_fn(unsigned long data); +static void fallback_timer_fn(unsigned long unused); + +/* this is global for now so that private implementations can reach it */ +schedule_data_t schedule_data[NR_CPUS]; + +/* XXX would be nice if the schedulers array could get populated + * automagically without having to hack the code in here */ +extern struct scheduler sched_bvt_def, sched_rrobin_def; +static struct scheduler *schedulers[] = { &sched_bvt_def, + &sched_rrobin_def, + NULL}; + +/* scheduler ops for the current scheduler */ +static struct scheduler ops; + +/* for scheduler functions that return void */ +#define SCHED_FN_VOID(fn, ...) do { if ( ops.fn ) ops.fn(__VA_ARGS__); } \ + while (0) + +/* for scheduler functions that return a numeric value */ +#define SCHED_FN_RET(fn, ...) \ + (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) : 0 ) spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned; @@ -62,110 +98,78 @@ static struct ac_timer t_timer[NR_CPUS]; */ static struct ac_timer fallback_timer[NR_CPUS]; -/* Various timer handlers. */ -static void s_timer_fn(unsigned long unused); -static void t_timer_fn(unsigned long unused); -static void dom_timer_fn(unsigned long data); -static void fallback_timer_fn(unsigned long unused); - -/* - * Wrappers for run-queue management. Must be called with the schedule_lock - * held. 
- */ -static inline void __add_to_runqueue_head(struct task_struct * p) -{ - list_add(&p->run_list, &schedule_data[p->processor].runqueue); -} +extern kmem_cache_t *task_struct_cachep; -static inline void __add_to_runqueue_tail(struct task_struct * p) +void free_task_struct(struct task_struct *p) { - list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue); + SCHED_FN_VOID(free_task, p); + kmem_cache_free(task_struct_cachep, p); } -static inline void __del_from_runqueue(struct task_struct * p) +/** + * alloc_task_struct - allocate a new task_struct and sched private structures + */ +struct task_struct *alloc_task_struct(void) { - list_del(&p->run_list); - p->run_list.next = NULL; -} + struct task_struct *p; -static inline int __task_on_runqueue(struct task_struct *p) -{ - return p->run_list.next != NULL; -} + p=((struct task_struct *)kmem_cache_alloc(task_struct_cachep,GFP_KERNEL)); -#define next_domain(p) \\ - list_entry((p)->run_list.next, struct task_struct, run_list) + if ( p == NULL ) + return NULL; -/* - * Calculate the effective virtual time for a domain. Take into account - * warping limits - */ -static void __calc_evt(struct task_struct *p) -{ - s_time_t now = NOW(); - if ( p->warpback ) - { - if ( ((now - p->warped) < p->warpl) && - ((now - p->uwarped) > p->warpu) ) - { - /* allowed to warp */ - p->evt = p->avt - p->warp; - } - else - { - /* warped for too long -> unwarp */ - p->evt = p->avt; - p->uwarped = now; - p->warpback = 0; - } - } - else + memset(p, 0, sizeof(*p)); + + if ( SCHED_FN_RET(alloc_task, p) < 0) { - p->evt = p->avt; + kmem_cache_free(task_struct_cachep, p); + return NULL; } + + return p; } - /* * Add and remove a domain */ void sched_add_domain(struct task_struct *p) { p->state = TASK_STOPPED; - p->mcu_advance = MCU_ADVANCE; - if ( p->domain == IDLE_DOMAIN_ID ) - { - p->avt = p->evt = ~0U; - schedule_data[p->processor].idle = p; - } - else + if( p->domain != IDLE_DOMAIN_ID ) { - /* Set avt end evt to system virtual time. 
*/ - p->avt = schedule_data[p->processor].svt; - p->evt = schedule_data[p->processor].svt; - /* Set some default values here. */ - p->warpback = 0; - p->warp = 0; - p->warpl = 0; - p->warpu = 0; - /* Initialise the per-domain timer. */ init_ac_timer(&p->timer); p->timer.cpu = p->processor; p->timer.data = (unsigned long)p; p->timer.function = &dom_timer_fn; - } + else + { + schedule_data[p->processor].idle = p; + } + + SCHED_FN_VOID(add_task, p); + + TRACE_3D(TRC_SCHED_DOM_ADD, _HIGH32(p->domain), _LOW32(p->domain), p); } +/* XXX race condition here? we could both add and remove a domain at once, in + * theory. ick! */ +/* XXX is the task already removed from the runlist at this point? */ int sched_rem_domain(struct task_struct *p) { int x, y = p->state; do { if ( (x = y) == TASK_DYING ) return 0; } while ( (y = cmpxchg(&p->state, x, TASK_DYING)) != x ); + rem_ac_timer(&p->timer); + + SCHED_FN_VOID(rem_task, p); + + TRACE_3D(TRC_SCHED_DOM_REM, _HIGH32(p->domain), _LOW32(p->domain), p); + return 1; } @@ -174,6 +178,11 @@ void init_idle_task(void) { unsigned long flags; struct task_struct *p = current; + + if ( SCHED_FN_RET (alloc_task, p) < 0) + panic("Failed to allocate scheduler private data for idle task"); + SCHED_FN_VOID(add_task, p); + spin_lock_irqsave(&schedule_lock[p->processor], flags); p->has_cpu = 1; p->state = TASK_RUNNING; @@ -182,31 +191,25 @@ void init_idle_task(void) spin_unlock_irqrestore(&schedule_lock[p->processor], flags); } - void __wake_up(struct task_struct *p) { + TRACE_3D(TRC_SCHED_WAKE, _HIGH32(p->domain), _LOW32(p->domain), p); + ASSERT(p->state != TASK_DYING); if ( unlikely(__task_on_runqueue(p)) ) return; p->state = TASK_RUNNING; - __add_to_runqueue_head(p); - - /* set the BVT parameters */ - if (p->avt < schedule_data[p->processor].svt) - p->avt = schedule_data[p->processor].svt; - /* deal with warping here */ - p->warpback = 1; - p->warped = NOW(); - __calc_evt(p); + SCHED_FN_VOID(wake_up, p); #ifdef WAKEUP_HISTO p->wokenup = NOW(); 
#endif } + void wake_up(struct task_struct *p) { unsigned long flags; @@ -220,9 +223,10 @@ void wake_up(struct task_struct *p) */ static long do_block(void) { + ASSERT(current->domain != IDLE_DOMAIN_ID); set_bit(EVENTS_MASTER_ENABLE_BIT, ¤t->shared_info->events_mask); current->state = TASK_INTERRUPTIBLE; - current->warpback = 0; + TRACE_2D(TRC_SCHED_BLOCK, current->domain, current); __enter_scheduler(); return 0; } @@ -232,6 +236,7 @@ static long do_block(void) */ static long do_yield(void) { + TRACE_2D(TRC_SCHED_YIELD, current->domain, current); __enter_scheduler(); return 0; } @@ -295,37 +300,56 @@ long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo) add_ac_timer(&p->timer); } + TRACE_5D(TRC_SCHED_SET_TIMER, _HIGH32(p->domain), _LOW32(p->domain), + p, timeout_hi, timeout_lo); + return 0; } -/* Control the scheduler. */ -long sched_bvtctl(unsigned long c_allow) +/** + * sched_ctl - dispatch a scheduler control operation + * @cmd: the command passed in the dom0 op + * + * Given a generic scheduler control operation, call the control function for + * the scheduler in use, passing the appropriate control information from the + * union supplied. + */ +long sched_ctl(struct sched_ctl_cmd *cmd) { - ctx_allow = c_allow; - return 0; + TRACE_0D(TRC_SCHED_CTL); + + if ( cmd->if_ver != SCHED_CTL_IF_VER ) + return -EACCES; + + if ( cmd->sched_id != ops.sched_id ) + return -EINVAL; + + return SCHED_FN_RET(control, cmd); } + /* Adjust scheduling parameter for a given domain. */ -long sched_adjdom(domid_t dom, unsigned long mcu_adv, unsigned long warp, - unsigned long warpl, unsigned long warpu) +long sched_adjdom(struct sched_adjdom_cmd *cmd) { - struct task_struct *p; + struct task_struct *p; + + if ( cmd->if_ver != SCHED_CTL_IF_VER ) + return -EACCES; - /* Sanity -- this can avoid divide-by-zero. 
*/ - if ( mcu_adv == 0 ) + if ( cmd->sched_id != ops.sched_id ) return -EINVAL; - p = find_domain_by_id(dom); - if ( p == NULL ) + p = find_domain_by_id(cmd->domain); + + if( p == NULL ) return -ESRCH; - spin_lock_irq(&schedule_lock[p->processor]); - p->mcu_advance = mcu_adv; - spin_unlock_irq(&schedule_lock[p->processor]); + TRACE_2D(TRC_SCHED_ADJDOM, _HIGH32(p->domain), _LOW32(p->domain)); - put_task_struct(p); + SCHED_FN_VOID(adjdom, p, cmd); + put_task_struct(p); return 0; } @@ -339,17 +363,19 @@ long sched_adjdom(domid_t dom, unsigned long mcu_adv, unsigned long warp, */ unsigned long __reschedule(struct task_struct *p) { - int cpu = p->processor; + int cpu = p->processor; struct task_struct *curr; s_time_t now, min_time; + TRACE_3D(TRC_SCHED_RESCHED, _HIGH32(p->domain), _LOW32(p->domain), p); + if ( unlikely(p->has_cpu || !__task_on_runqueue(p)) ) return 0; now = NOW(); curr = schedule_data[cpu].curr; /* domain should run at least for ctx_allow */ - min_time = curr->lastschd + ctx_allow; + min_time = curr->lastschd + curr->min_slice; if ( is_idle_task(curr) || (min_time <= now) ) { @@ -362,161 +388,67 @@ unsigned long __reschedule(struct task_struct *p) if ( schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP ) mod_ac_timer(&schedule_data[cpu].s_timer, min_time); - return 0; + return SCHED_FN_RET(reschedule, p); } - void reschedule(struct task_struct *p) { unsigned long flags, cpu_mask; + spin_lock_irqsave(&schedule_lock[p->processor], flags); cpu_mask = __reschedule(p); + spin_unlock_irqrestore(&schedule_lock[p->processor], flags); hyp_event_notify(cpu_mask); } - /* * The main function - * - deschedule the current domain. - * - pick a new domain. - * i.e., the domain with lowest EVT. - * The runqueue should be ordered by EVT so that is easy. + * - deschedule the current domain (scheduler independent). + * - pick a new domain (scheduler dependent). 
*/ asmlinkage void __enter_scheduler(void) { - struct task_struct *prev = current, *next = NULL, *next_prime, *p; - struct list_head *tmp; + struct task_struct *prev = current, *next = NULL; int cpu = prev->processor; s_time_t now; + task_slice_t next_slice; s32 r_time; /* time for new dom to run */ - s32 ranfor; /* assume we never run longer than 2.1s! */ - s32 mcus; - u32 next_evt, next_prime_evt, min_avt; perfc_incrc(sched_run); + clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); + spin_lock_irq(&schedule_lock[cpu]); now = NOW(); rem_ac_timer(&schedule_data[cpu].s_timer); - + ASSERT(!in_interrupt()); ASSERT(__task_on_runqueue(prev)); ASSERT(prev->state != TASK_UNINTERRUPTIBLE); + ASSERT(prev != NULL); - if ( likely(!is_idle_task(prev)) ) + if ( prev->state == TASK_INTERRUPTIBLE ) { - ranfor = (s32)(now - prev->lastschd); - prev->cpu_time += ranfor; - - /* Calculate mcu and update avt. */ - mcus = (ranfor + MCU - 1) / MCU; - prev->avt += mcus * prev->mcu_advance; - - __calc_evt(prev); - - __del_from_runqueue(prev); - - if ( likely(prev->state == TASK_RUNNING) || - unlikely((prev->state == TASK_INTERRUPTIBLE) && - signal_pending(prev)) ) - { + /* this check is needed to avoid a race condition */ + if ( signal_pending(prev) ) prev->state = TASK_RUNNING; - __add_to_runqueue_tail(prev); - } + else + SCHED_FN_VOID(do_block, prev); } - clear_bit(_HYP_EVENT_NEED_RESCHED, &prev->hyp_events); - - /* We should at least have the idle task */ - ASSERT(!list_empty(&schedule_data[cpu].runqueue)); - - /* - * scan through the run queue and pick the task with the lowest evt - * *and* the task the second lowest evt. - * this code is O(n) but we expect n to be small. 
- */ - next = schedule_data[cpu].idle; - next_prime = NULL; - - next_evt = ~0U; - next_prime_evt = ~0U; - min_avt = ~0U; - - list_for_each ( tmp, &schedule_data[cpu].runqueue ) - { - p = list_entry(tmp, struct task_struct, run_list); - if ( p->evt < next_evt ) - { - next_prime = next; - next_prime_evt = next_evt; - next = p; - next_evt = p->evt; - } - else if ( next_prime_evt == ~0U ) - { - next_prime_evt = p->evt; - next_prime = p; - } - else if ( p->evt < next_prime_evt ) - { - next_prime_evt = p->evt; - next_prime = p; - } - - /* Determine system virtual time. */ - if ( p->avt < min_avt ) - min_avt = p->avt; - } + /* get policy-specific decision on scheduling... */ + next_slice = ops.do_schedule(now); - /* Update system virtual time. */ - if ( min_avt != ~0U ) - schedule_data[cpu].svt = min_avt; + r_time = next_slice.time; + next = next_slice.task; - /* check for virtual time overrun on this cpu */ - if ( schedule_data[cpu].svt >= 0xf0000000 ) - { - u_long t_flags; - write_lock_irqsave(&tasklist_lock, t_flags); - for_each_domain ( p ) - { - if ( p->processor == cpu ) - { - p->evt -= 0xe0000000; - p->avt -= 0xe0000000; - } - } - write_unlock_irqrestore(&tasklist_lock, t_flags); - schedule_data[cpu].svt -= 0xe0000000; - } - - /* work out time for next run through scheduler */ - if ( is_idle_task(next) ) - { - r_time = ctx_allow; - goto sched_done; - } - - if ( (next_prime == NULL) || is_idle_task(next_prime) ) - { - /* We have only one runnable task besides the idle task. */ - r_time = 10 * ctx_allow; /* RN: random constant */ - goto sched_done; - } - - /* - * If we are here then we have two runnable tasks. - * Work out how long 'next' can run till its evt is greater than - * 'next_prime's evt. Take context switch allowance into account. 
- */ - ASSERT(next_prime->evt >= next->evt); - - r_time = ((next_prime->evt - next->evt)/next->mcu_advance) + ctx_allow; + if ( likely(!is_idle_task(prev)) ) + prev->cpu_time += (now - prev->lastschd); - sched_done: - ASSERT(r_time >= ctx_allow); + /* now, switch to the new task... */ prev->has_cpu = 0; next->has_cpu = 1; @@ -537,7 +469,7 @@ asmlinkage void __enter_scheduler(void) if ( unlikely(prev == next) ) return; - + perfc_incrc(sched_ctx); #if defined(WAKEUP_HISTO) @@ -558,6 +490,10 @@ asmlinkage void __enter_scheduler(void) } #endif + TRACE_2D(TRC_SCHED_SWITCH, next->domain, next); + + ASSERT(next->processor == current->processor); + switch_to(prev, next); if ( unlikely(prev->state == TASK_DYING) ) @@ -591,6 +527,8 @@ int idle_cpu(int cpu) /* The scheduler timer: force a run through the scheduler*/ static void s_timer_fn(unsigned long unused) { + TRACE_0D(TRC_SCHED_S_TIMER_FN); + set_bit(_HYP_EVENT_NEED_RESCHED, ¤t->hyp_events); perfc_incrc(sched_irq); } @@ -600,6 +538,8 @@ static void t_timer_fn(unsigned long unused) { struct task_struct *p = current; + TRACE_0D(TRC_SCHED_T_TIMER_FN); + if ( !is_idle_task(p) ) set_bit(_EVENT_TIMER, &p->shared_info->events); @@ -613,6 +553,8 @@ static void dom_timer_fn(unsigned long data) unsigned long cpu_mask = 0; struct task_struct *p = (struct task_struct *)data; + TRACE_0D(TRC_SCHED_DOM_TIMER_FN); + cpu_mask |= mark_guest_event(p, _EVENT_TIMER); guest_event_notify(cpu_mask); } @@ -623,6 +565,8 @@ static void fallback_timer_fn(unsigned long unused) { struct task_struct *p = current; + TRACE_0D(TRC_SCHED_FALLBACK_TIMER_FN); + if ( !is_idle_task(p) ) update_dom_time(p->shared_info); @@ -660,6 +604,29 @@ void __init scheduler_init(void) } schedule_data[0].idle = &idle0_task; + + extern char opt_sched[]; + + for ( i = 0; schedulers[i] != NULL; i++ ) + { + ops = *schedulers[i]; /* fetch operations structure */ + + if(strcmp(ops.opt_name, opt_sched) == 0) + break; + } + + if ( schedulers[i] == NULL ) + printk("Could not find 
scheduler: %s\n", opt_sched); + + printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name); + + if ( ops.do_schedule == NULL) + panic("Chosen scheduler has NULL do_schedule!"); + + if ( SCHED_FN_RET(init_scheduler) < 0 ) + panic("Initialising scheduler failed!"); + + SCHED_FN_VOID(add_task, &idle0_task); } /* @@ -698,12 +665,9 @@ static void dump_rqueue(struct list_head *queue, char *name) (unsigned long) queue->next, (unsigned long) queue->prev); list_for_each (list, queue) { p = list_entry(list, struct task_struct, run_list); - printk("%3d: %llu has=%c mcua=0x%04lX" - " ev=0x%08X av=0x%08X c=0x%X%08X\n", - loop++, p->domain, - p->has_cpu ? 'T':'F', - p->mcu_advance, p->evt, p->avt, - (u32)(p->cpu_time>>32), (u32)p->cpu_time); + printk("%3d: %llu has=%c ", loop++, p->domain, p->has_cpu ? 'T':'F'); + SCHED_FN_VOID(dump_runq_el, p); + printk("c=0x%X%08X\n", (u32)(p->cpu_time>>32), (u32)p->cpu_time); printk(" l: %lx n: %lx p: %lx\n", (unsigned long)list, (unsigned long)list->next, (unsigned long)list->prev); @@ -717,11 +681,13 @@ void dump_runq(u_char key, void *dev_id, struct pt_regs *regs) s_time_t now = NOW(); int i; - printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns NOW=0x%08X%08X\n", - (u32)MCU, (u32)ctx_allow, (u32)(now>>32), (u32)now); + printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name); + SCHED_FN_VOID(dump_settings); + printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now); for (i = 0; i < smp_num_cpus; i++) { spin_lock_irqsave(&schedule_lock[i], flags); - printk("CPU[%02d] svt=0x%08X ", i, (s32)schedule_data[i].svt); + printk("CPU[%02d] ", i); + SCHED_FN_VOID(dump_cpu_state,i); dump_rqueue(&schedule_data[i].runqueue, "rq"); spin_unlock_irqrestore(&schedule_lock[i], flags); } diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h index 8e84f38336..d06a9f641a 100644 --- a/xen/include/hypervisor-ifs/dom0_ops.h +++ b/xen/include/hypervisor-ifs/dom0_ops.h @@ -11,13 +11,14 @@ #define __DOM0_OPS_H__ #include "hypervisor-if.h" 
+#include "sched-ctl.h" /* * Make sure you increment the interface version whenever you modify this file! * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define DOM0_INTERFACE_VERSION 0xAAAA0008 +#define DOM0_INTERFACE_VERSION 0xAAAA0009 #define MAX_CMD_LEN 256 #define MAX_DOMAIN_NAME 16 @@ -74,23 +75,13 @@ typedef struct dom0_builddomain_st full_execution_context_t ctxt; } dom0_builddomain_t; -#define DOM0_BVTCTL 6 -typedef struct dom0_bvtctl_st -{ - /* IN variables. */ - unsigned long ctx_allow; /* context switch allowance */ -} dom0_bvtctl_t; +#define DOM0_SCHEDCTL 6 + /* struct sched_ctl_cmd is from sched-ctl.h */ +typedef struct sched_ctl_cmd dom0_schedctl_t; #define DOM0_ADJUSTDOM 7 -typedef struct dom0_adjustdom_st -{ - /* IN variables. */ - domid_t domain; /* domain id */ - unsigned long mcu_adv; /* mcu advance: inverse of weight */ - unsigned long warp; /* time warp */ - unsigned long warpl; /* warp limit */ - unsigned long warpu; /* unwarp time requirement */ -} dom0_adjustdom_t; +/* struct sched_adjdom_cmd is from sched-ctl.h */ +typedef struct sched_adjdom_cmd dom0_adjustdom_t; #define DOM0_GETDOMAININFO 12 typedef struct dom0_getdomaininfo_st @@ -234,7 +225,7 @@ typedef struct dom0_op_st dom0_stopdomain_t stopdomain; dom0_destroydomain_t destroydomain; dom0_getmemlist_t getmemlist; - dom0_bvtctl_t bvtctl; + dom0_schedctl_t schedctl; dom0_adjustdom_t adjustdom; dom0_builddomain_t builddomain; dom0_getdomaininfo_t getdomaininfo; diff --git a/xen/include/hypervisor-ifs/sched-ctl.h b/xen/include/hypervisor-ifs/sched-ctl.h new file mode 100644 index 0000000000..bb0449908a --- /dev/null +++ b/xen/include/hypervisor-ifs/sched-ctl.h @@ -0,0 +1,68 @@ +/** + * Generic scheduler control interface. 
+ * + * Mark Williamson, (C) 2004 Intel Research Cambridge + */ + +#ifndef _SCHED_CTL_H_ +#define _SCHED_CTL_H_ + +/** + * When this file is changed, increment the version number. This ensures that + * tools will refuse to work (rather than causing a crash) when they're + * out-of-sync with the Xen version number. + */ +#define SCHED_CTL_IF_VER 0x0001 + +/* scheduler types */ +#define SCHED_BVT 0 +#define SCHED_ATROPOS 1 +#define SCHED_RROBIN 2 + +/* generic scheduler control command - union of all scheduler control + * command structures */ +struct sched_ctl_cmd +{ + unsigned int if_ver; + unsigned int sched_id; + + union + { + struct bvt_ctl + { + /* IN variables. */ + unsigned long ctx_allow; /* context switch allowance */ + } bvt; + + struct rrobin_ctl + { + /* IN variables */ + u64 slice; /* round robin time slice */ + } rrobin; + } u; +}; + +struct sched_adjdom_cmd +{ + unsigned int if_ver; + unsigned int sched_id; + domid_t domain; + + union + { + struct bvt_adjdom + { + unsigned long mcu_adv; /* mcu advance: inverse of weight */ + unsigned long warp; /* time warp */ + unsigned long warpl; /* warp limit */ + unsigned long warpu; /* unwarp time requirement */ + } bvt; + + struct atropos_adjdom + { + int xtratime; + } atropos; + } u; +}; + +#endif /* _SCHED_CTL_H_ */ diff --git a/xen/include/xeno/sched-if.h b/xen/include/xeno/sched-if.h new file mode 100644 index 0000000000..683e73d4f6 --- /dev/null +++ b/xen/include/xeno/sched-if.h @@ -0,0 +1,90 @@ +#include <asm/types.h> + +/* + * Additional declarations for the generic scheduler interface. This should + * only be included by files that implement conforming schedulers. 
+ * + * Portions by Mark Williamson are (C) 2004 Intel Research Cambridge + */ + +#define BUCKETS 10 + +typedef struct schedule_data_st +{ + struct list_head runqueue; /* runqueue */ + struct task_struct *curr; /* current task */ + struct task_struct *idle; /* idle task for this cpu */ + void * sched_priv; + struct ac_timer s_timer; /* scheduling timer */ +#ifdef BUCKETS + u32 hist[BUCKETS]; /* for scheduler latency histogram */ +#endif +} __cacheline_aligned schedule_data_t; + + +typedef struct task_slice_st +{ + struct task_struct *task; + s_time_t time; +} task_slice_t; + +struct scheduler +{ + char *name; /* full name for this scheduler */ + char *opt_name; /* option name for this scheduler */ + unsigned int sched_id; /* ID for this scheduler */ + + int (*init_scheduler) (); + int (*alloc_task) (struct task_struct *); + void (*add_task) (struct task_struct *); + void (*free_task) (struct task_struct *); + void (*rem_task) (struct task_struct *); + void (*wake_up) (struct task_struct *); + /* XXX why does do_block need to return anything at all? */ + long (*do_block) (struct task_struct *); + task_slice_t (*do_schedule) (s_time_t); + int (*control) (struct sched_ctl_cmd *); + int (*adjdom) (struct task_struct *, + struct sched_adjdom_cmd *); + s32 (*reschedule) (struct task_struct *); + void (*dump_settings) (void); + void (*dump_cpu_state) (int); + void (*dump_runq_el) (struct task_struct *); +}; + +/* per CPU scheduler information */ +extern schedule_data_t schedule_data[]; + +/* + * Wrappers for run-queue management. Must be called with the schedule_lock + * held. 
+ */ +static inline void __add_to_runqueue_head(struct task_struct * p) +{ + list_add(&p->run_list, &schedule_data[p->processor].runqueue); +} + +static inline void __add_to_runqueue_tail(struct task_struct * p) +{ + list_add_tail(&p->run_list, &schedule_data[p->processor].runqueue); +} + +static inline void __del_from_runqueue(struct task_struct * p) +{ + list_del(&p->run_list); + p->run_list.next = NULL; +} + +static inline int __task_on_runqueue(struct task_struct *p) +{ + return p->run_list.next != NULL; +} + +#define next_domain(p) \\ + list_entry((p)->run_list.next, struct task_struct, run_list) + + +static inline int __runqueue_empty(int cpu) +{ + return list_empty(&schedule_data[cpu].runqueue); +} diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 5f728565ca..dea80d0833 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -114,16 +114,9 @@ struct task_struct s_time_t wokenup; /* time domain got woken up */ struct ac_timer timer; /* one-shot timer for timeout values */ - /* BVT scheduler specific. */ - unsigned long mcu_advance; /* inverse of weight */ - u32 avt; /* actual virtual time */ - u32 evt; /* effective virtual time */ - int warpback; /* warp? 
*/ - long warp; /* virtual time warp */ - long warpl; /* warp limit */ - long warpu; /* unwarp time requirement */ - s_time_t warped; /* time it ran warped last time */ - s_time_t uwarped; /* time it ran unwarped last time */ + s_time_t min_slice; /* minimum time before reschedule */ + + void *sched_priv; /* scheduler-specific data */ /* Network I/O */ net_vif_t *net_vif_list[MAX_DOMAIN_VIFS]; @@ -177,6 +170,7 @@ struct task_struct #define TASK_UNINTERRUPTIBLE 2 #define TASK_STOPPED 4 #define TASK_DYING 8 +#define TASK_SCHED_PRIV 16 #include <asm/uaccess.h> /* for KERNEL_DS */ @@ -186,8 +180,6 @@ struct task_struct domain: IDLE_DOMAIN_ID, \ state: TASK_RUNNING, \ has_cpu: 0, \ - evt: 0xffffffff, \ - avt: 0xffffffff, \ mm: IDLE0_MM, \ addr_limit: KERNEL_DS, \ thread: INIT_THREAD, \ @@ -202,9 +194,9 @@ extern struct task_struct *idle_task[NR_CPUS]; #include <xeno/slab.h> -extern kmem_cache_t *task_struct_cachep; -#define alloc_task_struct() \ - ((struct task_struct *)kmem_cache_alloc(task_struct_cachep,GFP_KERNEL)) +void free_task_struct(struct task_struct *p); +struct task_struct *alloc_task_struct(); + #define put_task_struct(_p) \ if ( atomic_dec_and_test(&(_p)->refcnt) ) release_task(_p) #define get_task_struct(_p) \ @@ -251,15 +243,14 @@ extern spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned; void scheduler_init(void); void schedulers_start(void); void sched_add_domain(struct task_struct *p); -int sched_rem_domain(struct task_struct *p); -long sched_bvtctl(unsigned long ctx_allow); -long sched_adjdom(domid_t dom, unsigned long mcu_adv, unsigned long warp, - unsigned long warpl, unsigned long warpu); +int sched_rem_domain(struct task_struct *p); +long sched_ctl(struct sched_ctl_cmd *); +long sched_adjdom(struct sched_adjdom_cmd *); void init_idle_task(void); void __wake_up(struct task_struct *p); void wake_up(struct task_struct *p); -unsigned long __reschedule(struct task_struct *p); void reschedule(struct task_struct *p); +unsigned long __reschedule(struct 
task_struct *p); /* NB. Limited entry in Xen. Not for arbitrary use! */ asmlinkage void __enter_scheduler(void); @@ -302,4 +293,4 @@ extern struct task_struct *task_list; extern void update_process_times(int user); -#endif +#endif /*_LINUX_SCHED_H */ |