aboutsummaryrefslogtreecommitdiffstats
path: root/docs
diff options
context:
space:
mode:
Diffstat (limited to 'docs')
-rw-r--r--docs/figs/xenserver.obj312
-rw-r--r--docs/misc/blkif-drivers-explained.txt4
-rw-r--r--docs/src/interface.tex896
-rw-r--r--docs/src/user.tex280
4 files changed, 1070 insertions, 422 deletions
diff --git a/docs/figs/xenserver.obj b/docs/figs/xenserver.obj
new file mode 100644
index 0000000000..4d2da1183f
--- /dev/null
+++ b/docs/figs/xenserver.obj
@@ -0,0 +1,312 @@
+%TGIF 4.1.8
+state(0,37,100.000,0,108,0,4,1,16,2,2,2,0,1,2,1,1,'Helvetica-Oblique',2,80640,0,8,1,5,-4,0,1,1,0,16,1,0,1,1,1,1,1088,1408,0,0,2880,0).
+%
+% @(#)$Header$
+% %W%
+%
+unit("1 pixel/pixel").
+color_info(28,65535,0,[
+ "black", 0, 0, 0, 0, 0, 0, 1,
+ "gray10", 6682, 6682, 6682, 6682, 6682, 6682, 1,
+ "gray20", 13107, 13107, 13107, 13107, 13107, 13107, 1,
+ "gray30", 19789, 19789, 19789, 19789, 19789, 19789, 1,
+ "gray40", 26214, 26214, 26214, 26214, 26214, 26214, 1,
+ "gray50", 32639, 32639, 32639, 32639, 32639, 32639, 1,
+ "gray60", 39321, 39321, 39321, 39321, 39321, 39321, 1,
+ "gray70", 46003, 46003, 46003, 46003, 46003, 46003, 1,
+ "gray80", 52428, 52428, 52428, 52428, 52428, 52428, 1,
+ "gray90", 58853, 58853, 58853, 58853, 58853, 58853, 1,
+ "white", 65535, 65535, 65535, 65535, 65535, 65535, 1,
+ "red", 65535, 0, 0, 65535, 0, 0, 1,
+ "orange", 65535, 42405, 0, 65535, 42405, 0, 1,
+ "yellow", 65535, 65535, 0, 65535, 65535, 0, 1,
+ "green", 0, 65535, 0, 0, 65535, 0, 1,
+ "blue", 0, 0, 65535, 0, 0, 65535, 1,
+ "blue4", 0, 0, 35723, 0, 0, 35723, 1,
+ "violet", 61166, 33410, 61166, 61166, 33410, 61166, 1,
+ "magenta", 65535, 0, 65535, 65535, 0, 65535, 1,
+ "cyan", 0, 65535, 65535, 0, 65535, 65535, 1,
+ "wheat", 62965, 57054, 46003, 62965, 57054, 46003, 1,
+ "wheat3", 52685, 47802, 38550, 52685, 47802, 38550, 1,
+ "wheat4", 35723, 32382, 26214, 35723, 32382, 26214, 1,
+ "pink", 65535, 49344, 52171, 65535, 49344, 52171, 1,
+ "palegreen", 39064, 64507, 39064, 39064, 64507, 39064, 1,
+ "skyblue", 34695, 52942, 60395, 34695, 52942, 60395, 1,
+ "CadetBlue", 24415, 40606, 41120, 24415, 40606, 41120, 1,
+ "DarkSlateGray", 12079, 20303, 20303, 12079, 20303, 20303, 1
+]).
+script_frac("0.6").
+fg_bg_colors('blue4','gray90').
+page(1,"",1,'').
+group([
+rcbox('gray90','',375,225,440,435,1,2,1,8,16,69683,0,0,0,0,'2',0,[
+]),
+rcbox('gray20','',375,225,440,435,0,2,1,8,16,69684,0,0,0,0,'2',0,[
+])
+],
+69682,0,0,[
+]).
+group([
+rcbox('gray90','',450,225,515,435,1,2,1,8,16,69623,0,0,0,0,'2',0,[
+]),
+rcbox('gray20','',450,225,515,435,0,2,1,8,16,69624,0,0,0,0,'2',0,[
+])
+],
+69622,0,0,[
+]).
+group([
+rcbox('gray90','',525,225,590,435,1,2,1,8,16,69119,0,0,0,0,'2',0,[
+]),
+rcbox('gray20','',525,225,590,435,0,2,1,8,16,69120,0,0,0,0,'2',0,[
+])
+],
+69366,0,0,[
+]).
+box('gray40','',227,457,607,502,1,2,1,69020,0,0,0,0,0,'2',0,[
+]).
+box('gray40','',235,335,340,435,1,2,1,69017,0,0,0,0,0,'2',0,[
+]).
+box('gray40','',235,230,340,330,1,2,1,69001,0,0,0,0,0,'2',0,[
+]).
+box('gray80','',230,330,335,430,1,2,1,68660,0,0,0,0,0,'2',0,[
+]).
+box('gray80','',230,225,335,325,1,2,1,68663,0,0,0,0,0,'2',0,[
+]).
+box('gray70','',222,452,602,497,1,2,1,68416,0,0,0,0,0,'2',0,[
+]).
+text('black',621,451,3,1,1,14,55,68422,18,5,0,-7,0,0,2,14,55,-1,2,"",0,0,0,0,469,'',[
+minilines(14,55,-1,2,1,-7,0,[
+mini_line(12,18,5,-1,2,0,[
+str_block(0,12,18,5,-1,2,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,12,18,5,-1,2,0,0,0,0,0,
+ "X")])
+]),
+mini_line(12,18,5,0,1,0,[
+str_block(0,12,18,5,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,12,18,5,0,1,0,0,0,0,0,
+ "E")])
+]),
+mini_line(14,18,5,0,1,0,[
+str_block(0,14,18,5,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,14,18,5,0,1,0,0,0,0,0,
+ "N")])
+])
+])]).
+text('black',282,244,3,1,1,99,60,68643,16,4,0,0,0,0,2,99,60,-1,0,"",0,0,0,0,260,'',[
+minilines(99,60,-1,0,1,0,0,[
+mini_line(98,16,4,0,0,0,[
+str_block(0,98,16,4,0,-4,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,97920,98,16,4,0,-4,0,0,0,0,0,
+ "Control and ")])
+]),
+mini_line(99,16,4,-1,0,0,[
+str_block(0,99,16,4,-1,0,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,97920,99,16,4,-1,0,0,0,0,0,0,
+ "Management")])
+]),
+mini_line(69,16,4,0,0,0,[
+str_block(0,69,16,4,0,-1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,97920,69,16,4,0,-1,0,0,0,0,0,
+ "Software")])
+])
+])]).
+text('black',280,349,2,1,1,85,42,68748,18,5,0,-4,0,0,2,85,42,0,1,"",0,0,0,0,367,'',[
+minilines(85,42,0,1,1,-4,0,[
+mini_line(85,18,5,0,1,0,[
+str_block(0,85,18,5,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,85,18,5,0,1,0,0,0,0,0,
+ "Privileged")])
+]),
+mini_line(79,18,5,0,0,0,[
+str_block(0,79,18,5,0,0,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,79,18,5,0,0,0,0,0,0,0,
+ "GuestOS")])
+])
+])]).
+text('black',280,393,1,1,1,74,17,68749,14,3,0,-4,0,0,2,74,17,0,0,"",0,0,0,0,407,'',[
+minilines(74,17,0,0,1,-4,0,[
+mini_line(74,14,3,0,0,0,[
+str_block(0,74,14,3,0,0,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,80640,74,14,3,0,0,0,0,0,0,0,
+ "(XenLinux)")])
+])
+])]).
+box('gray80','',236,459,351,489,1,2,1,68474,0,0,0,0,0,'2',0,[
+]).
+text('black',291,465,1,1,1,92,17,68470,14,3,0,-4,0,0,2,92,17,0,3,"",0,0,0,0,479,'',[
+minilines(92,17,0,3,1,-4,0,[
+mini_line(92,14,3,0,3,0,[
+str_block(0,92,14,3,0,3,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,80640,92,14,3,0,3,0,0,0,0,0,
+ "VM control i/f")])
+])
+])]).
+box('gray80','',396,459,586,489,1,2,1,68895,0,0,0,0,0,'2',0,[
+]).
+text('black',496,465,1,1,1,143,17,68896,14,3,0,-4,0,0,2,143,17,0,1,"",0,0,0,0,479,'',[
+minilines(143,17,0,1,1,-4,0,[
+mini_line(143,14,3,0,1,0,[
+str_block(0,143,14,3,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,80640,143,14,3,0,1,0,0,0,0,0,
+ "Virtualized Hardware")])
+])
+])]).
+poly('black','',2,[
+ 355,473,395,473],1,4,1,68913,0,2,0,0,0,0,0,'4',0,0,
+ "0","",[
+ 0,14,6,0,'14','6','0'],[0,14,6,0,'14','6','0'],[
+]).
+poly('black','',2,[
+ 280,423,280,453],1,4,1,68941,0,2,0,0,0,0,0,'4',0,0,
+ "0","",[
+ 0,14,6,0,'14','6','0'],[0,14,6,0,'14','6','0'],[
+]).
+poly('black','',2,[
+ 255,313,255,343],1,2,1,68983,0,2,0,0,0,0,0,'2',0,0,
+ "0","",[
+ 0,10,4,0,'10','4','0'],[0,10,4,0,'10','4','0'],[
+]).
+poly('black','',2,[
+ 305,313,305,343],1,2,1,68984,0,2,0,0,0,0,0,'2',0,0,
+ "0","",[
+ 0,10,4,0,'10','4','0'],[0,10,4,0,'10','4','0'],[
+]).
+poly('black','',2,[
+ 280,313,280,343],1,2,1,68989,0,2,0,0,0,0,0,'2',0,0,
+ "0","",[
+ 0,10,4,0,'10','4','0'],[0,10,4,0,'10','4','0'],[
+]).
+box('gray70','',284,520,609,555,1,2,1,68490,0,0,0,0,0,'2',0,[
+]).
+text('black',444,527,1,1,1,276,23,68493,18,5,0,-8,0,0,2,276,23,0,0,"",0,0,0,0,545,'',[
+minilines(276,23,0,0,1,-8,0,[
+mini_line(276,18,5,0,0,0,[
+str_block(0,276,18,5,0,-1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,276,18,5,0,-1,0,0,0,0,0,
+ "H/W (SMP x86, mem, net, block)")])
+])
+])]).
+poly('black','',2,[
+ 445,483,445,518],3,3,1,68528,0,2,0,0,0,0,0,'3',0,0,
+ "0","",[
+ 0,12,5,0,'12','5','0'],[0,12,5,0,'12','5','0'],[
+]).
+poly('black','',2,[
+ 500,483,500,518],3,3,1,68529,0,2,0,0,0,0,0,'3',0,0,
+ "0","",[
+ 0,12,5,0,'12','5','0'],[0,12,5,0,'12','5','0'],[
+]).
+poly('black','',2,[
+ 555,483,555,518],3,3,1,68530,0,2,0,0,0,0,0,'3',0,0,
+ "0","",[
+ 0,12,5,0,'12','5','0'],[0,12,5,0,'12','5','0'],[
+]).
+text('black',405,254,2,1,1,34,36,68698,16,4,0,-4,0,0,2,34,36,0,1,"",0,0,0,0,270,'',[
+minilines(34,36,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(32,16,4,0,1,0,[
+str_block(0,32,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,32,16,4,0,1,0,0,0,0,0,
+ "S/W")])
+])
+])]).
+text('black',405,354,3,1,1,44,52,69100,16,4,0,-4,0,0,2,44,52,0,1,"",0,0,0,0,370,'',[
+minilines(44,52,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(44,16,4,0,0,0,[
+str_block(0,44,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,44,16,4,0,0,0,0,0,0,0,
+ "Guest")])
+]),
+mini_line(24,16,4,0,0,0,[
+str_block(0,24,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,24,16,4,0,0,0,0,0,0,0,
+ "OS")])
+])
+])]).
+text('black',480,254,2,1,1,34,36,69114,16,4,0,-4,0,0,2,34,36,0,1,"",0,0,0,0,270,'',[
+minilines(34,36,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(32,16,4,0,1,0,[
+str_block(0,32,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,32,16,4,0,1,0,0,0,0,0,
+ "S/W")])
+])
+])]).
+text('black',480,354,3,1,1,44,52,69115,16,4,0,-4,0,0,2,44,52,0,1,"",0,0,0,0,370,'',[
+minilines(44,52,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(44,16,4,0,0,0,[
+str_block(0,44,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,44,16,4,0,0,0,0,0,0,0,
+ "Guest")])
+]),
+mini_line(24,16,4,0,0,0,[
+str_block(0,24,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,24,16,4,0,0,0,0,0,0,0,
+ "OS")])
+])
+])]).
+text('black',555,254,2,1,1,34,36,69116,16,4,0,-4,0,0,2,34,36,0,1,"",0,0,0,0,270,'',[
+minilines(34,36,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(32,16,4,0,1,0,[
+str_block(0,32,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,32,16,4,0,1,0,0,0,0,0,
+ "S/W")])
+])
+])]).
+text('black',555,354,3,1,1,44,52,69117,16,4,0,-4,0,0,2,44,52,0,1,"",0,0,0,0,370,'',[
+minilines(44,52,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(44,16,4,0,0,0,[
+str_block(0,44,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,44,16,4,0,0,0,0,0,0,0,
+ "Guest")])
+]),
+mini_line(24,16,4,0,0,0,[
+str_block(0,24,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,24,16,4,0,0,0,0,0,0,0,
+ "OS")])
+])
+])]).
+text('black',282,201,1,1,1,116,17,69753,14,3,2,-4,0,0,2,116,17,0,2,"",0,0,0,0,215,'',[
+minilines(116,17,0,2,1,-4,0,[
+mini_line(116,14,3,0,2,0,[
+str_block(0,116,14,3,0,2,0,0,0,[
+str_seg('blue4','Helvetica-BoldOblique',3,80640,116,14,3,0,2,0,0,0,0,0,
+ "Management VM")])
+])
+])]).
+text('black',480,201,1,1,1,146,17,69770,14,3,2,-4,0,0,2,146,17,0,0,"",0,0,0,0,215,'',[
+minilines(146,17,0,0,1,-4,0,[
+mini_line(146,14,3,0,0,0,[
+str_block(0,146,14,3,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,80640,146,14,3,0,0,0,0,0,0,0,
+ "User Virtual Machines")])
+])
+])]).
diff --git a/docs/misc/blkif-drivers-explained.txt b/docs/misc/blkif-drivers-explained.txt
index 8f6f7a498a..adf1d3738a 100644
--- a/docs/misc/blkif-drivers-explained.txt
+++ b/docs/misc/blkif-drivers-explained.txt
@@ -244,8 +244,8 @@ records. Pointers may only advance, and may not pass one another.
By adopting the convention that every request will receive a response,
not all four pointers need be shared and flow control on the ring
becomes very easy to manage. Each domain manages its own
-consumer pointer, and the two producer pointers are visible to both (Xen/include/hypervisor-ifs/io/blkif.h):
-
+consumer pointer, and the two producer pointers are visible to both
+(xen/include/public/io/blkif.h):
/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <=PAGE_SIZE.*/
diff --git a/docs/src/interface.tex b/docs/src/interface.tex
index 6dc6879df4..752ab1d157 100644
--- a/docs/src/interface.tex
+++ b/docs/src/interface.tex
@@ -1,5 +1,6 @@
\documentclass[11pt,twoside,final,openright]{xenstyle}
\usepackage{a4,graphicx,setspace,times}
+\usepackage{comment,parskip}
\setstretch{1.15}
\begin{document}
@@ -16,12 +17,19 @@
{\Huge \bf Interface manual} \\[4mm]
{\huge Xen v2.0 for x86} \\[80mm]
-{\Large Xen is Copyright (c) 2004, The Xen Team} \\[3mm]
+{\Large Xen is Copyright (c) 2002-2004, The Xen Team} \\[3mm]
{\Large University of Cambridge, UK} \\[20mm]
-{\large Last updated on 11th March, 2004}
\end{tabular}
-\vfill
\end{center}
+
+{\bf
+DISCLAIMER: This documentation is currently under active development
+and as such there may be mistakes and omissions --- watch out for
+these and please report any you find to the developers' mailing list.
+Contributions of material, suggestions and corrections are welcome.
+}
+
+\vfill
\cleardoublepage
% TABLE OF CONTENTS
@@ -45,205 +53,270 @@
\setstretch{1.15}
\chapter{Introduction}
-Xen allows the hardware resouces of a machine to be virtualized and
-dynamically partitioned such as to allow multiple different 'guest'
-operating system images to be run simultaneously.
-
-Virtualizing the machine in this manner provides flexibility allowing
-different users to choose their preferred operating system (Windows,
-Linux, NetBSD, or a custom operating system). Furthermore, Xen provides
-secure partitioning between these 'domains', and enables better resource
+
+Xen allows the hardware resources of a machine to be virtualized and
+dynamically partitioned, allowing multiple different {\em guest}
+operating system images to be run simultaneously. Virtualizing the
+machine in this manner provides considerable flexibility, for example
+allowing different users to choose their preferred operating system
+(e.g., Linux, NetBSD, or a custom operating system). Furthermore, Xen
+provides secure partitioning between virtual machines (known as
+{\em domains} in Xen terminology), and enables better resource
accounting and QoS isolation than can be achieved with a conventional
-operating system.
-
-The hypervisor runs directly on server hardware and dynamically partitions
-it between a number of {\it domains}, each of which hosts an instance
-of a {\it guest operating system}. The hypervisor provides just enough
-abstraction of the machine to allow effective isolation and resource
-management between these domains.
-
-Xen essentially takes a virtual machine approach as pioneered by IBM
-VM/370. However, unlike VM/370 or more recent efforts such as VMWare
-and Virtual PC, Xen doesn not attempt to completely virtualize the
-underlying hardware. Instead parts of the hosted guest operating
-systems are modified to work with the hypervisor; the operating system
-is effectively ported to a new target architecture, typically
-requiring changes in just the machine-dependent code. The user-level
-API is unchanged, thus existing binaries and operating system
-distributions can work unmodified.
-
-In addition to exporting virtualized instances of CPU, memory, network and
-block devicees, Xen exposes a control interface to set how these resources
-are shared between the running domains. The control interface is privileged
-and may only be accessed by one particular virtual machine: {\it domain0}.
-This domain is a required part of any Xen-base server and runs the application
-software that manages the control-plane aspects of the platform. Running the
-control software in {\it domain0}, distinct from the hypervisor itself, allows
-the Xen framework to separate the notions of {\it mechanism} and {\it policy}
-within the system.
-
-
-\chapter{CPU state}
+operating system.
+
+Xen essentially takes a `whole machine' virtualization approach as
+pioneered by IBM VM/370. However, unlike VM/370 or more recent
+efforts such as VMWare and Virtual PC, Xen does not attempt to
+completely virtualize the underlying hardware. Instead parts of the
+hosted guest operating systems are modified to work with the VMM; the
+operating system is effectively ported to a new target architecture,
+typically requiring changes in just the machine-dependent code. The
+user-level API is unchanged, and so existing binaries and operating
+system distributions work without modification.
+
+In addition to exporting virtualized instances of CPU, memory, network
+and block devices, Xen exposes a control interface to manage how these
+resources are shared between the running domains. Access to the
+control interface is restricted: it may only be used by one
+specially-privileged VM, known as {\em domain-0}. This domain is a
+required part of any Xen-based server and runs the application software
+that manages the control-plane aspects of the platform. Running the
+control software in {\it domain-0}, distinct from the hypervisor
+itself, allows the Xen framework to separate the notions of
+mechanism and policy within the system.
+
+
+
+\chapter{Virtual Architecture}
+
+On a Xen-based system, the hypervisor itself runs in {\it ring 0}. It
+has full access to the physical memory available in the system and is
+responsible for allocating portions of it to the domains. Guest
+operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
+they see fit. Segmentation is used to prevent the guest OS from
+accessing the portion of the address space that is reserved for
+Xen. We expect most guest operating systems will use ring 1 for their
+own operation and place applications in ring 3.
+
+In this chapter we consider the basic virtual architecture provided
+by Xen: the basic CPU state, exception and interrupt handling, and
+time. Other aspects such as memory and device access are discussed
+in later chapters.
+
+\section{CPU state}
All privileged state must be handled by Xen. The guest OS has no
direct access to CR3 and is not permitted to update privileged bits in
-EFLAGS.
-
-\chapter{Exceptions}
-The IDT is virtualised by submitting a virtual 'trap
-table' to Xen. Most trap handlers are identical to native x86
-handlers. The page-fault handler is a noteable exception.
-
-\chapter{Interrupts and events}
-Interrupts are virtualized by mapping them to events, which are delivered
-asynchronously to the target domain. A guest OS can map these events onto
-its standard interrupt dispatch mechanisms, such as a simple vectoring
-scheme. Each physical interrupt source controlled by the hypervisor, including
-network devices, disks, or the timer subsystem, is responsible for identifying
-the target for an incoming interrupt and sending an event to that domain.
-
-This demultiplexing mechanism also provides a device-specific mechanism for
-event coalescing or hold-off. For example, a guest OS may request to only
-actually receive an event after {\it n} packets are queued ready for delivery
-to it, {\it t} nanoseconds after the first packet arrived (which ever is true
-first). This allows latency and throughput requirements to be addressed on a
-domain-specific basis.
-
-\chapter{Time}
-Guest operating systems need to be aware of the passage of real time and their
-own ``virtual time'', i.e. the time they have been executing. Furthermore, a
-notion of time is required in the hypervisor itself for scheduling and the
-activities that relate to it. To this end the hypervisor provides for notions
-of time: cycle counter time, system time, wall clock time, domain virtual
-time.
-
-
-\section{Cycle counter time}
-This provides the finest-grained, free-running time reference, with the
-approximate frequency being publicly accessible. The cycle counter time is
+EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
+these are analogous to system calls but occur from ring 1 to ring 0.
+
+A list of all hypercalls is given in Appendix~\ref{a:hypercalls}.
+
+
+
+\section{Exceptions}
+
+A virtual IDT is provided --- a domain can submit a table of trap
+handlers to Xen via the {\tt set\_trap\_table()} hypercall. Most trap
+handlers are identical to native x86 handlers, although the page-fault
+handler is somewhat different.
+
+
+\section{Interrupts and events}
+
+Interrupts are virtualized by mapping them to events, which are
+delivered asynchronously to the target domain. A guest OS can map
+these events onto its standard interrupt dispatch mechanisms. Xen
+is responsible for determining the target domain that will handle
+each physical interrupt source.
+
+
+\section{Time}
+
+Guest operating systems need to be aware of the passage of both real
+(or wallclock) time and their own `virtual time' (the time for
+which they have been executing). Furthermore, Xen has a notion of
+time which is used for scheduling. The following notions of
+time are provided:
+
+\begin{description}
+\item[Cycle counter time.]
+
+This provides a fine-grained time reference. The cycle counter time is
used to accurately extrapolate the other time references. On SMP machines
it is currently assumed that the cycle counter time is synchronised between
CPUs. The current x86-based implementation achieves this within inter-CPU
communication latencies.
-\section{System time}
-This is a 64-bit value containing the nanoseconds elapsed since boot
-time. Unlike cycle counter time, system time accurately reflects the
-passage of real time, i.e. it is adjusted several times a second for timer
-drift. This is done by running an NTP client in {\it domain0} on behalf of
-the machine, feeding updates to the hypervisor. Intermediate values can be
-extrapolated using the cycle counter.
-
-\section{Wall clock time}
-This is the actual ``time of day'' Unix style struct timeval (i.e. seconds and
-microseconds since 1 January 1970, adjusted by leap seconds etc.). Again, an
-NTP client hosted by {\it domain0} can help maintain this value. To guest
-operating systems this value will be reported instead of the hardware RTC
-clock value and they can use the system time and cycle counter times to start
-and remain perfectly in time.
-
-
-\section{Domain virtual time}
-This progresses at the same pace as cycle counter time, but only while a
-domain is executing. It stops while a domain is de-scheduled. Therefore the
-share of the CPU that a domain receives is indicated by the rate at which
-its domain virtual time increases, relative to the rate at which cycle
-counter time does so.
-
-\section{Time interface}
-Xen exports some timestamps to guest operating systems through their shared
-info page. Timestamps are provided for system time and wall-clock time. Xen
-also provides the cycle counter values at the time of the last update
-allowing guests to calculate the current values. The cpu frequency and a
-scaling factor are provided for guests to convert cycle counter values to
-real time. Since all time stamps need to be updated and read
-\emph{atomically} two version numbers are also stored in the shared info
-page.
-
-Xen will ensure that the time stamps are updated frequently enough to avoid
-an overflow of the cycle counter values. A guest can check if its notion of
-time is up-to-date by comparing the version numbers.
-
-\section{Timer events}
-
-Xen maintains a periodic timer (currently with a 10ms period) which sends a
-timer event to the currently executing domain. This allows Guest OSes to
-keep track of the passing of time when executing. The scheduler also
-arranges for a newly activated domain to receive a timer event when
-scheduled so that the Guest OS can adjust to the passage of time while it
-has been inactive.
-
-In addition, Xen exports a hypercall interface to each domain which allows
-them to request a timer event sent to them at the specified system
-time. Guest OSes may use this timer to implement timeout values when they
-block.
+\item[System time.]
+
+This is a 64-bit counter which holds the number of nanoseconds that
+have elapsed since system boot.
+
+
+\item[Wall clock time.]
+
+This is the time of day in a Unix-style {\tt struct timeval} (seconds
+and microseconds since 1 January 1970, adjusted by leap seconds). An
+NTP client hosted by {\it domain-0} can keep this value accurate.
+
+
+\item[Domain virtual time.]
+
+This progresses at the same pace as system time, but only while a
+domain is executing --- it stops while a domain is de-scheduled.
+Therefore the share of the CPU that a domain receives is indicated by
+the rate at which its virtual time increases.
+
+\end{description}
+
+
+Xen exports timestamps for system time and wall-clock time to guest
+operating systems through a shared page of memory. Xen also provides
+the cycle counter time at the instant the timestamps were calculated,
+and the CPU frequency in Hertz. This allows the guest to extrapolate
+system and wall-clock times accurately based on the current cycle
+counter time.
+
+Since all time stamps need to be updated and read \emph{atomically}
+two version numbers are also stored in the shared info page. The
+first is incremented prior to an update, while the second is only
+incremented afterwards. Thus a guest can be sure that it read a consistent
+state by checking the two version numbers are equal.
+
+Xen includes a periodic ticker which sends a timer event to the
+currently executing domain every 10ms. The Xen scheduler also sends a
+timer event whenever a domain is scheduled; this allows the guest OS
+to adjust for the time that has passed while it has been inactive. In
+addition, Xen allows each domain to request that it receive a timer
+event at a specified system time by using the {\tt
+set\_timer\_op()} hypercall. Guest OSes may use this timer to
+implement timeout values when they block.
+
\chapter{Memory}
-The hypervisor is responsible for providing memory to each of the
-domains running over it. However, the Xen hypervisor's duty is
-restricted to managing physical memory and to policying page table
-updates. All other memory management functions are handled
-externally. Start-of-day issues such as building initial page tables
-for a domain, loading its kernel image and so on are done by the {\it
-domain builder} running in user-space in {\it domain0}. Paging to
-disk and swapping is handled by the guest operating systems
-themselves, if they need it.
+Xen is responsible for managing the allocation of physical memory to
+domains, and for ensuring safe use of the paging and segmentation
+hardware.
+
+
+\section{Memory Allocation}
+
+
+Xen resides within a small fixed portion of physical memory; it also
+reserves the top 64MB of every virtual address space. The remaining
+physical memory is available for allocation to domains at a page
+granularity. Xen tracks the ownership and use of each page, which
+allows it to enforce secure partitioning between domains.
+
+Each domain has a maximum and current physical memory allocation.
+A guest OS may run a `balloon driver' to dynamically adjust its
+current memory allocation up to its limit.
+
+
+%% XXX SMH: I use machine and physical in the next section (which
+%% is kinda required for consistency with code); wonder if this
+%% section should use same terms?
+%%
+%% Probably.
+%%
+%% Merging this and below section at some point prob makes sense.
+
+\section{Pseudo-Physical Memory}
+
+Since physical memory is allocated and freed on a page granularity,
+there is no guarantee that a domain will receive a contiguous stretch
+of physical memory. However most operating systems do not have good
+support for operating in a fragmented physical address space. To aid
+porting such operating systems to run on top of Xen, we make a
+distinction between \emph{machine memory} and \emph{pseudo-physical
+memory}.
+
+Put simply, machine memory refers to the entire amount of memory
+installed in the machine, including that reserved by Xen, in use by
+various domains, or currently unallocated. We consider machine memory
+to comprise a set of 4K \emph{machine page frames} numbered
+consecutively starting from 0. Machine frame numbers mean the same
+within Xen or any domain.
+
+Pseudo-physical memory, on the other hand, is a per-domain
+abstraction. It allows a guest operating system to consider its memory
+allocation to consist of a contiguous range of physical page frames
+starting at physical frame 0, despite the fact that the underlying
+machine page frames may be sparsely allocated and in any order.
+
+To achieve this, Xen maintains a globally readable {\it
+machine-to-physical} table which records the mapping from machine page
+frames to pseudo-physical ones. In addition, each domain is supplied
+with a {\it physical-to-machine} table which performs the inverse
+mapping. Clearly the machine-to-physical table has size proportional
+to the amount of RAM installed in the machine, while each
+physical-to-machine table has size proportional to the memory
+allocation of the given domain.
+
+Architecture dependent code in guest operating systems can then use
+the two tables to provide the abstraction of pseudo-physical
+memory. In general, only certain specialized parts of the operating
+system (such as page table management) need to understand the
+difference between machine and pseudo-physical addresses.
-On a Xen-based system, the hypervisor itself runs in {\it ring 0}. It
-has full access to the physical memory available in the system and is
-responsible for allocating portions of it to the domains. Guest
-operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
-they see fit, aside from the fact that segmentation is used to prevent
-the guest OS from accessing a portion of the linear address space that
-is reserved for use by the hypervisor. This approach allows
-transitions between the guest OS and hypervisor without flushing the
-TLB. We expect most guest operating systems will use ring 1 for their
-own operation and place applications (if they support such a notion)
-in ring 3.
-
-\section{Physical Memory Allocation}
-The hypervisor reserves a small fixed portion of physical memory at
-system boot time. This special memory region is located at the
-beginning of physical memory and is mapped at the very top of every
-virtual address space.
-
-Any physical memory that is not used directly by the hypervisor is divided into
-pages and is available for allocation to domains. The hypervisor tracks which
-pages are free and which pages have been allocated to each domain. When a new
-domain is initialized, the hypervisor allocates it pages drawn from the free
-list. The amount of memory required by the domain is passed to the hypervisor
-as one of the parameters for new domain initialization by the domain builder.
-
-Domains can never be allocated further memory beyond that which was
-requested for them on initialization. However, a domain can return
-pages to the hypervisor if it discovers that its memory requirements
-have diminished.
-
-% put reasons for why pages might be returned here.
\section{Page Table Updates}
-In addition to managing physical memory allocation, the hypervisor is also in
-charge of performing page table updates on behalf of the domains. This is
-neccessary to prevent domains from adding arbitrary mappings to their page
-tables or introducing mappings to other's page tables.
-
-\section{Writabel Page Tables}
-A domain can also request write access to its page tables. In this
-mode, Xen notes write attempts to page table pages and makes the page
-temporarily writable. In-use page table pages are also disconnect
-from the page directory. The domain can now update entries in these
-page table pages without the assistance of Xen. As soon as the
-writabel page table pages get used as page table pages, Xen makes the
-pages read-only again and revalidates the entries in the pages.
+
+In the default mode of operation, Xen enforces read-only access to
+page tables and requires guest operating systems to explicitly request
+any modifications. Xen validates all such requests and only applies
+updates that it deems safe. This is necessary to prevent domains from
+adding arbitrary mappings to their page tables.
+
+To aid validation, Xen associates a type and reference count with each
+memory page. A page has one of the following
+mutually-exclusive types at any point in time: page directory ({\sf
+PD}), page table ({\sf PT}), local descriptor table ({\sf LDT}),
+global descriptor table ({\sf GDT}), or writable ({\sf RW}). Note that
+a guest OS may always create readable mappings of its own memory
+regardless of its current type.
+%%% XXX: possibly explain more about ref count 'lifecycle' here?
+This mechanism is used to
+maintain the invariants required for safety; for example, a domain
+cannot have a writable mapping to any part of a page table as this
+would require the page concerned to simultaneously be of types {\sf
+ PT} and {\sf RW}.
+
+
+%\section{Writable Page Tables}
+
+Xen also provides an alternative mode of operation in which guests
+have the illusion that their page tables are directly writable. Of
+course this is not really the case, since Xen must still validate
+modifications to ensure secure partitioning. To this end, Xen traps
+any write attempt to a memory page of type {\sf PT} (i.e., that is
+currently part of a page table). If such an access occurs, Xen
+temporarily allows write access to that page while at the same time
+{\em disconnecting} it from the page table that is currently in
+use. This allows the guest to safely make updates to the page because
+the newly-updated entries cannot be used by the MMU until Xen
+revalidates and reconnects the page.
+Reconnection occurs automatically in a number of situations: for
+example, when the guest modifies a different page-table page, when the
+domain is preempted, or whenever the guest uses Xen's explicit
+page-table update interfaces.
+
\section{Segment Descriptor Tables}
-On boot a guest is supplied with a default GDT, which is {\em not}
-taken from its own memory allocation. If the guest wishes to use other
-than the default `flat' ring-1 and ring-3 segments that this default
-table provides, it must register a custom GDT and/or LDT with Xen,
-allocated from its own memory.
+On boot a guest is supplied with a default GDT, which does not reside
+within its own memory allocation. If the guest wishes to use other
+than the default `flat' ring-1 and ring-3 segments that this GDT
+provides, it must register a custom GDT and/or LDT with Xen,
+allocated from its own memory. Note that a number of GDT
+entries are reserved by Xen -- any custom GDT must also include
+sufficient space for these entries.
+
+For example, the following hypercall is used to specify a new GDT:
+\begin{quote}
int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em entries})
{\em frame\_list}: An array of up to 16 page frames within which the
@@ -253,28 +326,27 @@ mappings, no use as a page-table page, and so on).
{\em entries}: The number of descriptor-entry slots in the GDT. Note
that the table must be large enough to contain Xen's reserved entries;
-thus we must have '{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}'.
+thus we must have `{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}\ '.
Note also that, after registering the GDT, slots {\em FIRST\_} through
{\em LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest and
may be overwritten by Xen.
+\end{quote}
-\section{Pseudo-Physical Memory}
-The usual problem of external fragmentation means that a domain is
-unlikely to receive a contiguous stretch of physical memory. However,
-most guest operating systems do not have built-in support for
-operating in a fragmented physical address space e.g. Linux has to
-have a one-to-one mapping for its physical memory. There a notion of
-{\it pseudo physical memory} is introdouced. Xen maintains a {\it
-real physical} to {\it pseudo physical} mapping which can be consulted
-by every domain. Additionally, at its start of day, a domain is
-supplied a {\it pseudo physical} to {\it real physical} mapping which
-it needs to keep updated itself. From that moment onwards {\it pseudo
-physical} addresses are used instead of discontiguous {\it real
-physical} addresses. Thus, the rest of the guest OS code has an
-impression of operating in a contiguous address space. Guest OS page
-tables contain real physical addresses. Mapping {\it pseudo physical}
-to {\it real physical} addresses is needed on page table updates and
-also on remapping memory regions with the guest OS.
+The LDT is updated via the generic MMU update mechanism (i.e., via
+the {\tt mmu\_update()} hypercall).
+
+\section{Start of Day}
+
+The start-of-day environment for guest operating systems is rather
+different to that provided by the underlying hardware. In particular,
+the processor is already executing in protected mode with paging
+enabled.
+
+{\it Domain-0} is created and booted by Xen itself. For all subsequent
+domains, the analogue of the boot-loader is the {\it domain builder},
+user-space software running in {\it domain-0}. The domain builder
+is responsible for building the initial page tables for a domain
+and loading its kernel image at the appropriate virtual address.
@@ -416,9 +488,265 @@ of the CPU for each domain. Round-robin is provided as an example of
Xen's internal scheduler API.
More information on the characteristics and use of these schedulers is
-available in { \tt Sched-HOWTO.txt }.
+available in {\tt Sched-HOWTO.txt}.
+
+
+
+
+\appendix
+
+%\newcommand{\hypercall}[1]{\vspace{5mm}{\large\sf #1}}
+
+
+
+
+
+\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
+
+
+
+\hypercall{physdev\_op(void *physdev\_op)}
+
+
+\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
+
+
+
+
+\chapter{Xen Hypercalls}
+\label{a:hypercalls}
+
+Hypercalls represent the procedural interface to Xen; this appendix
+categorizes and describes the current set of hypercalls.
+
+\section{Invoking Hypercalls}
+
+\hypercall{multicall(void *call\_list, int nr\_calls)}
+
+Execute a series of hypervisor calls
+
+
+
+
+\section{Virtual CPU Setup}
+
+\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
+ event\_address, unsigned long failsafe\_selector, unsigned long
+ failsafe\_address) }
+
+Register OS event processing routine. In
+Linux both the event\_selector and failsafe\_selector are the
+kernel's CS. The value event\_address specifies the address for an
+interrupt handler dispatch routine and failsafe\_address specifies a
+handler for application faults.
+
+\hypercall{set\_trap\_table(trap\_info\_t *table)}
+
+Install trap handler table.
+
+
+\hypercall{set\_fast\_trap(int idx)}
+
+ install traps to allow guest OS to bypass hypervisor
+
+
+
+
+\section{Scheduling}
+
+
+\hypercall{stack\_switch(unsigned long ss, unsigned long esp)}
+
+Request context switch from hypervisor.
+
+
+\hypercall{fpu\_taskswitch(void)}
+
+Notify hypervisor that fpu registers need to be saved on context switch.
+
+
+\hypercall{sched\_op(unsigned long op)}
+
+Request scheduling operation from hypervisor. The options are: {\it
+yield}, {\it block}, and {\it shutdown}. {\it yield} keeps the
+calling domain run-able but may cause a reschedule if other domains
+are run-able. {\it block} removes the calling domain from the run
+queue and the domain sleeps until an event is delivered to it. {\it
+shutdown} is used to end the domain's execution and allows the caller to
+specify whether the domain should reboot, halt or suspend.
+
+\hypercall{set\_timer\_op(uint64\_t timeout)}
+
+Request a timer event to be sent at the specified system time.
+
+
+\section{Page Table Management}
+
+\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)}
+
+Update the page table for the domain. Updates can be batched.
+success\_count will be updated to report the number of successful
+updates. The update types are:
+
+{\it MMU\_NORMAL\_PT\_UPDATE}:
+
+{\it MMU\_MACHPHYS\_UPDATE}:
+
+{\it MMU\_EXTENDED\_COMMAND}:
+
+
+\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long val, unsigned long flags)}
+
+
+\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
+unsigned long val, unsigned long flags, uint16\_t domid)}
+
+
+\section{Segmentation Support}
+
+
+\hypercall{set\_gdt(unsigned long *frame\_list, int entries)}
+
+Set the global descriptor table - virtualization for lgdt.
+
+
+
+\hypercall{update\_descriptor(unsigned long ma, unsigned long word1, unsigned long word2)}
+
+
+
+
+\section{Inter-Domain Communication}
+
+
+\hypercall{event\_channel\_op(void *op)}
+
+Inter-domain event-channel management.
+
+
+\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
+
+
+
+\section{Physical Memory Management}
+
+\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
+unsigned long nr\_extents, unsigned int extent\_order)}
+
+Increase or decrease memory reservations for guest OS
+
+
+
+
+
+
+\section{Administrative Operations}
+
+
+\hypercall{dom0\_op(dom0\_op\_t *op)}
+
+Administrative domain operations for domain management. The options are:
+
+{\it DOM0\_CREATEDOMAIN}: create new domain, specifying the name and memory usage
+in kilobytes.
+
+{\it DOM0\_CREATEDOMAIN}: create domain
+
+{\it DOM0\_PAUSEDOMAIN}: mark domain as unschedulable
+
+{\it DOM0\_UNPAUSEDOMAIN}: mark domain as schedulable
+
+{\it DOM0\_DESTROYDOMAIN}: deallocate resources associated with the domain
+
+{\it DOM0\_GETMEMLIST}: get list of pages used by the domain
+
+{\it DOM0\_SCHEDCTL}:
+
+{\it DOM0\_ADJUSTDOM}: adjust scheduling priorities for domain
+
+{\it DOM0\_BUILDDOMAIN}: do final guest OS setup for domain
+
+{\it DOM0\_GETDOMAINFO}: get statistics about the domain
+
+{\it DOM0\_GETPAGEFRAMEINFO}:
+
+{\it DOM0\_IOPL}: set IO privilege level
+
+{\it DOM0\_MSR}:
+
+{\it DOM0\_DEBUG}: interactively call pervasive debugger
+
+{\it DOM0\_SETTIME}: set system time
+
+{\it DOM0\_READCONSOLE}: read console content from hypervisor buffer ring
+
+{\it DOM0\_PINCPUDOMAIN}: pin domain to a particular CPU
+
+{\it DOM0\_GETTBUFS}: get information about the size and location of
+ the trace buffers (only on trace-buffer enabled builds)
+
+{\it DOM0\_PHYSINFO}: get information about the host machine
+
+{\it DOM0\_PCIDEV\_ACCESS}: modify PCI device access permissions
+
+{\it DOM0\_SCHED\_ID}: get the ID of the current Xen scheduler
+
+{\it DOM0\_SHADOW\_CONTROL}:
+
+{\it DOM0\_SETDOMAINNAME}: set the name of a domain
+
+{\it DOM0\_SETDOMAININITIALMEM}: set initial memory allocation of a domain
+
+{\it DOM0\_SETDOMAINMAXMEM}: set maximum memory allocation of a domain
+
+{\it DOM0\_GETPAGEFRAMEINFO2}:
+
+{\it DOM0\_SETDOMAINVMASSIST}: set domain VM assist options
+
+
+
+
+\section{Miscellaneous Hypercalls}
+
+
+\hypercall{console\_io(int cmd, int count, char *str)}
-\section{Scheduling API}
+Interact with the console, operations are:
+
+{\it CONSOLEIO\_write}: Output count characters from buffer str.
+
+{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
+
+
+
+\hypercall{set\_debugreg(int reg, unsigned long value)}
+
+set debug register reg to value
+
+
+\hypercall{get\_debugreg(int reg)}
+
+ get the debug register reg
+
+
+\hypercall{xen\_version(int cmd)}
+
+Request Xen version number.
+
+
+
+
+
+
+%%
+%% XXX SMH: not really sure how useful below is -- if it's still
+%% actually true, might be useful for someone wanting to write a
+%% new scheduler... not clear how many of them there are...
+%%
+
+\begin{comment}
+
+\chapter{Scheduling API}
The scheduling API is used by both the schedulers described above and should
also be used by any new schedulers. It provides a generic interface and also
@@ -470,7 +798,7 @@ this scheduler.
\subsubsection{sched\_id}
This is an integer that uniquely identifies this scheduler. There should be a
-macro corrsponding to this scheduler ID in {\tt <hypervisor-ifs/sched-if.h>}.
+macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
\subsubsection{init\_scheduler}
@@ -687,6 +1015,20 @@ This method should dump any private settings for the specified task.
This function is called with interrupts disabled and the {\tt schedule\_lock}
for the task's CPU held.
+\end{comment}
+
+
+
+
+%%
+%% XXX SMH: we probably should have something in here on debugging
+%% etc; this is a kinda developers manual and many devs seem to
+%% like debugging support :^)
+%% Possibly sanitize below, else wait until new xendbg stuff is in
+%% (and/or kip's stuff?) and write about that instead?
+%%
+
+\begin{comment}
\chapter{Debugging}
@@ -751,151 +1093,9 @@ trace points, there is an example format file in {\tt tools/xentrace/formats }.
For more information, see the manual pages for {\tt xentrace}, {\tt
xentrace\_format} and {\tt xentrace\_cpusplit}.
+\end{comment}
-\chapter{Hypervisor calls}
-
-\section{ set\_trap\_table(trap\_info\_t *table)}
-
-Install trap handler table.
-
-\section{ mmu\_update(mmu\_update\_t *req, int count, int *success\_count)}
-Update the page table for the domain. Updates can be batched.
-success\_count will be updated to report the number of successfull
-updates. The update types are:
-
-{\it MMU\_NORMAL\_PT\_UPDATE}:
-
-{\it MMU\_MACHPHYS\_UPDATE}:
-
-{\it MMU\_EXTENDED\_COMMAND}:
-
-\section{ set\_gdt(unsigned long *frame\_list, int entries)}
-Set the global descriptor table - virtualization for lgdt.
-
-\section{ stack\_switch(unsigned long ss, unsigned long esp)}
-Request context switch from hypervisor.
-
-\section{ set\_callbacks(unsigned long event\_selector, unsigned long event\_address,
- unsigned long failsafe\_selector, unsigned
- long failsafe\_address) } Register OS event processing routine. In
- Linux both the event\_selector and failsafe\_selector are the
- kernel's CS. The value event\_address specifies the address for an
- interrupt handler dispatch routine and failsafe\_address specifies a
- handler for application faults.
-
-\section{ fpu\_taskswitch(void)}
-Notify hypervisor that fpu registers needed to be save on context switch.
-
-\section{ sched\_op(unsigned long op)}
-Request scheduling operation from hypervisor. The options are: {\it
-yield}, {\it block}, and {\it shutdown}. {\it yield} keeps the
-calling domain run-able but may cause a reschedule if other domains
-are run-able. {\it block} removes the calling domain from the run
-queue and the domains sleeps until an event is delivered to it. {\it
-shutdown} is used to end the domain's execution and allows to specify
-whether the domain should reboot, halt or suspend..
-
-\section{ dom0\_op(dom0\_op\_t *op)}
-Administrative domain operations for domain management. The options are:
-
-{\it DOM0\_CREATEDOMAIN}: create new domain, specifying the name and memory usage
-in kilobytes.
-
-{\it DOM0\_CREATEDOMAIN}: create domain
-
-{\it DOM0\_PAUSEDOMAIN}: mark domain as unschedulable
-
-{\it DOM0\_UNPAUSEDOMAIN}: mark domain as schedulable
-
-{\it DOM0\_DESTROYDOMAIN}: deallocate resources associated with the domain
-
-{\it DOM0\_GETMEMLIST}: get list of pages used by the domain
-
-{\it DOM0\_SCHEDCTL}:
-
-{\it DOM0\_ADJUSTDOM}: adjust scheduling priorities for domain
-
-{\it DOM0\_BUILDDOMAIN}: do final guest OS setup for domain
-
-{\it DOM0\_GETDOMAINFO}: get statistics about the domain
-
-{\it DOM0\_GETPAGEFRAMEINFO}:
-
-{\it DOM0\_IOPL}: set IO privilege level
-
-{\it DOM0\_MSR}:
-
-{\it DOM0\_DEBUG}: interactively call pervasive debugger
-
-{\it DOM0\_SETTIME}: set system time
-
-{\it DOM0\_READCONSOLE}: read console content from hypervisor buffer ring
-
-{\it DOM0\_PINCPUDOMAIN}: pin domain to a particular CPU
-
-{\it DOM0\_GETTBUFS}: get information about the size and location of
- the trace buffers (only on trace-buffer enabled builds)
-
-{\it DOM0\_PHYSINFO}: get information about the host machine
-
-{\it DOM0\_PCIDEV\_ACCESS}: modify PCI device access permissions
-
-{\it DOM0\_SCHED\_ID}: get the ID of the current Xen scheduler
-
-{\it DOM0\_SHADOW\_CONTROL}:
-
-{\it DOM0\_SETDOMAINNAME}: set the name of a domain
-
-{\it DOM0\_SETDOMAININITIALMEM}: set initial memory allocation of a domain
-
-{\it DOM0\_SETDOMAINMAXMEM}: set maximum memory allocation of a domain
-
-{\it DOM0\_GETPAGEFRAMEINFO2}:
-
-{\it DOM0\_SETDOMAINVMASSIST}: set domain VM assist options
-
-
-\section{ set\_debugreg(int reg, unsigned long value)}
-set debug register reg to value
-
-\section{ get\_debugreg(int reg)}
- get the debug register reg
-
-\section{ update\_descriptor(unsigned long ma, unsigned long word1, unsigned long word2)}
-
-\section{ set\_fast\_trap(int idx)}
- install traps to allow guest OS to bypass hypervisor
-
-\section{ dom\_mem\_op(unsigned int op, unsigned long *extent\_list, unsigned long nr\_extents, unsigned int extent\_order)}
-Increase or decrease memory reservations for guest OS
-
-\section{ multicall(void *call\_list, int nr\_calls)}
-Execute a series of hypervisor calls
-
-\section{ update\_va\_mapping(unsigned long page\_nr, unsigned long val, unsigned long flags)}
-
-\section{ set\_timer\_op(uint64\_t timeout)}
-Request a timer event to be sent at the specified system time.
-
-\section{ event\_channel\_op(void *op)}
-Iinter-domain event-channel management.
-
-\section{ xen\_version(int cmd)}
-Request Xen version number.
-
-\section{ console\_io(int cmd, int count, char *str)}
-Interact with the console, operations are:
-
-{\it CONSOLEIO\_write}: Output count characters from buffer str.
-
-{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
-
-\section{ physdev\_op(void *physdev\_op)}
-
-\section{ grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
-\section{ vm\_assist(unsigned int cmd, unsigned int type)}
-\section{ update\_va\_mapping\_otherdomain(unsigned long page\_nr, unsigned long val, unsigned long flags, uint16\_t domid)}
\end{document}
diff --git a/docs/src/user.tex b/docs/src/user.tex
index 5b3d256984..ff7978bba0 100644
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -1,6 +1,6 @@
\documentclass[11pt,twoside,final,openright]{xenstyle}
-\usepackage{a4,graphicx,setspace,times}
-\setstretch{1.15}
+\usepackage{a4,graphicx,parskip,setspace,times}
+\setstretch{1.1}
\begin{document}
@@ -16,12 +16,19 @@
{\Huge \bf Users' manual} \\[4mm]
{\huge Xen v2.0 for x86} \\[80mm]
-{\Large Xen is Copyright (c) 2004, The Xen Team} \\[3mm]
+{\Large Xen is Copyright (c) 2002-2004, The Xen Team} \\[3mm]
{\Large University of Cambridge, UK} \\[20mm]
-{\large Last updated on 26th October, 2004}
\end{tabular}
-\vfill
\end{center}
+
+{\bf
+DISCLAIMER: This documentation is currently under active development
+and as such there may be mistakes and omissions --- watch out for
+these and please report any you find to the developer's mailing list.
+Contributions of material, suggestions and corrections are welcome.
+}
+
+\vfill
\cleardoublepage
% TABLE OF CONTENTS
@@ -42,22 +49,13 @@
\renewcommand{\bottomfraction}{.8}
\renewcommand{\textfraction}{.2}
\renewcommand{\floatpagefraction}{.8}
-\setstretch{1.15}
+\setstretch{1.1}
\newcommand{\path}[1]{{\tt #1}}
\part{Introduction and Tutorial}
\chapter{Introduction}
-{\bf
-DISCLAIMER: This documentation is currently under active development
-and as such there may be mistakes and omissions --- watch out for
-these and please report any you find to the developer's mailing list.
-Contributions of material, suggestions and corrections are welcome.
-}
-
-\vspace{5mm}
-
Xen is a { \em paravirtualising } virtual machine monitor (VMM), or
`hypervisor', for the x86 processor architecture. Xen can securely
execute multiple virtual machines on a single physical system with
@@ -81,7 +79,7 @@ The drawback of this approach is that it requires operating systems to
be {\em ported} to run on Xen. This process is similar to a port of
an operating system to a new hardware platform, although the process
is simplified because the paravirtual machine architecture is very
-similar to the underlying native hardware. Although operating system
+similar to the underlying native hardware. Even though operating system
kernels must explicitly support Xen, a key feature is that user space
applications and libraries {\em do not} require modification.
@@ -91,11 +89,6 @@ We expect that Xen support will ultimately be integrated into the
official releases of Linux, NetBSD, FreeBSD and Dragonfly BSD.
Other OS ports, including Plan 9, are in progress.
-%Even running multiple copies of Linux can be very useful, providing a
-%means of containing faults to one OS image, providing performance
-%isolation between the various OS instances and trying out multiple
-%distros.
-
Possible usage scenarios for Xen include:
\begin{description}
\item [Kernel development.] Test and debug kernel modifications in a
@@ -103,18 +96,18 @@ Possible usage scenarios for Xen include:
machine.
\item [Multiple OS configurations.] Run multiple operating systems
simultaneously, for instance for compatibility or QA purposes.
-\item [Server consolidation.] Move multiple servers onto one box,
- provided performance and fault isolation at virtual machine
- boundaries.
+\item [Server consolidation.] Move multiple servers onto one box
+ with performance and fault isolation provided at virtual machine
+ boundaries.
\item [Cluster computing.] Improve manageability and efficiency by
running services in virtual machines, isolated from
- machine-specifics and load balance using live migration.
+ machine-specifics; load balance using live migration.
\item [High availability computing.] Run device drivers in sandboxed
domains for increased robustness.
\item [Hardware support for custom OSes.] Export drivers from a
- mainstream OS (e.g. Linux) with good hardware support
+ mainstream OS (e.g. Linux) with wide-ranging hardware support
to your custom OS, avoiding the need for you to port existing
- drivers to achieve good hardware support.
+ drivers.
\end{description}
\section{Structure of a Xen-Based System}
@@ -133,15 +126,15 @@ other domains and manages their virtual devices. It also performs
suspend, resume and migration of virtual machines. Where one is
required, the X server is also run in domain 0.
-Within Domain 0, a process called `Xend' runs to manage the system.
+Within Domain 0, a process called `xend' runs to manage the system.
Xend is responsible for managing virtual machines and providing access
to their consoles. Commands are issued to Xend over an HTTP
interface, either from a command-line tool or from a web browser.
\section{Hardware Support}
-Xen currently runs only on the x86 architecture (however, ports to other
-architectures, including x86/64 and IA64, are in progress).
+Xen currently runs only on the x86 architecture, although ports to other
+architectures, including x86/64 and IA64, are in progress.
Xen requires a `P6' or newer processor (e.g. Pentium Pro, Celeron,
Pentium II, Pentium III, Pentium IV, Xeon, AMD Athlon, AMD Duron).
Multiprocessor machines are supported, and we also have basic support
@@ -153,7 +146,7 @@ mode.
Xen can currently use up to 4GB of memory. It is possible for x86
machines to address up to 64GB of physical memory but there are no
plans to support these systems. The x86\_64 port is the planned route
-to supporting more than 4GB of memory.
+to supporting larger memory sizes.
Xen offloads most of the hardware support issues to the guest OS
running in Domain 0. Xen itself only contains code to detect and
@@ -183,7 +176,7 @@ information along with pointers to papers and technical reports:
Xen has since grown into a project in its own right, enabling us to
investigate interesting research issues regarding the best techniques
-for virtualizing resources such as the CPU, memory, disk and network.
+for virtualising resources such as the CPU, memory, disk and network.
The project has been bolstered by support from Intel Research
Cambridge, and HP Labs, who are now working closely with us.
@@ -193,16 +186,15 @@ first public release (1.0) was made in October 2003. Since
then, Xen has been extensively developed and is now used in production
scenarios on multiple sites.
-Xen 2.0 feature greatly enhanced hardware support, configuration
+Xen 2.0 features greatly enhanced hardware support, configuration
flexibility, usability and a larger complement of supported operating
-systems. We think that Xen has the potential to become {\em the}
-definitive open source virtualisation solution and will work to
-conclusively achieve that position.
+systems. This latest release takes Xen a step closer to becoming the
+definitive open source solution for virtualisation.
\chapter{Installation}
The Xen distribution includes three main components: Xen itself,
-utilities to convert a standard Linux tree to run on Xen and the
+utilities to convert a standard Linux tree to run on Xen, and the
userspace tools required to operate a Xen-based system.
This manual describes how to install the Xen 2.0 distribution from
@@ -217,7 +209,7 @@ operating system distribution.
\item A working installation of the GRUB bootloader.
\item An installation of Twisted v1.3 or above (see {\tt
http://www.twistedmatrix.com}). There may be a package available for
-your distribution; alternatively it can be installed by running {\tt \#
+your distribution; alternatively it can be installed by running {\tt
make install-twisted} in the root of the Xen source tree.
\item Python logging package (see {\tt http://www.red-dove.com/})
\item The Linux bridge control tools (see {\tt
@@ -236,9 +228,11 @@ available for your distribution.
\section{Install Bitkeeper (Optional)}
To fetch a local copy, first download the BitKeeper tools.
-Download instructions must be obtained by filling out the provided
-form at: \\ {\tt
-http://www.bitmover.com/cgi-bin/download.cgi }
+Download instructions can be obtained by filling out the provided
+form at:
+\begin{quote}
+{\tt http://www.bitmover.com/cgi-bin/download.cgi}
+\end{quote}
The BitKeeper install program is designed to be run with X. If X is
not available, you can specify the install directory on the command
@@ -248,8 +242,11 @@ line.
\subsection{Using Bitkeeper}
-The public master BK repository for the 2.0 release lives at: \\
-{\tt bk://xen.bkbits.net/xen-2.0.bk}. You can use Bitkeeper to
+The public master BK repository for the 2.0 release lives at:
+\begin{quote}
+{\tt bk://xen.bkbits.net/xen-2.0.bk}
+\end{quote}
+You can use Bitkeeper to
download it and keep it updated with the latest features and fixes.
Change to the directory in which you want to put the source code, then
@@ -258,6 +255,7 @@ run:
# bk clone bk://xen.bkbits.net/xen-2.0.bk
\end{verbatim}
+
Under your current directory, a new directory named `xen-2.0.bk' has
been created, which contains all the source code for the Xen
hypervisor and the Xen tools. The directory also contains `sparse' OS
@@ -276,7 +274,8 @@ changes to the repository by running:
The Xen source tree is also available in gzipped tarball form from the
Xen downloads page:\\
{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/downloads.html}.
-Prebuilt tarballs are also available from this page but are very large.
+Prebuilt tarballs are also available from this page but are relatively
+ large.
\section{The distribution}
@@ -311,9 +310,9 @@ following:
Inspect the Makefile if you want to see what goes on during a build.
Building Xen and the tools is straightforward, but XenLinux is more
-complicated. The makefile needs a `pristine' linux kernel tree which
-it will then add the Xen architecture files to. You can tell the
-makefile the location of the appropriate linux compressed tar file by
+complicated. The makefile needs a `pristine' Linux kernel tree to which
+it will then add the Xen architecture files. You can tell the
+makefile the location of the appropriate Linux compressed tar file by
setting the LINUX\_SRC environment variable, e.g. \\
\verb!# LINUX_SRC=/tmp/linux-2.6.8.1.tar.bz2 make world! \\ or by
placing the tar file somewhere in the search path of {\tt
@@ -476,30 +475,36 @@ The first step in creating a new domain is to prepare a root
filesystem for it to boot off. Typically, this might be stored in a
normal partition, an LVM or other volume manager partition, a disk
file or on an NFS server.
-
A simple way to do this is simply to boot from your standard OS
install CD and install the distribution into another partition on your
hard drive.
-{\em N.b } you can boot with Xen and XenLinux without installing any
-special userspace tools but will need to have the prerequisites
-described in Section~\ref{sec:prerequisites} and the Xen control tools
-installed before you proceed.
-
-\section{From the web interface}
-
-Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv} for
-more details) using the command: \\
-\verb_# xensv start_ \\
-This will also start Xend (see Chapter~\ref{cha:xend} for more information).
-
-The domain management interface will then be available at {\tt
-http://your\_machine:8080/}. This provides a user friendly wizard for
-starting domains and functions for managing running domains.
-
-\section{From the command line}
-
-Full details of the {\tt xm} tool are found in Chapter~\ref{cha:xm}.
+You can boot Xen and a single XenLinux instance without installing any
+special user-space tools. To proceed further than this you will need
+to install the prerequisites described in Section~\ref{sec:prerequisites}
+and the Xen control tools. The control tools are installed by entering
+the tools subdirectory of the repository and typing \\
+\verb!# LINUX_SRC=/path/to/linux2.4/source make linux24! \\
+
+To start the control daemon, type \\ \verb!# xend start! \\ If you
+wish to start the daemon automatically, see the instructions in
+Chapter~\ref{cha:xend}. Once the daemon is running, you can use the
+{\tt xm} tool to monitor and maintain the domains running on your
+system. This chapter provides only a brief tutorial: we provide full
+details of the {\tt xm} tool in Chapter~\ref{cha:xm}.
+
+%\section{From the web interface}
+%
+%Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv} for
+%more details) using the command: \\
+%\verb_# xensv start_ \\
+%This will also start Xend (see Chapter~\ref{cha:xend} for more information).
+%
+%The domain management interface will then be available at {\tt
+%http://your\_machine:8080/}. This provides a user friendly wizard for
+%starting domains and functions for managing running domains.
+%
+%\section{From the command line}
This example explains how to use the \path{xmdefconfig} file. If you
require a more complex setup, you will want to write a custom
@@ -531,7 +536,7 @@ second to the location of \path{/usr} (if you are sharing it between
domains). [i.e. {\tt disk = ['phy:your\_hard\_drive\%d,sda1,w' \%
(base\_partition\_number + vmid), 'phy:your\_usr\_partition,sda6,r' ]}
\item[dhcp] Uncomment the dhcp variable, so that the domain will
-receive its IP address from a DHCP server. [i.e. {\tt dhcp=''dhcp''}]
+receive its IP address from a DHCP server. [i.e. {\tt dhcp='dhcp'}]
\end{description}
You may also want to edit the {\bf vif} variable in order to choose
@@ -597,8 +602,8 @@ configuration file (or a link to it) under \path{/etc/xen/auto/}.
A Sys-V style init script for RedHat and LSB-compliant systems is
provided and will be automatically copied to \path{/etc/init.d/}
-during install. You can then enable it in the appriate way for your
-distribution.
+during install. You can then enable it in the appropriate way for
+your distribution.
For instance, on RedHat:
@@ -689,13 +694,22 @@ or:
# xm console 5
\end{verbatim}
-\chapter{Other kinds of storage}
+\chapter{Domain filesystem storage}
It is possible to directly export any Linux block device to a virtual,
or to export filesystems / devices to virtual machines using standard
-network protocals (e.g. NBD, iSCSI, NFS, etc). This chapter covers
+network protocols (e.g. NBD, iSCSI, NFS, etc). This chapter covers
some of the possibilities.
+\section{Warning: Block device sharing}
+
+Block devices should only be shared between domains in a read-only
+fashion; otherwise the Linux kernels will obviously get very confused
+as the file system structure may change underneath them (having the
+same partition mounted rw twice is a sure-fire way to cause
+irreparable damage)! If you want read-write sharing, export the
+directory to other domains via NFS from domain0.
+
\section{File-backed virtual block devices}
It is possible to use a file in Domain 0 as the primary storage for a
@@ -1227,6 +1241,25 @@ parameters, etc.
% Support for other administrative domains is not yet available...
+\chapter{Debugging}
+
+Xen has a set of debugging features that can be useful to try and
+figure out what's going on. Hit 'h' on the serial line (if you
+specified a baud rate on the Xen command line) or ScrollLock-h on the
+keyboard to get a list of supported commands.
+
+If you have a crash you'll likely get a crash dump containing an EIP
+(PC) which, along with an 'objdump -d image', can be useful in
+figuring out what's happened.  Debug a XenLinux image just as you
+would any other Linux kernel.
+
+We supply a handy debug terminal program which you can find in
+/usr/local/src/xen-2.0.bk/tools/misc/miniterm/
+This should be built and executed on another machine that is connected
+via a null modem cable. Documentation is included.
+Alternatively, if the Xen machine is connected to a serial-port server
+then we supply a dumb TCP terminal client, {\tt xencons}.
+
\chapter{Xen build options}
For most users, the default build of Xen will be adequate. For some
@@ -1582,6 +1615,61 @@ template and the new image, and using {\tt cp -a} or {\tt tar} or by
simply copying the image file. Once this is done, modify the
image-specific settings (hostname, network settings, etc).
+\chapter{Installing Xen / XenLinux on Redhat / Fedora}
+
+When using Xen / Xenlinux on a standard Linux distribution there are
+a couple of things to watch out for:
+
+Note that, because domains>0 don't have any privileged access at all,
+certain commands in the default boot sequence will fail e.g. attempts
+to update the hwclock, change the console font, update the keytable
+map, start apmd (power management), or gpm (mouse cursor). Either
+ignore the errors (they should be harmless), or remove them from the
+startup scripts. Deleting the following links is a good start:
+S24pcmcia S09isdn S17keytable S26apmd S85gpm.
+
+If you want to use a single root file system that works cleanly for
+domain0 and domains>0, a useful trick is to use different 'init' run
+levels. For example, on the Xen Demo CD we use run level 3 for domain
+0, and run level 4 for domains>0. This enables different startup
+scripts to be run depending on the run level number passed on the
+kernel command line.
+
+If you're going to use NFS root file systems mounted either from an
+external server or from domain0 there are a couple of other gotchas.
+The default /etc/sysconfig/iptables rules block NFS, so part way
+through the boot sequence things will suddenly go dead.
+
+If you're planning on having a separate NFS /usr partition, the RH9
+boot scripts don't make life easy - they attempt to mount NFS file
+systems way too late in the boot process. The easiest way I found to do
+this was to have a '/linuxrc' script run ahead of /sbin/init that
+mounts /usr:
+
+\begin{verbatim}
+ #!/bin/bash
+ /sbin/ifconfig lo 127.0.0.1
+ /sbin/portmap
+ /bin/mount /usr
+ exec /sbin/init "$@" <>/dev/console 2>&1
+\end{verbatim}
+
+The one slight complication with the above is that /sbin/portmap is
+dynamically linked against /usr/lib/libwrap.so.0. Since this is in
+/usr, it won't work. This can be solved by copying the file (and link)
+below the /usr mount point, and just let the file be 'covered' when
+the mount happens.
+
+In some installations, where a shared read-only /usr is being used, it
+may be desirable to move other large directories over into the
+read-only /usr. For example, you might replace /bin /lib and /sbin
+with links into /usr/root/bin /usr/root/lib and /usr/root/sbin
+respectively. This creates other problems for running the /linuxrc
+script, requiring bash, portmap, mount, ifconfig, and a handful of
+other shared libraries to be copied below the mount point - a small
+statically linked C program would solve this problem.
+
+
\end{document}
@@ -1612,4 +1700,52 @@ image-specific settings (hostname, network settings, etc).
%% You can use these modules to write your own custom scripts or you can
%% customise the scripts supplied in the Xen distribution.
+
+
% Explain about AGP GART
+
+
+%% If you're not intending to configure the new domain with an IP address
+%% on your LAN, then you'll probably want to use NAT. The
+%% 'xen_nat_enable' installs a few useful iptables rules into domain0 to
+%% enable NAT. [NB: We plan to support RSIP in future]
+
+
+
+
+%% Installing the file systems from the CD
+%% =======================================
+
+%% If you haven't got an existing Linux installation onto which you can
+%% just drop down the Xen and Xenlinux images, then the file systems on
+%% the CD provide a quick way of doing an install. However, you would be
+%% better off in the long run doing a proper install of your preferred
+%% distro and installing Xen onto that, rather than just doing the hack
+%% described below:
+
+%% Choose one or two partitions, depending on whether you want a separate
+%% /usr or not. Make file systems on it/them e.g.:
+%% mkfs -t ext3 /dev/hda3
+%% [or mkfs -t ext2 /dev/hda3 && tune2fs -j /dev/hda3 if using an old
+%% version of mkfs]
+
+%% Next, mount the file system(s) e.g.:
+%% mkdir /mnt/root && mount /dev/hda3 /mnt/root
+%% [mkdir /mnt/usr && mount /dev/hda4 /mnt/usr]
+
+%% To install the root file system, simply untar /usr/XenDemoCD/root.tar.gz:
+%% cd /mnt/root && tar -zxpf /usr/XenDemoCD/root.tar.gz
+
+%% You'll need to edit /mnt/root/etc/fstab to reflect your file system
+%% configuration. Changing the password file (etc/shadow) is probably a
+%% good idea too.
+
+%% To install the usr file system, copy the file system from CD on /usr,
+%% though leaving out the "XenDemoCD" and "boot" directories:
+%% cd /usr && cp -a X11R6 etc java libexec root src bin dict kerberos local sbin tmp doc include lib man share /mnt/usr
+
+%% If you intend to boot off these file systems (i.e. use them for
+%% domain 0), then you probably want to copy the /usr/boot directory on
+%% the cd over the top of the current symlink to /boot on your root
+%% filesystem (after deleting the current symlink) i.e.:
+%% cd /mnt/root ; rm boot ; cp -a /usr/boot .