aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.rootkeys1
-rw-r--r--BitKeeper/etc/ignore1
-rw-r--r--docs/figs/xenserver.obj312
-rw-r--r--docs/src/interface.tex312
4 files changed, 480 insertions, 146 deletions
diff --git a/.rootkeys b/.rootkeys
index 37ea52e07c..cfc8504377 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -8,6 +8,7 @@
3f69d8abYB1vMyD_QVDvzxy5Zscf1A TODO
3f9e7d53iC47UnlfORp9iC1vai6kWw docs/Makefile
3f9e7d60PWZJeVh5xdnk0nLUdxlqEA docs/figs/xenlogo.eps
+418273f3YZUyGIrNbERVAPFeOd9gww docs/figs/xenserver.obj
4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc/XenDebugger-HOWTO
412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore
index e0c4c689fb..d039d56f50 100644
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -78,3 +78,4 @@ docs/xend/internals.pl
docs/xend/labels.pl
docs/xend/xend.css
docs/xend/xend.html
+docs/figs/xenserver.eps
diff --git a/docs/figs/xenserver.obj b/docs/figs/xenserver.obj
new file mode 100644
index 0000000000..4d2da1183f
--- /dev/null
+++ b/docs/figs/xenserver.obj
@@ -0,0 +1,312 @@
+%TGIF 4.1.8
+state(0,37,100.000,0,108,0,4,1,16,2,2,2,0,1,2,1,1,'Helvetica-Oblique',2,80640,0,8,1,5,-4,0,1,1,0,16,1,0,1,1,1,1,1088,1408,0,0,2880,0).
+%
+% @(#)$Header$
+% %W%
+%
+unit("1 pixel/pixel").
+color_info(28,65535,0,[
+ "black", 0, 0, 0, 0, 0, 0, 1,
+ "gray10", 6682, 6682, 6682, 6682, 6682, 6682, 1,
+ "gray20", 13107, 13107, 13107, 13107, 13107, 13107, 1,
+ "gray30", 19789, 19789, 19789, 19789, 19789, 19789, 1,
+ "gray40", 26214, 26214, 26214, 26214, 26214, 26214, 1,
+ "gray50", 32639, 32639, 32639, 32639, 32639, 32639, 1,
+ "gray60", 39321, 39321, 39321, 39321, 39321, 39321, 1,
+ "gray70", 46003, 46003, 46003, 46003, 46003, 46003, 1,
+ "gray80", 52428, 52428, 52428, 52428, 52428, 52428, 1,
+ "gray90", 58853, 58853, 58853, 58853, 58853, 58853, 1,
+ "white", 65535, 65535, 65535, 65535, 65535, 65535, 1,
+ "red", 65535, 0, 0, 65535, 0, 0, 1,
+ "orange", 65535, 42405, 0, 65535, 42405, 0, 1,
+ "yellow", 65535, 65535, 0, 65535, 65535, 0, 1,
+ "green", 0, 65535, 0, 0, 65535, 0, 1,
+ "blue", 0, 0, 65535, 0, 0, 65535, 1,
+ "blue4", 0, 0, 35723, 0, 0, 35723, 1,
+ "violet", 61166, 33410, 61166, 61166, 33410, 61166, 1,
+ "magenta", 65535, 0, 65535, 65535, 0, 65535, 1,
+ "cyan", 0, 65535, 65535, 0, 65535, 65535, 1,
+ "wheat", 62965, 57054, 46003, 62965, 57054, 46003, 1,
+ "wheat3", 52685, 47802, 38550, 52685, 47802, 38550, 1,
+ "wheat4", 35723, 32382, 26214, 35723, 32382, 26214, 1,
+ "pink", 65535, 49344, 52171, 65535, 49344, 52171, 1,
+ "palegreen", 39064, 64507, 39064, 39064, 64507, 39064, 1,
+ "skyblue", 34695, 52942, 60395, 34695, 52942, 60395, 1,
+ "CadetBlue", 24415, 40606, 41120, 24415, 40606, 41120, 1,
+ "DarkSlateGray", 12079, 20303, 20303, 12079, 20303, 20303, 1
+]).
+script_frac("0.6").
+fg_bg_colors('blue4','gray90').
+page(1,"",1,'').
+group([
+rcbox('gray90','',375,225,440,435,1,2,1,8,16,69683,0,0,0,0,'2',0,[
+]),
+rcbox('gray20','',375,225,440,435,0,2,1,8,16,69684,0,0,0,0,'2',0,[
+])
+],
+69682,0,0,[
+]).
+group([
+rcbox('gray90','',450,225,515,435,1,2,1,8,16,69623,0,0,0,0,'2',0,[
+]),
+rcbox('gray20','',450,225,515,435,0,2,1,8,16,69624,0,0,0,0,'2',0,[
+])
+],
+69622,0,0,[
+]).
+group([
+rcbox('gray90','',525,225,590,435,1,2,1,8,16,69119,0,0,0,0,'2',0,[
+]),
+rcbox('gray20','',525,225,590,435,0,2,1,8,16,69120,0,0,0,0,'2',0,[
+])
+],
+69366,0,0,[
+]).
+box('gray40','',227,457,607,502,1,2,1,69020,0,0,0,0,0,'2',0,[
+]).
+box('gray40','',235,335,340,435,1,2,1,69017,0,0,0,0,0,'2',0,[
+]).
+box('gray40','',235,230,340,330,1,2,1,69001,0,0,0,0,0,'2',0,[
+]).
+box('gray80','',230,330,335,430,1,2,1,68660,0,0,0,0,0,'2',0,[
+]).
+box('gray80','',230,225,335,325,1,2,1,68663,0,0,0,0,0,'2',0,[
+]).
+box('gray70','',222,452,602,497,1,2,1,68416,0,0,0,0,0,'2',0,[
+]).
+text('black',621,451,3,1,1,14,55,68422,18,5,0,-7,0,0,2,14,55,-1,2,"",0,0,0,0,469,'',[
+minilines(14,55,-1,2,1,-7,0,[
+mini_line(12,18,5,-1,2,0,[
+str_block(0,12,18,5,-1,2,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,12,18,5,-1,2,0,0,0,0,0,
+ "X")])
+]),
+mini_line(12,18,5,0,1,0,[
+str_block(0,12,18,5,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,12,18,5,0,1,0,0,0,0,0,
+ "E")])
+]),
+mini_line(14,18,5,0,1,0,[
+str_block(0,14,18,5,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,14,18,5,0,1,0,0,0,0,0,
+ "N")])
+])
+])]).
+text('black',282,244,3,1,1,99,60,68643,16,4,0,0,0,0,2,99,60,-1,0,"",0,0,0,0,260,'',[
+minilines(99,60,-1,0,1,0,0,[
+mini_line(98,16,4,0,0,0,[
+str_block(0,98,16,4,0,-4,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,97920,98,16,4,0,-4,0,0,0,0,0,
+ "Control and ")])
+]),
+mini_line(99,16,4,-1,0,0,[
+str_block(0,99,16,4,-1,0,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,97920,99,16,4,-1,0,0,0,0,0,0,
+ "Management")])
+]),
+mini_line(69,16,4,0,0,0,[
+str_block(0,69,16,4,0,-1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,97920,69,16,4,0,-1,0,0,0,0,0,
+ "Software")])
+])
+])]).
+text('black',280,349,2,1,1,85,42,68748,18,5,0,-4,0,0,2,85,42,0,1,"",0,0,0,0,367,'',[
+minilines(85,42,0,1,1,-4,0,[
+mini_line(85,18,5,0,1,0,[
+str_block(0,85,18,5,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,85,18,5,0,1,0,0,0,0,0,
+ "Privileged")])
+]),
+mini_line(79,18,5,0,0,0,[
+str_block(0,79,18,5,0,0,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,79,18,5,0,0,0,0,0,0,0,
+ "GuestOS")])
+])
+])]).
+text('black',280,393,1,1,1,74,17,68749,14,3,0,-4,0,0,2,74,17,0,0,"",0,0,0,0,407,'',[
+minilines(74,17,0,0,1,-4,0,[
+mini_line(74,14,3,0,0,0,[
+str_block(0,74,14,3,0,0,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,80640,74,14,3,0,0,0,0,0,0,0,
+ "(XenLinux)")])
+])
+])]).
+box('gray80','',236,459,351,489,1,2,1,68474,0,0,0,0,0,'2',0,[
+]).
+text('black',291,465,1,1,1,92,17,68470,14,3,0,-4,0,0,2,92,17,0,3,"",0,0,0,0,479,'',[
+minilines(92,17,0,3,1,-4,0,[
+mini_line(92,14,3,0,3,0,[
+str_block(0,92,14,3,0,3,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,80640,92,14,3,0,3,0,0,0,0,0,
+ "VM control i/f")])
+])
+])]).
+box('gray80','',396,459,586,489,1,2,1,68895,0,0,0,0,0,'2',0,[
+]).
+text('black',496,465,1,1,1,143,17,68896,14,3,0,-4,0,0,2,143,17,0,1,"",0,0,0,0,479,'',[
+minilines(143,17,0,1,1,-4,0,[
+mini_line(143,14,3,0,1,0,[
+str_block(0,143,14,3,0,1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,80640,143,14,3,0,1,0,0,0,0,0,
+ "Virtualized Hardware")])
+])
+])]).
+poly('black','',2,[
+ 355,473,395,473],1,4,1,68913,0,2,0,0,0,0,0,'4',0,0,
+ "0","",[
+ 0,14,6,0,'14','6','0'],[0,14,6,0,'14','6','0'],[
+]).
+poly('black','',2,[
+ 280,423,280,453],1,4,1,68941,0,2,0,0,0,0,0,'4',0,0,
+ "0","",[
+ 0,14,6,0,'14','6','0'],[0,14,6,0,'14','6','0'],[
+]).
+poly('black','',2,[
+ 255,313,255,343],1,2,1,68983,0,2,0,0,0,0,0,'2',0,0,
+ "0","",[
+ 0,10,4,0,'10','4','0'],[0,10,4,0,'10','4','0'],[
+]).
+poly('black','',2,[
+ 305,313,305,343],1,2,1,68984,0,2,0,0,0,0,0,'2',0,0,
+ "0","",[
+ 0,10,4,0,'10','4','0'],[0,10,4,0,'10','4','0'],[
+]).
+poly('black','',2,[
+ 280,313,280,343],1,2,1,68989,0,2,0,0,0,0,0,'2',0,0,
+ "0","",[
+ 0,10,4,0,'10','4','0'],[0,10,4,0,'10','4','0'],[
+]).
+box('gray70','',284,520,609,555,1,2,1,68490,0,0,0,0,0,'2',0,[
+]).
+text('black',444,527,1,1,1,276,23,68493,18,5,0,-8,0,0,2,276,23,0,0,"",0,0,0,0,545,'',[
+minilines(276,23,0,0,1,-8,0,[
+mini_line(276,18,5,0,0,0,[
+str_block(0,276,18,5,0,-1,0,0,0,[
+str_seg('black','Helvetica-BoldOblique',3,115200,276,18,5,0,-1,0,0,0,0,0,
+ "H/W (SMP x86, mem, net, block)")])
+])
+])]).
+poly('black','',2,[
+ 445,483,445,518],3,3,1,68528,0,2,0,0,0,0,0,'3',0,0,
+ "0","",[
+ 0,12,5,0,'12','5','0'],[0,12,5,0,'12','5','0'],[
+]).
+poly('black','',2,[
+ 500,483,500,518],3,3,1,68529,0,2,0,0,0,0,0,'3',0,0,
+ "0","",[
+ 0,12,5,0,'12','5','0'],[0,12,5,0,'12','5','0'],[
+]).
+poly('black','',2,[
+ 555,483,555,518],3,3,1,68530,0,2,0,0,0,0,0,'3',0,0,
+ "0","",[
+ 0,12,5,0,'12','5','0'],[0,12,5,0,'12','5','0'],[
+]).
+text('black',405,254,2,1,1,34,36,68698,16,4,0,-4,0,0,2,34,36,0,1,"",0,0,0,0,270,'',[
+minilines(34,36,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(32,16,4,0,1,0,[
+str_block(0,32,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,32,16,4,0,1,0,0,0,0,0,
+ "S/W")])
+])
+])]).
+text('black',405,354,3,1,1,44,52,69100,16,4,0,-4,0,0,2,44,52,0,1,"",0,0,0,0,370,'',[
+minilines(44,52,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(44,16,4,0,0,0,[
+str_block(0,44,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,44,16,4,0,0,0,0,0,0,0,
+ "Guest")])
+]),
+mini_line(24,16,4,0,0,0,[
+str_block(0,24,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,24,16,4,0,0,0,0,0,0,0,
+ "OS")])
+])
+])]).
+text('black',480,254,2,1,1,34,36,69114,16,4,0,-4,0,0,2,34,36,0,1,"",0,0,0,0,270,'',[
+minilines(34,36,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(32,16,4,0,1,0,[
+str_block(0,32,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,32,16,4,0,1,0,0,0,0,0,
+ "S/W")])
+])
+])]).
+text('black',480,354,3,1,1,44,52,69115,16,4,0,-4,0,0,2,44,52,0,1,"",0,0,0,0,370,'',[
+minilines(44,52,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(44,16,4,0,0,0,[
+str_block(0,44,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,44,16,4,0,0,0,0,0,0,0,
+ "Guest")])
+]),
+mini_line(24,16,4,0,0,0,[
+str_block(0,24,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,24,16,4,0,0,0,0,0,0,0,
+ "OS")])
+])
+])]).
+text('black',555,254,2,1,1,34,36,69116,16,4,0,-4,0,0,2,34,36,0,1,"",0,0,0,0,270,'',[
+minilines(34,36,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(32,16,4,0,1,0,[
+str_block(0,32,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,32,16,4,0,1,0,0,0,0,0,
+ "S/W")])
+])
+])]).
+text('black',555,354,3,1,1,44,52,69117,16,4,0,-4,0,0,2,44,52,0,1,"",0,0,0,0,370,'',[
+minilines(44,52,0,1,1,-4,0,[
+mini_line(34,16,4,0,1,0,[
+str_block(0,34,16,4,0,1,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,34,16,4,0,1,0,0,0,0,0,
+ "User")])
+]),
+mini_line(44,16,4,0,0,0,[
+str_block(0,44,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,44,16,4,0,0,0,0,0,0,0,
+ "Guest")])
+]),
+mini_line(24,16,4,0,0,0,[
+str_block(0,24,16,4,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,97920,24,16,4,0,0,0,0,0,0,0,
+ "OS")])
+])
+])]).
+text('black',282,201,1,1,1,116,17,69753,14,3,2,-4,0,0,2,116,17,0,2,"",0,0,0,0,215,'',[
+minilines(116,17,0,2,1,-4,0,[
+mini_line(116,14,3,0,2,0,[
+str_block(0,116,14,3,0,2,0,0,0,[
+str_seg('blue4','Helvetica-BoldOblique',3,80640,116,14,3,0,2,0,0,0,0,0,
+ "Management VM")])
+])
+])]).
+text('black',480,201,1,1,1,146,17,69770,14,3,2,-4,0,0,2,146,17,0,0,"",0,0,0,0,215,'',[
+minilines(146,17,0,0,1,-4,0,[
+mini_line(146,14,3,0,0,0,[
+str_block(0,146,14,3,0,0,0,0,0,[
+str_seg('blue4','Helvetica-Oblique',2,80640,146,14,3,0,0,0,0,0,0,0,
+ "User Virtual Machines")])
+])
+])]).
diff --git a/docs/src/interface.tex b/docs/src/interface.tex
index 8410582c23..d6ac3542b2 100644
--- a/docs/src/interface.tex
+++ b/docs/src/interface.tex
@@ -1,5 +1,6 @@
\documentclass[11pt,twoside,final,openright]{xenstyle}
\usepackage{a4,graphicx,setspace,times}
+\usepackage{comment,parskip}
\setstretch{1.15}
\begin{document}
@@ -64,212 +65,206 @@ provides secure partitioning between virtual machines (known as
accounting and QoS isolation than can be achieved with a conventional
operating system.
-Xen essentially takes a `wholemachine' virtualization approach as
+Xen essentially takes a `whole machine' virtualization approach as
pioneered by IBM VM/370. However, unlike VM/370 or more recent
-efforts such as VMWare and Virtual PC, Xen doesn not attempt to
+efforts such as VMWare and Virtual PC, Xen does not attempt to
completely virtualize the underlying hardware. Instead parts of the
-hosted guest operating systems are modified to work with the
-VMM; the operating system is effectively ported to a new target
-architecture, typically requiring changes in just the
-machine-dependent code. The user-level API is unchanged, thus
-existing binaries and operating system distributions work without
-modification.
+hosted guest operating systems are modified to work with the VMM; the
+operating system is effectively ported to a new target architecture,
+typically requiring changes in just the machine-dependent code. The
+user-level API is unchanged, and so existing binaries and operating
+system distributions work without modification.
In addition to exporting virtualized instances of CPU, memory, network
and block devices, Xen exposes a control interface to manage how these
resources are shared between the running domains. Access to the
control interface is restricted: it may only be used by one
-specially-privileged VM, known as {\em Domain-0}. This domain is a
-required part of any Xen-base server and runs the application software
+specially-privileged VM, known as {\em domain-0}. This domain is a
+required part of any Xen-based server and runs the application software
that manages the control-plane aspects of the platform. Running the
control software in {\it domain-0}, distinct from the hypervisor
-itself, allows the Xen framework to separate the notions of {\it
-mechanism} and {\it policy} within the system.
+itself, allows the Xen framework to separate the notions of
+mechanism and policy within the system.
-\chapter{CPU state}
+
+\chapter{Virtual Architecture}
+
+On a Xen-based system, the hypervisor itself runs in {\it ring 0}. It
+has full access to the physical memory available in the system and is
+responsible for allocating portions of it to the domains. Guest
+operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
+they see fit. Segmentation is used to prevent the guest OS from
+accessing the portion of the address space that is reserved for
+Xen. We expect most guest operating systems will use ring 1 for their
+own operation and place applications in ring 3.
+
+In this chapter we consider the basic virtual architecture provided
+by Xen: the basic CPU state, exception and interrupt handling, and
+time. Other aspects such as memory and device access are discussed
+in later chapters.
+
+\section{CPU state}
All privileged state must be handled by Xen. The guest OS has no
direct access to CR3 and is not permitted to update privileged bits in
-EFLAGS.
+EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
+these are analogous to system calls but occur from ring 1 to ring 0.
-\chapter{Exceptions}
+\section{Exceptions}
The IDT is virtualised by submitting to Xen a table of trap handlers.
Most trap handlers are identical to native x86 handlers, although the
-page-fault handler is a noteable exception.
-
+page-fault handler is somewhat different.
-\chapter{Interrupts and events}
-Interrupts are virtualized by mapping them to events, which are delivered
-asynchronously to the target domain. A guest OS can map these events onto
-its standard interrupt dispatch mechanisms, such as a simple vectoring
-scheme. Each physical interrupt source controlled by the hypervisor, including
-network devices, disks, or the timer subsystem, is responsible for identifying
-the target for an incoming interrupt and sending an event to that domain.
+\section{Interrupts and events}
-This demultiplexing mechanism also provides a device-specific mechanism for
-event coalescing or hold-off. For example, a guest OS may request to only
-actually receive an event after {\it n} packets are queued ready for delivery
-to it, {\it t} nanoseconds after the first packet arrived (which ever is true
-first). This allows latency and throughput requirements to be addressed on a
-domain-specific basis.
+Interrupts are virtualized by mapping them to events, which are
+delivered asynchronously to the target domain. A guest OS can map
+these events onto its standard interrupt dispatch mechanisms. Xen
+is responsible for determining the target domain that will handle
+each physical interrupt source.
-\chapter{Time}
+\section{Time}
Guest operating systems need to be aware of the passage of both real
-(or wallclock) time and their own `virtual time' (i.e., the time for
-which they have been executing) Furthermore, a notion of time is
-required in the hypervisor itself for scheduling and the activities
-that relate to it. To this end the hypervisor provides for notions of
-time: cycle counter time, system time, wall clock time, domain virtual
-time.
+(or wallclock) time and their own `virtual time' (the time for
+which they have been executing). Furthermore, Xen has a notion of
+time which is used for scheduling. The following notions of
+time are provided:
-\section{Cycle counter time}
+\begin{description}
+\item[Cycle counter time.]
-This provides the finest-grained, free-running time reference, with the
-approximate frequency being publicly accessible. The cycle counter time is
+This provides a fine-grained time reference. The cycle counter time is
used to accurately extrapolate the other time references. On SMP machines
it is currently assumed that the cycle counter time is synchronised between
CPUs. The current x86-based implementation achieves this within inter-CPU
communication latencies.
-\section{System time}
+\item[System time.]
-This is a 64-bit value containing the nanoseconds elapsed since boot
-time. Unlike cycle counter time, system time accurately reflects the
-passage of real time, i.e. it is adjusted several times a second for timer
-drift. This is done by running an NTP client in {\it domain0} on behalf of
-the machine, feeding updates to the hypervisor. Intermediate values can be
-extrapolated using the cycle counter.
+This is a 64-bit counter which holds the number of nanoseconds that
+have elapsed since system boot.
-\section{Wall clock time}
-This is the actual ``time of day'' Unix style struct timeval (i.e. seconds and
-microseconds since 1 January 1970, adjusted by leap seconds etc.). Again, an
-NTP client hosted by {\it domain0} can help maintain this value. To guest
-operating systems this value will be reported instead of the hardware RTC
-clock value and they can use the system time and cycle counter times to start
-and remain perfectly in time.
+\item[Wall clock time.]
-\section{Domain virtual time}
+This is the time of day in a Unix-style {\tt struct timeval} (seconds
+and microseconds since 1 January 1970, adjusted by leap seconds). An
+NTP client hosted by {\it domain-0} can keep this value accurate.
-This progresses at the same pace as cycle counter time, but only while a
-domain is executing. It stops while a domain is de-scheduled. Therefore the
-share of the CPU that a domain receives is indicated by the rate at which
-its domain virtual time increases, relative to the rate at which cycle
-counter time does so.
-\section{Time interface}
+\item[Domain virtual time.]
-Xen exports some timestamps to guest operating systems through their shared
-info page. Timestamps are provided for system time and wall-clock time. Xen
-also provides the cycle counter values at the time of the last update
-allowing guests to calculate the current values. The cpu frequency and a
-scaling factor are provided for guests to convert cycle counter values to
-real time. Since all time stamps need to be updated and read
-\emph{atomically} two version numbers are also stored in the shared info
-page.
+This progresses at the same pace as system time, but only while a
+domain is executing --- it stops while a domain is de-scheduled.
+Therefore the share of the CPU that a domain receives is indicated by
+the rate at which its virtual time increases.
-Xen will ensure that the time stamps are updated frequently enough to avoid
-an overflow of the cycle counter values. A guest can check if its notion of
-time is up-to-date by comparing the version numbers.
+\end{description}
-\section{Timer events}
-Xen maintains a periodic timer (currently with a 10ms period) which sends a
-timer event to the currently executing domain. This allows Guest OSes to
-keep track of the passing of time when executing. The scheduler also
-arranges for a newly activated domain to receive a timer event when
-scheduled so that the Guest OS can adjust to the passage of time while it
-has been inactive.
+Xen exports timestamps for system time and wall-clock time to guest
+operating systems through a shared page of memory. Xen also provides
+the cycle counter time at the instant the timestamps were calculated,
+and the CPU frequency in Hertz. This allows the guest to extrapolate
+system and wall-clock times accurately based on the current cycle
+counter time.
-In addition, Xen exports a hypercall interface to each domain which allows
-them to request a timer event sent to them at the specified system
-time. Guest OSes may use this timer to implement timeout values when they
-block.
+Since all time stamps need to be updated and read \emph{atomically}
+two version numbers are also stored in the shared info page. The
+first is incremented prior to an update, while the second is only
+incremented afterwards. Thus a guest can be sure that it read a consistent
+state by checking the two version numbers are equal.
+
+Xen includes a periodic ticker which sends a timer event to the
+currently executing domain every 10ms. The Xen scheduler also sends a
+timer event whenever a domain is scheduled; this allows the guest OS
+to adjust for the time that has passed while it has been inactive. In
+addition, Xen allows each domain to request that it receive a timer
+event at a specified system time. Guest OSes may use this timer to
+implement timeout values when they block.
\chapter{Memory}
-The hypervisor is responsible for providing memory to each of the
-domains running over it. However, the Xen hypervisor's duty is
-restricted to managing physical memory and to policying page table
-updates. All other memory management functions are handled
-externally. Start-of-day issues such as building initial page tables
-for a domain, loading its kernel image and so on are done by the {\it
-domain builder} running in user-space in {\it domain0}. Paging to
-disk and swapping is handled by the guest operating systems
-themselves, if they need it.
+Xen is responsible for managing the allocation of physical memory to
+domains, and for ensuring safe use of the paging and segmentation
+hardware.
-On a Xen-based system, the hypervisor itself runs in {\it ring 0}. It
-has full access to the physical memory available in the system and is
-responsible for allocating portions of it to the domains. Guest
-operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
-they see fit, aside from the fact that segmentation is used to prevent
-the guest OS from accessing a portion of the linear address space that
-is reserved for use by the hypervisor. This approach allows
-transitions between the guest OS and hypervisor without flushing the
-TLB. We expect most guest operating systems will use ring 1 for their
-own operation and place applications (if they support such a notion)
-in ring 3.
-
-\section{Physical Memory Allocation}
-
-The hypervisor reserves a small fixed portion of physical memory at
-system boot time. This special memory region is located at the
-beginning of physical memory and is mapped at the very top of every
-virtual address space.
-
-Any physical memory that is not used directly by the hypervisor is divided into
-pages and is available for allocation to domains. The hypervisor tracks which
-pages are free and which pages have been allocated to each domain. When a new
-domain is initialized, the hypervisor allocates it pages drawn from the free
-list. The amount of memory required by the domain is passed to the hypervisor
-as one of the parameters for new domain initialization by the domain builder.
-Domains can never be allocated further memory beyond that which was
-requested for them on initialization. However, a domain can return
-pages to the hypervisor if it discovers that its memory requirements
-have diminished.
+\section{Memory Allocation}
-\section{Page Table Updates}
+Xen resides within a small fixed portion of physical memory and
+reserves the top 64MB of every virtual address space. The remaining
+physical memory is available for allocation to domains at a page
+granularity. Xen tracks the ownership and use of each page, which
+allows it to enforce secure partitioning between domains.
-In addition to managing physical memory allocation, the hypervisor is also in
-charge of performing page table updates on behalf of the domains. This is
-necessary to prevent domains from adding arbitrary mappings to their page
-tables or introducing mappings to other's page tables.
-
-\section{Writable Page Tables}
-
-Rather than using the explicit page-update interface that Xen
-provides, guests may also be provided with the illusion that their
-page tables are directly writable. Of course this is not really the
-case, since Xen must validate modifications to ensure secure
-partitioning of domains. Instead, Xen detects any write attempt to a
-memory page that is currently part of a page table. If such an access
-occurs, Xen temporarily allows write access to that page while at the
-same time {\em disconnecting} it from the page table that is currently
-in use. This allows the guest to safely make updates to the page
-because the newly-updated entries cannot be used by the MMU until Xen
-revalidates and {\em reconnects} the page.
+Each domain has a maximum and current physical memory allocation.
+A guest OS may run a `balloon driver' to dynamically adjust its
+current memory allocation up to its limit.
+
+\section{Page Table Updates}
+
+In the default mode of operation, Xen enforces read-only access to
+page tables and requires guest operating systems to explicitly request
+any modifications. Xen validates all such requests and only applies
+updates that it deems safe. This is necessary to prevent domains from
+adding arbitrary mappings to their page tables.
+
+To aid validation, Xen associates a type and reference count with each
+memory page. A page has one of the following
+mutually-exclusive types at any point in time: page directory ({\sf
+PD}), page table ({\sf PT}), local descriptor table ({\sf LDT}),
+global descriptor table ({\sf GDT}), or writable ({\sf RW}). Note that
+a guest OS may always create readable mappings of its own memory
+regardless of its current type.
+%%% XXX: possibly explain more about ref count 'lifecycle' here?
+This mechanism is used to
+maintain the invariants required for safety; for example, a domain
+cannot have a writable mapping to any part of a page table as this
+would require the page concerned to simultaneously be of types {\sf
+ PT} and {\sf RW}.
+
+
+%\section{Writable Page Tables}
+
+Xen also provides an alternative mode of operation in which guests
+have the illusion that their page tables are directly writable. Of
+course this is not really the case, since Xen must still validate
+modifications to ensure secure partitioning. To this end, Xen traps
+any write attempt to a memory page of type {\sf PT} (i.e., that is
+currently part of a page table). If such an access occurs, Xen
+temporarily allows write access to that page while at the same time
+{\em disconnecting} it from the page table that is currently in
+use. This allows the guest to safely make updates to the page because
+the newly-updated entries cannot be used by the MMU until Xen
+revalidates and reconnects the page.
Reconnection occurs automatically in a number of situations: for
example, when the guest modifies a different page-table page, when the
domain is preempted, or whenever the guest uses Xen's explicit
page-table update interfaces.
+
\section{Segment Descriptor Tables}
-On boot a guest is supplied with a default GDT, which is {\em not}
-taken from its own memory allocation. If the guest wishes to use other
-than the default `flat' ring-1 and ring-3 segments that this default
-table provides, it must register a custom GDT and/or LDT with Xen,
-allocated from its own memory.
+On boot a guest is supplied with a default GDT, which does not reside
+within its own memory allocation. If the guest wishes to use other
+than the default `flat' ring-1 and ring-3 segments that this GDT
+provides, it must register a custom GDT and/or LDT with Xen,
+allocated from its own memory. Note that a number of GDT
+entries are reserved by Xen -- any custom GDT must also include
+sufficient space for these entries.
+
+For example, the following hypercall is used to specify a new GDT:
+\begin{quote}
int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em entries})
{\em frame\_list}: An array of up to 16 page frames within which the
@@ -279,12 +274,18 @@ mappings, no use as a page-table page, and so on).
{\em entries}: The number of descriptor-entry slots in the GDT. Note
that the table must be large enough to contain Xen's reserved entries;
-thus we must have '{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}'.
+thus we must have `{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}\ '.
Note also that, after registering the GDT, slots {\em FIRST\_} through
{\em LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest and
may be overwritten by Xen.
+\end{quote}
+
+
+XXX SMH: HERE
+
\section{Pseudo-Physical Memory}
+
The usual problem of external fragmentation means that a domain is
unlikely to receive a contiguous stretch of physical memory. However,
most guest operating systems do not have built-in support for
@@ -302,6 +303,21 @@ tables contain real physical addresses. Mapping {\it pseudo physical}
to {\it real physical} addresses is needed on page table updates and
also on remapping memory regions with the guest OS.
+\section{start of day xxx}
+
+
+Start-of-day issues such as building initial page tables
+for a domain, loading its kernel image and so on are done by the {\it
+domain builder} running in user-space in {\it domain-0}. Paging to
+disk and swapping is handled by the guest operating systems
+themselves, if they need it.
+
+The amount of memory required by the domain is passed to the hypervisor
+as one of the parameters for new domain initialization by the domain builder.
+
+
+
+
\chapter{Network I/O}
@@ -444,6 +460,8 @@ Xen's internal scheduler API.
More information on the characteristics and use of these schedulers is
available in { \tt Sched-HOWTO.txt }.
+\begin{comment}
+
\section{Scheduling API}
The scheduling API is used by both the schedulers described above and should
@@ -967,4 +985,6 @@ Interact with the console, operations are:
\hypercall{ update\_va\_mapping\_otherdomain(unsigned long page\_nr, unsigned long val, unsigned long flags, uint16\_t domid)}
+\end{comment}
+
\end{document}