aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Isaku Yamahata <yamahata@valinux.co.jp> 2008-08-07 11:57:34 +0900
committer Isaku Yamahata <yamahata@valinux.co.jp> 2008-08-07 11:57:34 +0900
commit be119bba8e85b2f7ebec4923c349fb7e7007f759 (patch)
tree 4d14f8ac92f4bb84663bfab477fe189132f434c9
parent 91afde280ec7c5495481a4264ce38cdc5d880034 (diff)
parent 99b92a0e1506e7e94b7a3d552820b5405a5f8646 (diff)
download xen-be119bba8e85b2f7ebec4923c349fb7e7007f759.tar.gz
xen-be119bba8e85b2f7ebec4923c349fb7e7007f759.tar.bz2
xen-be119bba8e85b2f7ebec4923c349fb7e7007f759.zip
merge with xen-unstable.hg
-rw-r--r--.hgignore20
-rw-r--r--.hgtags2
-rw-r--r--Config.mk6
-rw-r--r--Makefile2
-rw-r--r--README22
-rw-r--r--docs/Docs.mk4
-rw-r--r--docs/Makefile16
-rw-r--r--docs/misc/vtd.txt9
-rw-r--r--docs/src/user.tex8
-rw-r--r--docs/xen-api/Makefile8
-rw-r--r--docs/xen-api/coversheet.tex4
-rw-r--r--docs/xen-api/revision-history.tex9
-rw-r--r--docs/xen-api/xenapi-coversheet.tex4
-rw-r--r--docs/xen-api/xenapi-datamodel-graph.dot7
-rw-r--r--docs/xen-api/xenapi-datamodel.tex1259
-rw-r--r--extras/mini-os/fs-front.c244
-rw-r--r--extras/mini-os/gntmap.c252
-rw-r--r--extras/mini-os/include/fs.h5
-rw-r--r--extras/mini-os/include/gntmap.h35
-rw-r--r--extras/mini-os/include/lib.h3
-rw-r--r--extras/mini-os/lib/sys.c12
-rw-r--r--extras/mini-os/minios.mk1
-rw-r--r--extras/mini-os/pcifront.c11
-rw-r--r--stubdom/Makefile38
-rw-r--r--stubdom/README18
-rw-r--r--stubdom/stubdom-dm6
-rw-r--r--tools/Makefile15
-rw-r--r--tools/blktap/Makefile3
-rw-r--r--tools/blktap/lib/Makefile2
-rw-r--r--tools/console/Makefile2
-rw-r--r--tools/console/daemon/io.c12
-rw-r--r--tools/examples/Makefile18
-rw-r--r--tools/examples/README4
-rw-r--r--tools/examples/xend-config.sxp4
-rw-r--r--tools/examples/xmexample.hvm5
-rw-r--r--tools/examples/xmexample.hvm-dm (renamed from tools/examples/stubdom-ExampleHVMDomain)0
-rw-r--r--tools/examples/xmexample.hvm-stubdom4
-rw-r--r--tools/examples/xmexample.pv-grub212
-rw-r--r--tools/examples/xmexample.vti5
-rw-r--r--tools/examples/xmexample322
-rw-r--r--tools/firmware/extboot/Makefile2
-rw-r--r--tools/firmware/hvmloader/Makefile8
-rw-r--r--tools/firmware/hvmloader/acpi/acpi2_0.h2
-rw-r--r--tools/firmware/hvmloader/acpi/build.c41
-rw-r--r--tools/firmware/hvmloader/hvmloader.c9
-rw-r--r--tools/firmware/hvmloader/util.h2
-rw-r--r--tools/firmware/rombios/32bit/32bitbios.c2
-rw-r--r--tools/firmware/rombios/32bit/Makefile9
-rw-r--r--tools/firmware/rombios/32bit/tcgbios/Makefile2
-rw-r--r--tools/firmware/rombios/32bit/tcgbios/tcgbios.c43
-rw-r--r--tools/firmware/rombios/32bit/tcgbios/tcgbios.h41
-rw-r--r--tools/firmware/rombios/32bit/util.c72
-rw-r--r--tools/firmware/rombios/32bit/util.h3
-rw-r--r--tools/firmware/rombios/32bitgateway.c12
-rw-r--r--tools/firmware/rombios/32bitprotos.h6
-rw-r--r--tools/firmware/rombios/Makefile3
-rw-r--r--tools/firmware/rombios/rombios.c152
-rw-r--r--tools/fs-back/fs-backend.c36
-rw-r--r--tools/fs-back/fs-backend.h17
-rw-r--r--tools/fs-back/fs-ops.c137
-rw-r--r--tools/fs-back/fs-xenbus.c19
-rw-r--r--tools/include/xen-sys/MiniOS/privcmd.h2
-rw-r--r--tools/ioemu/block-vbd.c4
-rw-r--r--tools/ioemu/hw/pass-through.c311
-rw-r--r--tools/ioemu/hw/pass-through.h8
-rw-r--r--tools/ioemu/hw/pc.c19
-rw-r--r--tools/ioemu/hw/pci.c7
-rw-r--r--tools/ioemu/hw/pt-msi.c6
-rw-r--r--tools/ioemu/hw/serial.c1
-rw-r--r--tools/ioemu/hw/vga.c4
-rw-r--r--tools/ioemu/vl.c2
-rw-r--r--tools/libaio/src/Makefile2
-rw-r--r--tools/libxc/xc_dom_boot.c30
-rw-r--r--tools/libxc/xc_domain_save.c42
-rw-r--r--tools/libxc/xc_hvm_build.c32
-rw-r--r--tools/libxc/xc_linux.c33
-rw-r--r--tools/libxc/xc_minios.c144
-rw-r--r--tools/libxc/xc_netbsd.c45
-rw-r--r--tools/libxc/xc_physdev.c6
-rw-r--r--tools/libxc/xc_private.h5
-rw-r--r--tools/libxc/xc_solaris.c31
-rw-r--r--tools/libxc/xenctrl.h2
-rw-r--r--tools/pygrub/src/pygrub2
-rw-r--r--tools/python/xen/lowlevel/xc/xc.c3
-rw-r--r--tools/python/xen/util/pci.py112
-rw-r--r--tools/python/xen/util/utils.py44
-rw-r--r--tools/python/xen/xend/XendDomainInfo.py30
-rw-r--r--tools/python/xen/xend/image.py10
-rw-r--r--tools/python/xen/xend/server/pciif.py43
-rw-r--r--tools/python/xen/xm/console.py63
-rw-r--r--tools/python/xen/xm/create.dtd8
-rw-r--r--tools/python/xen/xm/create.py65
-rw-r--r--tools/python/xen/xm/main.py130
-rw-r--r--tools/python/xen/xm/shutdown.py5
-rw-r--r--tools/python/xen/xm/xenapi_create.py81
-rw-r--r--tools/xenmon/Makefile2
-rw-r--r--tools/xenstat/libxenstat/Makefile2
-rw-r--r--tools/xenstat/libxenstat/src/xenstat.c2
-rw-r--r--unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c4
-rw-r--r--xen/Makefile6
-rw-r--r--xen/arch/ia64/xen/domain.c3
-rw-r--r--xen/arch/ia64/xen/irq.c3
-rw-r--r--xen/arch/x86/acpi/cpufreq/utility.c10
-rw-r--r--xen/arch/x86/acpi/pmstat.c6
-rw-r--r--xen/arch/x86/domain.c1
-rw-r--r--xen/arch/x86/domain_build.c1
-rw-r--r--xen/arch/x86/domctl.c13
-rw-r--r--xen/arch/x86/hvm/io.c2
-rw-r--r--xen/arch/x86/hvm/stdvga.c34
-rw-r--r--xen/arch/x86/hvm/svm/svm.c7
-rw-r--r--xen/arch/x86/hvm/vmx/vmx.c2
-rw-r--r--xen/arch/x86/io_apic.c8
-rw-r--r--xen/arch/x86/irq.c3
-rw-r--r--xen/arch/x86/mm.c36
-rw-r--r--xen/arch/x86/mm/shadow/multi.c23
-rw-r--r--xen/arch/x86/msi.c15
-rw-r--r--xen/arch/x86/numa.c26
-rw-r--r--xen/arch/x86/physdev.c9
-rw-r--r--xen/arch/x86/platform_hypercall.c27
-rw-r--r--xen/arch/x86/setup.c12
-rw-r--r--xen/arch/x86/shutdown.c13
-rw-r--r--xen/arch/x86/time.c180
-rw-r--r--xen/arch/x86/x86_64/physdev.c9
-rw-r--r--xen/arch/x86/x86_emulate/x86_emulate.c2
-rw-r--r--xen/common/compat/grant_table.c1
-rw-r--r--xen/common/domain.c4
-rw-r--r--xen/common/event_channel.c53
-rw-r--r--xen/common/keyhandler.c54
-rw-r--r--xen/common/page_alloc.c91
-rw-r--r--xen/common/shutdown.c5
-rw-r--r--xen/drivers/char/console.c3
-rw-r--r--xen/drivers/passthrough/amd/iommu_acpi.c233
-rw-r--r--xen/drivers/passthrough/amd/iommu_detect.c207
-rw-r--r--xen/drivers/passthrough/amd/iommu_init.c222
-rw-r--r--xen/drivers/passthrough/amd/iommu_intr.c17
-rw-r--r--xen/drivers/passthrough/amd/iommu_map.c2
-rw-r--r--xen/drivers/passthrough/amd/pci_amd_iommu.c257
-rw-r--r--xen/drivers/passthrough/iommu.c72
-rw-r--r--xen/drivers/passthrough/pci.c118
-rw-r--r--xen/drivers/passthrough/vtd/dmar.c57
-rw-r--r--xen/drivers/passthrough/vtd/extern.h1
-rw-r--r--xen/drivers/passthrough/vtd/iommu.c148
-rw-r--r--xen/drivers/passthrough/vtd/utils.c136
-rw-r--r--xen/include/acpi/cpufreq/processor_perf.h2
-rw-r--r--xen/include/asm-ia64/config.h2
-rw-r--r--xen/include/asm-x86/amd-iommu.h8
-rw-r--r--xen/include/asm-x86/config.h2
-rw-r--r--xen/include/asm-x86/hvm/svm/amd-iommu-proto.h27
-rw-r--r--xen/include/asm-x86/io_apic.h2
-rw-r--r--xen/include/asm-x86/p2m.h2
-rw-r--r--xen/include/public/io/fsif.h24
-rw-r--r--xen/include/public/xen.h1
-rw-r--r--xen/include/xen/iommu.h2
-rw-r--r--xen/include/xen/irq.h2
-rw-r--r--xen/include/xen/sched.h4
-rw-r--r--xen/include/xen/shutdown.h2
-rw-r--r--xen/include/xen/spinlock.h4
157 files changed, 4667 insertions, 1822 deletions
diff --git a/.hgignore b/.hgignore
index 90139f0238..756f9dcf69 100644
--- a/.hgignore
+++ b/.hgignore
@@ -21,8 +21,7 @@
^[^/]*\.bz2$
^\.config$
^\.pc
-^TAGS$
-^tags$
+(^|/)(tags|TAGS)$
^build-.*$
^dist/.*$
^docs/.*\.aux$
@@ -60,10 +59,13 @@
^docs/xen-api/vm_lifecycle.eps$
^docs/xen-api/xenapi-datamodel-graph.eps$
^docs/xen-api/xenapi.out$
-^extras/mini-os/h/hypervisor-ifs$
-^extras/mini-os/h/xen-public$
+^extras/mini-os/arch/ia64/gen_off.s$
+^extras/mini-os/include/mini-os$
+^extras/mini-os/include/ia64/mini-os$
+^extras/mini-os/include/ia64/offsets.h$
+^extras/mini-os/include/x86/mini-os$
+^extras/mini-os/include/xen$
^extras/mini-os/mini-os.*$
-^extras/mini-os/*-stubdom.*$
^install/.*$
^linux-[^/]*-paravirt/.*$
^linux-2.6[^/]*/.*$
@@ -91,13 +93,17 @@
^stubdom/libxc$
^stubdom/lwip-.*$
^stubdom/mini-os-.*$
+^stubdom/mk-headers$
^stubdom/newlib-.*$
^stubdom/pciutils-.*$
^stubdom/zlib-.*$
^stubdom/grub-cvs$
^stubdom/grub/stage2$
^stubdom/grub/netboot$
-^tools/.*/TAGS$
+^stubdom/grub/dirs$
+^stubdom/lwip/
+^stubdom/ioemu/
+^stubdom/grub-upstream/
^tools/.*/build/lib.*/.*\.py$
^tools/blktap/Makefile\.smh$
^tools/blktap/drivers/blktapctrl$
@@ -253,7 +259,6 @@
^xen/\.banner.*$
^xen/BLOG$
^xen/System.map$
-^xen/TAGS$
^xen/arch/x86/asm-offsets\.s$
^xen/arch/x86/boot/mkelf32$
^xen/arch/x86/xen\.lds$
@@ -271,7 +276,6 @@
^xen/include/xen/acm_policy\.h$
^xen/include/xen/banner\.h$
^xen/include/xen/compile\.h$
-^xen/tags$
^xen/tools/figlet/figlet$
^xen/tools/symbols$
^xen/xen$
diff --git a/.hgtags b/.hgtags
index c08e080ab4..51c74a5580 100644
--- a/.hgtags
+++ b/.hgtags
@@ -25,3 +25,5 @@ ed79613b48817d5e0d1f9b3cf104c0e4e8b0d8cf 3.2.0-rc3
c5deb251b9dcece9e466a48a66d3528ca1797db4 3.2.0-rc4
36bb2ab4722733d919d32e4555eb46cc6a06cb8f 3.2.0-rc5
9facc624a238f2b9437b07fa28ff65884aa867f2 3.2.0-rc6
+c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1
+dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2
diff --git a/Config.mk b/Config.mk
index 2cb15d4be0..19c8bd4a6f 100644
--- a/Config.mk
+++ b/Config.mk
@@ -19,6 +19,8 @@ HOSTCFLAGS += -fno-strict-aliasing
DISTDIR ?= $(XEN_ROOT)/dist
DESTDIR ?= /
+DOCDIR ?= /usr/share/doc/xen
+MANDIR ?= /usr/share/man
# Allow phony attribute to be listed as dependency rather than fake target
.PHONY: .phony
@@ -84,7 +86,11 @@ QEMU_REMOTE=http://xenbits.xensource.com/git-http/qemu-xen-unstable.git
# Mercurial in-tree version, or a local directory, or a git URL.
# CONFIG_QEMU ?= ioemu
# CONFIG_QEMU ?= ../qemu-xen.git
+ifeq ($(XEN_TARGET_ARCH),ia64)
+CONFIG_QEMU ?= ioemu
+else
CONFIG_QEMU ?= $(QEMU_REMOTE)
+endif
# Optional components
XENSTAT_XENTOP ?= y
diff --git a/Makefile b/Makefile
index 35a09a6aa7..39aff692bc 100644
--- a/Makefile
+++ b/Makefile
@@ -149,7 +149,7 @@ help:
@echo ' trees then make dist'
@echo ' xen - build and install Xen hypervisor'
@echo ' tools - build and install tools'
- @echo ' stubdomain - build and install the stubdomain images'
+ @echo ' stubdom - build and install the stubdomain images'
@echo ' kernels - build and install guest kernels'
@echo ' kbuild - synonym for make kernels'
@echo ' docs - build and install user documentation'
diff --git a/README b/README
index 7039c5905a..65a27d5f76 100644
--- a/README
+++ b/README
@@ -1,10 +1,10 @@
#################################
- __ __ _____ _____
- \ \/ /___ _ __ |___ / |___ /
- \ // _ \ '_ \ |_ \ |_ \
- / \ __/ | | | ___) | ___) |
- /_/\_\___|_| |_| |____(_)____/
-
+ __ __ _ _ ___
+ \ \/ /___ _ __ | || | / _ \
+ \ // _ \ '_ \ | || |_| | | |
+ / \ __/ | | | |__ _| |_| |
+ /_/\_\___|_| |_| |_|(_)___/
+
#################################
http://www.xen.org/
@@ -21,7 +21,7 @@ development community, spearheaded by XenSource Inc, a company created
by the original Xen development team to build enterprise products
around Xen.
-The 3.3 release offers excellent performance, hardware support and
+The 4.0 release offers excellent performance, hardware support and
enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and
live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD
and Solaris are available from the community.
@@ -54,8 +54,8 @@ performed with root privileges.]
/boot/grub/menu.lst: edit this file to include an entry like the
following:
- title Xen 3.3 / XenLinux 2.6
- kernel /boot/xen-3.3.gz console=vga
+ title Xen 4.0 / XenLinux 2.6
+ kernel /boot/xen-4.0.gz console=vga
module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0
module /boot/initrd-2.6-xen.img
@@ -74,7 +74,7 @@ performed with root privileges.]
32MB memory for internal use, which is not available for allocation
to virtual machines.
-3. Reboot your system and select the "Xen 3.3 / XenLinux 2.6" menu
+3. Reboot your system and select the "Xen 4.0 / XenLinux 2.6" menu
option. After booting Xen, Linux will start and your initialisation
scripts should execute in the usual way.
@@ -224,6 +224,6 @@ tarballs of the source. Instructions in the tboot README describe how
to modify grub.conf to use tboot to launch Xen.
There are optional targets as part of Xen's top-level makefile that will
-downlaod and build tboot: install-tboot, build-tboot, dist-tboot, clean-tboot.
+download and build tboot: install-tboot, build-tboot, dist-tboot, clean-tboot.
These will download the latest tar file from the SourceForge site using wget,
then build/install/dist according to Xen's settings.
diff --git a/docs/Docs.mk b/docs/Docs.mk
index d620213982..3c95cfca45 100644
--- a/docs/Docs.mk
+++ b/docs/Docs.mk
@@ -7,7 +7,3 @@ DOXYGEN := doxygen
POD2MAN := pod2man
DOT := dot
NEATO := neato
-
-pkgdocdir := /usr/share/doc/xen
-mandir := /usr/share/man
-
diff --git a/docs/Makefile b/docs/Makefile
index b412008125..dce7cd9d31 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -80,17 +80,17 @@ distclean: clean
.PHONY: install
install: all
- rm -rf $(DESTDIR)$(pkgdocdir)
- $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)
+ rm -rf $(DESTDIR)$(DOCDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
$(MAKE) -C xen-api install
- cp -dR ps $(DESTDIR)$(pkgdocdir)
- cp -dR pdf $(DESTDIR)$(pkgdocdir)
- $(INSTALL_DIR) $(DESTDIR)$(mandir)
- cp -dR man1 $(DESTDIR)$(mandir)
- cp -dR man5 $(DESTDIR)$(mandir)
- [ ! -d html ] || cp -dR html $(DESTDIR)$(pkgdocdir)
+ cp -dR ps $(DESTDIR)$(DOCDIR)
+ cp -dR pdf $(DESTDIR)$(DOCDIR)
+ $(INSTALL_DIR) $(DESTDIR)$(MANDIR)
+ cp -dR man1 $(DESTDIR)$(MANDIR)
+ cp -dR man5 $(DESTDIR)$(MANDIR)
+ [ ! -d html ] || cp -dR html $(DESTDIR)$(DOCDIR)
pdf/%.pdf: ps/%.ps
$(INSTALL_DIR) $(@D)
diff --git a/docs/misc/vtd.txt b/docs/misc/vtd.txt
index 6264463fb0..277321ba23 100644
--- a/docs/misc/vtd.txt
+++ b/docs/misc/vtd.txt
@@ -2,7 +2,7 @@ Title : How to do PCI Passthrough with VT-d
Authors : Allen Kay <allen.m.kay@intel.com>
Weidong Han <weidong.han@intel.com>
Created : October-24-2007
-Updated : May-07-2008
+Updated : August-06-2008
How to turn on VT-d in Xen
--------------------------
@@ -21,7 +21,7 @@ How to turn on VT-d in Xen
title Xen-Fedora Core (2.6.18-xen)
root (hd0,0)
- kernel /boot/xen.gz com1=115200,8n1 console=com1
+ kernel /boot/xen.gz com1=115200,8n1 console=com1 iommu=1
module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro xencons=ttyS console=tty0 console=ttyS0, pciback.hide=(01:00.0)(03:00.0)
module /boot/initrd-2.6.18-xen.img
@@ -32,6 +32,11 @@ title Xen-Fedora Core (2.6.18-xen)
"ifconfig" to see if IP address has been assigned to NIC devices.
+Enable MSI/MSI-X for assigned devices
+-------------------------------------
+Add the "msi=1" option to the kernel line of the host's grub entry.
+
+
Caveat on Conventional PCI Device Passthrough
---------------------------------------------
diff --git a/docs/src/user.tex b/docs/src/user.tex
index 33d4a213be..59f5a8a9e4 100644
--- a/docs/src/user.tex
+++ b/docs/src/user.tex
@@ -4204,11 +4204,9 @@ writing to the VGA console after domain 0 starts booting (e.g., `vga=text-80x50,
enabled by the BIOS.
\item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform.
This can usually be probed automatically.
-\item [ dma\_bits=xxx ] Specify width of DMA
- addresses in bits. Default is 30 bits (addresses up to 1GB are DMAable).
-\item [ dma\_emergency\_pool=xxx ] Specify lower bound on size of DMA
- pool below which ordinary allocations will fail rather than fall
- back to allocating from the DMA pool.
+\item [ dma\_bits=xxx ] Specify the width of DMA addresses in bits. On
+ NUMA systems this prevents the DMA-capable memory of one node from
+ being exhausted while remote nodes still have memory available.
\end{description}
In addition, the following options may be specified on the Xen command
diff --git a/docs/xen-api/Makefile b/docs/xen-api/Makefile
index 214f94325a..a25286a30a 100644
--- a/docs/xen-api/Makefile
+++ b/docs/xen-api/Makefile
@@ -16,11 +16,11 @@ all: build
build: xenapi.pdf xenapi.ps
install:
- $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)/ps
- $(INSTALL_DIR) $(DESTDIR)$(pkgdocdir)/pdf
+ $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)/ps
+ $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)/pdf
- [ -e xenapi.ps ] && cp xenapi.ps $(DESTDIR)$(pkgdocdir)/ps || true
- [ -e xenapi.pdf ] && cp xenapi.pdf $(DESTDIR)$(pkgdocdir)/pdf || true
+ [ -e xenapi.ps ] && cp xenapi.ps $(DESTDIR)$(DOCDIR)/ps || true
+ [ -e xenapi.pdf ] && cp xenapi.pdf $(DESTDIR)$(DOCDIR)/pdf || true
xenapi.dvi: $(TEX) $(EPS) $(EPSDOT)
$(LATEX) xenapi.tex
diff --git a/docs/xen-api/coversheet.tex b/docs/xen-api/coversheet.tex
index 5a9478d20f..3d35bf67da 100644
--- a/docs/xen-api/coversheet.tex
+++ b/docs/xen-api/coversheet.tex
@@ -50,7 +50,7 @@ Gareth Bestor, IBM & Jon Ludlam, XenSource \\
Hollis Blanchard, IBM & Alastair Tse, XenSource \\
Mike Day, IBM & Daniel Veillard, Red Hat \\
Jim Fehlig, Novell & Tom Wilkie, University of Cambridge \\
-Jon Harrop, XenSource & \\
+Jon Harrop, XenSource & Yosuke Iwamatsu, NEC \\
\end{tabular}
\end{large}
@@ -60,4 +60,4 @@ Jon Harrop, XenSource & \\
\legalnotice{}
\newpage
-\pagestyle{fancy} \ No newline at end of file
+\pagestyle{fancy}
diff --git a/docs/xen-api/revision-history.tex b/docs/xen-api/revision-history.tex
index 21ec02c45f..b65fc82ef5 100644
--- a/docs/xen-api/revision-history.tex
+++ b/docs/xen-api/revision-history.tex
@@ -47,5 +47,14 @@
\end{flushleft}
\end{minipage}\\
\hline
+ 1.0.6 & 24th Jul. 08 & Y. Iwamatsu &
+ \begin{minipage}[t]{7cm}
+ \begin{flushleft}
+ Added definitions of new classes DPCI and PPCI. Updated the table
+ and the diagram representing relationships between classes.
+ Added host.PPCIs and VM.DPCIs fields.
+ \end{flushleft}
+ \end{minipage}\\
+ \hline
\end{tabular}
\end{center}
diff --git a/docs/xen-api/xenapi-coversheet.tex b/docs/xen-api/xenapi-coversheet.tex
index cd71f80ff9..a8ed57a1a1 100644
--- a/docs/xen-api/xenapi-coversheet.tex
+++ b/docs/xen-api/xenapi-coversheet.tex
@@ -17,12 +17,12 @@
\newcommand{\coversheetlogo}{xen.eps}
%% Document date
-\newcommand{\datestring}{11th February 2008}
+\newcommand{\datestring}{24th July 2008}
\newcommand{\releasestatement}{Stable Release}
%% Document revision
-\newcommand{\revstring}{API Revision 1.0.5}
+\newcommand{\revstring}{API Revision 1.0.6}
%% Document authors
\newcommand{\docauthors}{
diff --git a/docs/xen-api/xenapi-datamodel-graph.dot b/docs/xen-api/xenapi-datamodel-graph.dot
index d1b84fdfda..4004edbf90 100644
--- a/docs/xen-api/xenapi-datamodel-graph.dot
+++ b/docs/xen-api/xenapi-datamodel-graph.dot
@@ -14,7 +14,7 @@ fontname="Verdana";
node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy;
node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics;
-node [shape=box]; host_cpu console
+node [shape=box]; DPCI PPCI host_cpu console
session -> host [ arrowhead="none" ]
session -> user [ arrowhead="none" ]
VM -> VM_metrics [ arrowhead="none" ]
@@ -22,7 +22,7 @@ VM -> VM_guest_metrics [ arrowhead="none" ]
VM -> console [ arrowhead="crow" ]
host -> PBD [ arrowhead="crow", arrowtail="none" ]
host -> host_metrics [ arrowhead="none" ]
-host -> host_cpu [ arrowhead="none" ]
+host -> host_cpu [ arrowhead="crow", arrowtail="none" ]
VIF -> VM [ arrowhead="none", arrowtail="crow" ]
VIF -> network [ arrowhead="none", arrowtail="crow" ]
VIF -> VIF_metrics [ arrowhead="none" ]
@@ -38,4 +38,7 @@ VTPM -> VM [ arrowhead="none", arrowtail="crow" ]
VBD -> VBD_metrics [ arrowhead="none" ]
XSPolicy -> host [ arrowhead="none" ]
XSPolicy -> ACMPolicy [ arrowhead="none" ]
+DPCI -> VM [ arrowhead="none", arrowtail="crow" ]
+DPCI -> PPCI [ arrowhead="none" ]
+PPCI -> host [ arrowhead="none", arrowtail="crow" ]
}
diff --git a/docs/xen-api/xenapi-datamodel.tex b/docs/xen-api/xenapi-datamodel.tex
index 94f5feb0d5..7589489ae9 100644
--- a/docs/xen-api/xenapi-datamodel.tex
+++ b/docs/xen-api/xenapi-datamodel.tex
@@ -44,6 +44,8 @@ Name & Description \\
{\tt crashdump} & A VM crashdump \\
{\tt VTPM} & A virtual TPM device \\
{\tt console} & A console \\
+{\tt DPCI} & A pass-through PCI device \\
+{\tt PPCI} & A physical PCI device \\
{\tt user} & A user of the system \\
{\tt debug} & A basic class for testing \\
{\tt XSPolicy} & A class for handling Xen Security Policies \\
@@ -70,6 +72,8 @@ PIF.network & network.PIFs & one-to-many\\
SR.VDIs & VDI.SR & many-to-one\\
VTPM.VM & VM.VTPMs & one-to-many\\
console.VM & VM.consoles & one-to-many\\
+DPCI.VM & VM.DPCIs & one-to-many\\
+PPCI.host & host.PPCIs & one-to-many\\
host.resident\_VMs & VM.resident\_on & many-to-one\\
host.host\_CPUs & host\_cpu.host & many-to-one\\
\hline
@@ -1402,6 +1406,7 @@ $\mathit{RO}_\mathit{run}$ & {\tt VIFs} & (VIF ref) Set & virtual network inter
$\mathit{RO}_\mathit{run}$ & {\tt VBDs} & (VBD ref) Set & virtual block devices \\
$\mathit{RO}_\mathit{run}$ & {\tt crash\_dumps} & (crashdump ref) Set & crash dumps associated with this VM \\
$\mathit{RO}_\mathit{run}$ & {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\
+$\mathit{RO}_\mathit{run}$ & {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\
$\mathit{RW}$ & {\tt PV/bootloader} & string & name of or path to bootloader \\
$\mathit{RW}$ & {\tt PV/kernel} & string & path to the kernel \\
$\mathit{RW}$ & {\tt PV/ramdisk} & string & path to the initrd \\
@@ -3413,6 +3418,38 @@ value of the field
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
+\subsubsection{RPC name:~get\_DPCIs}
+
+{\bf Overview:}
+Get the DPCIs field of the given VM.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((DPCI ref) Set) get_DPCIs (session_id s, VM ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(DPCI ref) Set
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
\subsubsection{RPC name:~get\_PV\_bootloader}
{\bf Overview:}
@@ -5480,6 +5517,7 @@ $\mathit{RO}_\mathit{run}$ & {\tt PIFs} & (PIF ref) Set & physical network inte
$\mathit{RW}$ & {\tt suspend\_image\_sr} & SR ref & The SR in which VDIs for suspend images are created \\
$\mathit{RW}$ & {\tt crash\_dump\_sr} & SR ref & The SR in which VDIs for crash dumps are created \\
$\mathit{RO}_\mathit{run}$ & {\tt PBDs} & (PBD ref) Set & physical blockdevices \\
+$\mathit{RO}_\mathit{run}$ & {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\
$\mathit{RO}_\mathit{run}$ & {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\
$\mathit{RO}_\mathit{run}$ & {\tt metrics} & host\_metrics ref & metrics associated with this host \\
\hline
@@ -6774,6 +6812,38 @@ value of the field
\vspace{0.3cm}
\vspace{0.3cm}
\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PPCIs}
+
+{\bf Overview:}
+Get the PPCIs field of the given host.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((PPCI ref) Set) get_PPCIs (session_id s, host ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt host ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(PPCI ref) Set
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
\subsubsection{RPC name:~get\_host\_CPUs}
{\bf Overview:}
@@ -14464,6 +14534,1195 @@ all fields from the object
\vspace{1cm}
\newpage
+\section{Class: DPCI}
+\subsection{Fields for class: DPCI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DPCI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+pass-through PCI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{inst}$ & {\tt VM} & VM ref & the virtual machine \\
+$\mathit{RO}_\mathit{inst}$ & {\tt PPCI} & PPCI ref & the physical PCI device \\
+$\mathit{RO}_\mathit{inst}$ & {\tt hotplug\_slot} & int & the slot number into which this PCI device is inserted \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_domain} & int & the virtual domain number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_bus} & int & the virtual bus number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_slot} & int & the virtual slot number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_func} & int & the virtual function number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_name} & string & the virtual PCI name \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: DPCI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:}
+Return a list of all the DPCIs known to the system.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((DPCI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(DPCI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:}
+Get the uuid field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_uuid (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_VM}
+
+{\bf Overview:}
+Get the VM field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (VM ref) get_VM (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+VM ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PPCI}
+
+{\bf Overview:}
+Get the PPCI field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (PPCI ref) get_PPCI (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+PPCI ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_hotplug\_slot}
+
+{\bf Overview:}
+Get the hotplug\_slot field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_hotplug_slot (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_domain}
+
+{\bf Overview:}
+Get the virtual\_domain field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_domain (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_bus}
+
+{\bf Overview:}
+Get the virtual\_bus field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_bus (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_slot}
+
+{\bf Overview:}
+Get the virtual\_slot field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_slot (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_func}
+
+{\bf Overview:}
+Get the virtual\_func field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_func (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_name}
+
+{\bf Overview:}
+Get the virtual\_name field of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_virtual_name (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~create}
+
+{\bf Overview:}
+Create a new DPCI instance, and return its handle.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (DPCI ref) create (session_id s, DPCI record args)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI record } & args & All constructor arguments \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+DPCI ref
+}
+
+
+reference to the newly created object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~destroy}
+
+{\bf Overview:}
+Destroy the specified DPCI instance.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} void destroy (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+void
+}
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:}
+Get a reference to the DPCI instance with the specified UUID.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (DPCI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+DPCI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:}
+Get a record containing the current state of the given DPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (DPCI record) get_record (session_id s, DPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+DPCI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
+\section{Class: PPCI}
+\subsection{Fields for class: PPCI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PPCI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+physical PCI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{run}$ & {\tt host} & host ref & the physical machine to which this PPCI is connected \\
+$\mathit{RO}_\mathit{run}$ & {\tt domain} & int & the domain number \\
+$\mathit{RO}_\mathit{run}$ & {\tt bus} & int & the bus number \\
+$\mathit{RO}_\mathit{run}$ & {\tt slot} & int & the slot number \\
+$\mathit{RO}_\mathit{run}$ & {\tt func} & int & the func number \\
+$\mathit{RO}_\mathit{run}$ & {\tt name} & string & the PCI name \\
+$\mathit{RO}_\mathit{run}$ & {\tt vendor\_id} & int & the vendor ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt vendor\_name} & string & the vendor name \\
+$\mathit{RO}_\mathit{run}$ & {\tt device\_id} & int & the device ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt device\_name} & string & the device name \\
+$\mathit{RO}_\mathit{run}$ & {\tt revision\_id} & int & the revision ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt class\_code} & int & the class code \\
+$\mathit{RO}_\mathit{run}$ & {\tt class\_name} & string & the class name \\
+$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_vendor\_id} & int & the subsystem vendor ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_vendor\_name} & string & the subsystem vendor name \\
+$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_id} & int & the subsystem ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt subsystem\_name} & string & the subsystem name \\
+$\mathit{RO}_\mathit{run}$ & {\tt driver} & string & the driver name \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: PPCI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:}
+Return a list of all the PPCIs known to the system.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((PPCI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(PPCI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:}
+Get the uuid field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_uuid (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_host}
+
+{\bf Overview:}
+Get the host field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (host ref) get_host (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+host ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_domain}
+
+{\bf Overview:}
+Get the domain field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_domain (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_bus}
+
+{\bf Overview:}
+Get the bus field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_bus (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_slot}
+
+{\bf Overview:}
+Get the slot field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_slot (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_func}
+
+{\bf Overview:}
+Get the func field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_func (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_name}
+
+{\bf Overview:}
+Get the name field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_vendor\_id}
+
+{\bf Overview:}
+Get the vendor\_id field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_vendor_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_vendor\_name}
+
+{\bf Overview:}
+Get the vendor\_name field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_vendor_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_device\_id}
+
+{\bf Overview:}
+Get the device\_id field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_device_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_device\_name}
+
+{\bf Overview:}
+Get the device\_name field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_device_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_revision\_id}
+
+{\bf Overview:}
+Get the revision\_id field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_revision_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_class\_code}
+
+{\bf Overview:}
+Get the class\_code field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_class_code (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_class\_name}
+
+{\bf Overview:}
+Get the class\_name field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_class_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_vendor\_id}
+
+{\bf Overview:}
+Get the subsystem\_vendor\_id field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_subsystem_vendor_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_vendor\_name}
+
+{\bf Overview:}
+Get the subsystem\_vendor\_name field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_subsystem_vendor_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_id}
+
+{\bf Overview:}
+Get the subsystem\_id field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_subsystem_id (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_subsystem\_name}
+
+{\bf Overview:}
+Get the subsystem\_name field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_subsystem_name (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_driver}
+
+{\bf Overview:}
+Get the driver field of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_driver (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:}
+Get a reference to the PPCI instance with the specified UUID.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (PPCI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+PPCI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:}
+Get a record containing the current state of the given PPCI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (PPCI record) get_record (session_id s, PPCI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PPCI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+PPCI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
\section{Class: user}
\subsection{Fields for class: user}
\begin{longtable}{|lllp{0.38\textwidth}|}
diff --git a/extras/mini-os/fs-front.c b/extras/mini-os/fs-front.c
index f199c348a5..c81f1a66c9 100644
--- a/extras/mini-os/fs-front.c
+++ b/extras/mini-os/fs-front.c
@@ -50,6 +50,8 @@
struct fs_request;
struct fs_import *fs_import;
+void *alloc_buffer_page(struct fs_request *req, domid_t domid, grant_ref_t *gref);
+void free_buffer_page(struct fs_request *req);
/******************************************************************************/
/* RING REQUEST/RESPONSES HANDLING */
@@ -57,13 +59,21 @@ struct fs_import *fs_import;
struct fs_request
{
- void *page;
- grant_ref_t gref;
+ void *private1; /* Specific to request type */
+ void *private2;
struct thread *thread; /* Thread blocked on this request */
struct fsif_response shadow_rsp; /* Response copy writen by the
interrupt handler */
};
+struct fs_rw_gnts /* Per-call set of data pages and their grant references, built locally by fs_read()/fs_write() */
+{
+ /* TODO 16 bit? */
+ int count; /* Number of leading entries of grefs[]/pages[] in use: ((len - 1) / PAGE_SIZE) + 1 */
+ grant_ref_t grefs[FSIF_NR_READ_GNTS]; /* grefs[i] is the grant reference issued for pages[i] */
+ void *pages[FSIF_NR_READ_GNTS]; /* Also used for writes; init_fs_import() asserts FSIF_NR_READ_GNTS == FSIF_NR_WRITE_GNTS */
+};
+
/* Ring operations:
* FSIF ring is used differently to Linux-like split devices. This stems from
* the fact that no I/O request queue is present. The use of some of the macros
@@ -177,6 +187,8 @@ int fs_open(struct fs_import *import, char *file)
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ grant_ref_t gref;
+ void *buffer;
RING_IDX back_req_id;
struct fsif_request *req;
int fd;
@@ -189,14 +201,15 @@ int fs_open(struct fs_import *import, char *file)
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_open call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref id=%d\n", fsr->gref);
+ buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+ DEBUG("gref id=%d\n", gref);
fsr->thread = current;
- sprintf(fsr->page, "%s", file);
+ sprintf(buffer, "%s", file);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_FILE_OPEN;
req->id = priv_req_id;
- req->u.fopen.gref = fsr->gref;
+ req->u.fopen.gref = gref;
/* Set blocked flag before commiting the request, thus avoiding missed
* response race */
@@ -207,6 +220,7 @@ int fs_open(struct fs_import *import, char *file)
/* Read the response */
fd = (int)fsr->shadow_rsp.ret_val;
DEBUG("The following FD returned: %d\n", fd);
+ free_buffer_page(fsr);
add_id_to_freelist(priv_req_id, import->freelist);
return fd;
@@ -254,11 +268,13 @@ ssize_t fs_read(struct fs_import *import, int fd, void *buf,
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ struct fs_rw_gnts gnts;
RING_IDX back_req_id;
struct fsif_request *req;
ssize_t ret;
+ int i;
- BUG_ON(len > PAGE_SIZE);
+ BUG_ON(len > PAGE_SIZE * FSIF_NR_READ_GNTS);
/* Prepare request for the backend */
back_req_id = reserve_fsif_request(import);
@@ -268,18 +284,28 @@ ssize_t fs_read(struct fs_import *import, int fd, void *buf,
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_read call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
- fsr->thread = current;
- memset(fsr->page, 0, PAGE_SIZE);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_FILE_READ;
req->id = priv_req_id;
req->u.fread.fd = fd;
- req->u.fread.gref = fsr->gref;
req->u.fread.len = len;
req->u.fread.offset = offset;
+
+ ASSERT(len > 0);
+ gnts.count = ((len - 1) / PAGE_SIZE) + 1;
+ for(i=0; i<gnts.count; i++)
+ {
+ gnts.pages[i] = (void *)alloc_page();
+ gnts.grefs[i] = gnttab_grant_access(import->dom_id,
+ virt_to_mfn(gnts.pages[i]),
+ 0);
+ memset(gnts.pages[i], 0, PAGE_SIZE);
+ req->u.fread.grefs[i] = gnts.grefs[i];
+ }
+ fsr->thread = current;
+
/* Set blocked flag before commiting the request, thus avoiding missed
* response race */
block(current);
@@ -290,7 +316,19 @@ ssize_t fs_read(struct fs_import *import, int fd, void *buf,
ret = (ssize_t)fsr->shadow_rsp.ret_val;
DEBUG("The following ret value returned %d\n", ret);
if(ret > 0)
- memcpy(buf, fsr->page, ret);
+ {
+ ssize_t to_copy = ret, current_copy;
+ for(i=0; i<gnts.count; i++)
+ {
+ gnttab_end_access(gnts.grefs[i]);
+ current_copy = to_copy > PAGE_SIZE ? PAGE_SIZE : to_copy;
+ if(current_copy > 0)
+ memcpy(buf, gnts.pages[i], current_copy);
+ to_copy -= current_copy;
+ buf = (char*) buf + current_copy;
+ free_page(gnts.pages[i]);
+ }
+ }
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -301,11 +339,13 @@ ssize_t fs_write(struct fs_import *import, int fd, void *buf,
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ struct fs_rw_gnts gnts;
RING_IDX back_req_id;
struct fsif_request *req;
- ssize_t ret;
+ ssize_t ret, to_copy;
+ int i;
- BUG_ON(len > PAGE_SIZE);
+ BUG_ON(len > PAGE_SIZE * FSIF_NR_WRITE_GNTS);
/* Prepare request for the backend */
back_req_id = reserve_fsif_request(import);
@@ -315,20 +355,35 @@ ssize_t fs_write(struct fs_import *import, int fd, void *buf,
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_read call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
- fsr->thread = current;
- memcpy(fsr->page, buf, len);
- BUG_ON(len > PAGE_SIZE);
- memset((char *)fsr->page + len, 0, PAGE_SIZE - len);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_FILE_WRITE;
req->id = priv_req_id;
req->u.fwrite.fd = fd;
- req->u.fwrite.gref = fsr->gref;
req->u.fwrite.len = len;
req->u.fwrite.offset = offset;
+ ASSERT(len > 0);
+ gnts.count = ((len - 1) / PAGE_SIZE) + 1;
+ to_copy = len;
+ for(i=0; i<gnts.count; i++)
+ {
+ int current_copy = (to_copy > PAGE_SIZE ? PAGE_SIZE : to_copy);
+ gnts.pages[i] = (void *)alloc_page();
+ gnts.grefs[i] = gnttab_grant_access(import->dom_id,
+ virt_to_mfn(gnts.pages[i]),
+ 0);
+ memcpy(gnts.pages[i], buf, current_copy);
+ if(current_copy < PAGE_SIZE)
+ memset((char *)gnts.pages[i] + current_copy,
+ 0,
+ PAGE_SIZE - current_copy);
+ req->u.fwrite.grefs[i] = gnts.grefs[i];
+ to_copy -= current_copy;
+ buf = (char*) buf + current_copy;
+ }
+ fsr->thread = current;
+
/* Set blocked flag before commiting the request, thus avoiding missed
* response race */
block(current);
@@ -338,6 +393,11 @@ ssize_t fs_write(struct fs_import *import, int fd, void *buf,
/* Read the response */
ret = (ssize_t)fsr->shadow_rsp.ret_val;
DEBUG("The following ret value returned %d\n", ret);
+ for(i=0; i<gnts.count; i++)
+ {
+ gnttab_end_access(gnts.grefs[i]);
+ free_page(gnts.pages[i]);
+ }
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -361,15 +421,12 @@ int fs_stat(struct fs_import *import,
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_stat call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
fsr->thread = current;
- memset(fsr->page, 0, PAGE_SIZE);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_STAT;
req->id = priv_req_id;
req->u.fstat.fd = fd;
- req->u.fstat.gref = fsr->gref;
/* Set blocked flag before commiting the request, thus avoiding missed
* response race */
@@ -380,7 +437,9 @@ int fs_stat(struct fs_import *import,
/* Read the response */
ret = (int)fsr->shadow_rsp.ret_val;
DEBUG("Following ret from fstat: %d\n", ret);
- memcpy(stat, fsr->page, sizeof(struct fsif_stat_response));
+ memcpy(stat,
+ &fsr->shadow_rsp.fstat,
+ sizeof(struct fsif_stat_response));
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -430,6 +489,8 @@ int fs_remove(struct fs_import *import, char *file)
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ grant_ref_t gref;
+ void *buffer;
RING_IDX back_req_id;
struct fsif_request *req;
int ret;
@@ -442,14 +503,15 @@ int fs_remove(struct fs_import *import, char *file)
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_open call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
+ buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+ DEBUG("gref=%d\n", gref);
fsr->thread = current;
- sprintf(fsr->page, "%s", file);
+ sprintf(buffer, "%s", file);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_REMOVE;
req->id = priv_req_id;
- req->u.fremove.gref = fsr->gref;
+ req->u.fremove.gref = gref;
/* Set blocked flag before commiting the request, thus avoiding missed
* response race */
@@ -460,6 +522,7 @@ int fs_remove(struct fs_import *import, char *file)
/* Read the response */
ret = (int)fsr->shadow_rsp.ret_val;
DEBUG("The following ret: %d\n", ret);
+ free_buffer_page(fsr);
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -472,6 +535,8 @@ int fs_rename(struct fs_import *import,
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ grant_ref_t gref;
+ void *buffer;
RING_IDX back_req_id;
struct fsif_request *req;
int ret;
@@ -486,15 +551,16 @@ int fs_rename(struct fs_import *import,
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_open call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
+ buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+ DEBUG("gref=%d\n", gref);
fsr->thread = current;
- sprintf(fsr->page, "%s%s%c%s%s",
+ sprintf(buffer, "%s%s%c%s%s",
old_header, old_file_name, '\0', new_header, new_file_name);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_RENAME;
req->id = priv_req_id;
- req->u.frename.gref = fsr->gref;
+ req->u.frename.gref = gref;
req->u.frename.old_name_offset = strlen(old_header);
req->u.frename.new_name_offset = strlen(old_header) +
strlen(old_file_name) +
@@ -511,6 +577,7 @@ int fs_rename(struct fs_import *import,
/* Read the response */
ret = (int)fsr->shadow_rsp.ret_val;
DEBUG("The following ret: %d\n", ret);
+ free_buffer_page(fsr);
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -521,6 +588,8 @@ int fs_create(struct fs_import *import, char *name,
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ grant_ref_t gref;
+ void *buffer;
RING_IDX back_req_id;
struct fsif_request *req;
int ret;
@@ -533,14 +602,15 @@ int fs_create(struct fs_import *import, char *name,
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_create call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
+ buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+ DEBUG("gref=%d\n", gref);
fsr->thread = current;
- sprintf(fsr->page, "%s", name);
+ sprintf(buffer, "%s", name);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_CREATE;
req->id = priv_req_id;
- req->u.fcreate.gref = fsr->gref;
+ req->u.fcreate.gref = gref;
req->u.fcreate.directory = directory;
req->u.fcreate.mode = mode;
@@ -553,6 +623,7 @@ int fs_create(struct fs_import *import, char *name,
/* Read the response */
ret = (int)fsr->shadow_rsp.ret_val;
DEBUG("The following ret: %d\n", ret);
+ free_buffer_page(fsr);
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -563,6 +634,8 @@ char** fs_list(struct fs_import *import, char *name,
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ grant_ref_t gref;
+ void *buffer;
RING_IDX back_req_id;
struct fsif_request *req;
char **files, *current_file;
@@ -579,14 +652,15 @@ char** fs_list(struct fs_import *import, char *name,
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_list call is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
+ buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+ DEBUG("gref=%d\n", gref);
fsr->thread = current;
- sprintf(fsr->page, "%s", name);
+ sprintf(buffer, "%s", name);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_DIR_LIST;
req->id = priv_req_id;
- req->u.flist.gref = fsr->gref;
+ req->u.flist.gref = gref;
req->u.flist.offset = offset;
/* Set blocked flag before commiting the request, thus avoiding missed
@@ -600,7 +674,7 @@ char** fs_list(struct fs_import *import, char *name,
files = NULL;
if(*nr_files <= 0) goto exit;
files = malloc(sizeof(char*) * (*nr_files));
- current_file = fsr->page;
+ current_file = buffer;
for(i=0; i<*nr_files; i++)
{
files[i] = strdup(current_file);
@@ -608,6 +682,7 @@ char** fs_list(struct fs_import *import, char *name,
}
if(has_more != NULL)
*has_more = fsr->shadow_rsp.ret_val & HAS_MORE_FLAG;
+ free_buffer_page(fsr);
add_id_to_freelist(priv_req_id, import->freelist);
exit:
return files;
@@ -655,6 +730,8 @@ int64_t fs_space(struct fs_import *import, char *location)
{
struct fs_request *fsr;
unsigned short priv_req_id;
+ grant_ref_t gref;
+ void *buffer;
RING_IDX back_req_id;
struct fsif_request *req;
int64_t ret;
@@ -667,14 +744,15 @@ int64_t fs_space(struct fs_import *import, char *location)
priv_req_id = get_id_from_freelist(import->freelist);
DEBUG("Request id for fs_space is: %d\n", priv_req_id);
fsr = &import->requests[priv_req_id];
- DEBUG("gref=%d\n", fsr->gref);
+ buffer = alloc_buffer_page(fsr, import->dom_id, &gref);
+ DEBUG("gref=%d\n", gref);
fsr->thread = current;
- sprintf(fsr->page, "%s", location);
+ sprintf(buffer, "%s", location);
req = RING_GET_REQUEST(&import->ring, back_req_id);
req->type = REQ_FS_SPACE;
req->id = priv_req_id;
- req->u.fspace.gref = fsr->gref;
+ req->u.fspace.gref = gref;
/* Set blocked flag before commiting the request, thus avoiding missed
* response race */
@@ -685,6 +763,7 @@ int64_t fs_space(struct fs_import *import, char *location)
/* Read the response */
ret = (int64_t)fsr->shadow_rsp.ret_val;
DEBUG("The following returned: %lld\n", ret);
+ free_buffer_page(fsr);
add_id_to_freelist(priv_req_id, import->freelist);
return ret;
@@ -732,6 +811,23 @@ int fs_sync(struct fs_import *import, int fd)
/* END OF INDIVIDUAL FILE OPERATIONS */
/******************************************************************************/
+void *alloc_buffer_page(struct fs_request *req, domid_t domid, grant_ref_t *gref) /* Allocate one page, grant domid access to it, record both in req; returns the page and stores the grant ref in *gref */
+{
+ void *page;
+
+ page = (void *)alloc_page(); /* NOTE(review): the result of alloc_page() is used without being checked */
+ *gref = gnttab_grant_access(domid, virt_to_mfn(page), 0);
+ req->private1 = page; /* Remembered so free_buffer_page() can release the page */
+ req->private2 = (void *)(uintptr_t)(*gref); /* Grant ref packed into a pointer-sized slot; free_buffer_page() casts it back */
+
+ return page;
+}
+
+void free_buffer_page(struct fs_request *req) /* Release what alloc_buffer_page() recorded in req: the grant, then the page */
+{
+ gnttab_end_access((grant_ref_t)(uintptr_t)req->private2); /* private2 holds the grant ref stored by alloc_buffer_page() */
+ free_page(req->private1); /* private1 holds the page pointer stored by alloc_buffer_page() */
+}
static void fsfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
{
@@ -797,15 +893,7 @@ static void alloc_request_table(struct fs_import *import)
import->freelist = xmalloc_array(unsigned short, import->nr_entries + 1);
memset(import->freelist, 0, sizeof(unsigned short) * (import->nr_entries + 1));
for(i=0; i<import->nr_entries; i++)
- {
- /* TODO: that's a lot of memory */
- requests[i].page = (void *)alloc_page();
- requests[i].gref = gnttab_grant_access(import->dom_id,
- virt_to_mfn(requests[i].page),
- 0);
- //printk(" ===>> Page=%lx, gref=%d, mfn=%lx\n", requests[i].page, requests[i].gref, virt_to_mfn(requests[i].page));
add_id_to_freelist(i, import->freelist);
- }
import->requests = requests;
}
@@ -818,22 +906,27 @@ static void alloc_request_table(struct fs_import *import)
void test_fs_import(void *data)
{
struct fs_import *import = (struct fs_import *)data;
- int ret, fd, i;
+ int ret, fd, i, repeat_count;
int32_t nr_files;
char buffer[1024];
ssize_t offset;
char **files;
long ret64;
-
+ struct fsif_stat_response stat;
+
+ repeat_count = 10;
/* Sleep for 1s and then try to open a file */
msleep(1000);
+again:
ret = fs_create(import, "mini-os-created-directory", 1, 0777);
printk("Directory create: %d\n", ret);
- ret = fs_create(import, "mini-os-created-directory/mini-os-created-file", 0, 0666);
+ sprintf(buffer, "mini-os-created-directory/mini-os-created-file-%d",
+ repeat_count);
+ ret = fs_create(import, buffer, 0, 0666);
printk("File create: %d\n", ret);
- fd = fs_open(import, "mini-os-created-directory/mini-os-created-file");
+ fd = fs_open(import, buffer);
printk("File descriptor: %d\n", fd);
if(fd < 0) return;
@@ -847,7 +940,16 @@ void test_fs_import(void *data)
return;
offset += ret;
}
-
+ ret = fs_stat(import, fd, &stat);
+ printk("Ret after stat: %d\n", ret);
+ printk(" st_mode=%o\n", stat.stat_mode);
+ printk(" st_uid =%d\n", stat.stat_uid);
+ printk(" st_gid =%d\n", stat.stat_gid);
+ printk(" st_size=%ld\n", stat.stat_size);
+ printk(" st_atime=%ld\n", stat.stat_atime);
+ printk(" st_mtime=%ld\n", stat.stat_mtime);
+ printk(" st_ctime=%ld\n", stat.stat_ctime);
+
ret = fs_close(import, fd);
printk("Closed fd: %d, ret=%d\n", fd, ret);
@@ -858,6 +960,9 @@ void test_fs_import(void *data)
ret64 = fs_space(import, "/");
printk("Free space: %lld (=%lld Mb)\n", ret64, (ret64 >> 20));
+ repeat_count--;
+ if(repeat_count > 0)
+ goto again;
}
@@ -924,20 +1029,21 @@ static int init_fs_import(struct fs_import *import)
xenbus_transaction_t xbt;
char nodename[1024], r_nodename[1024], token[128], *message = NULL;
struct fsif_sring *sring;
- int retry = 0;
+ int i, retry = 0;
domid_t self_id;
xenbus_event_queue events = NULL;
printk("Initialising FS fortend to backend dom %d\n", import->dom_id);
/* Allocate page for the shared ring */
- sring = (struct fsif_sring*) alloc_page();
- memset(sring, 0, PAGE_SIZE);
+ sring = (struct fsif_sring*) alloc_pages(FSIF_RING_SIZE_ORDER);
+ memset(sring, 0, PAGE_SIZE * FSIF_RING_SIZE_PAGES);
/* Init the shared ring */
SHARED_RING_INIT(sring);
+ ASSERT(FSIF_NR_READ_GNTS == FSIF_NR_WRITE_GNTS);
/* Init private frontend ring */
- FRONT_RING_INIT(&import->ring, sring, PAGE_SIZE);
+ FRONT_RING_INIT(&import->ring, sring, PAGE_SIZE * FSIF_RING_SIZE_PAGES);
import->nr_entries = import->ring.nr_ents;
/* Allocate table of requests */
@@ -945,7 +1051,11 @@ static int init_fs_import(struct fs_import *import)
init_SEMAPHORE(&import->reqs_sem, import->nr_entries);
/* Grant access to the shared ring */
- import->gnt_ref = gnttab_grant_access(import->dom_id, virt_to_mfn(sring), 0);
+ for(i=0; i<FSIF_RING_SIZE_PAGES; i++)
+ import->gnt_refs[i] =
+ gnttab_grant_access(import->dom_id,
+ virt_to_mfn((char *)sring + i * PAGE_SIZE),
+ 0);
/* Allocate event channel */
BUG_ON(evtchn_alloc_unbound(import->dom_id,
@@ -969,13 +1079,27 @@ again:
err = xenbus_printf(xbt,
nodename,
- "ring-ref",
+ "ring-size",
"%u",
- import->gnt_ref);
+ FSIF_RING_SIZE_PAGES);
if (err) {
- message = "writing ring-ref";
+ message = "writing ring-size";
goto abort_transaction;
}
+
+ for(i=0; i<FSIF_RING_SIZE_PAGES; i++)
+ {
+ sprintf(r_nodename, "ring-ref-%d", i);
+ err = xenbus_printf(xbt,
+ nodename,
+ r_nodename,
+ "%u",
+ import->gnt_refs[i]);
+ if (err) {
+ message = "writing ring-refs";
+ goto abort_transaction;
+ }
+ }
err = xenbus_printf(xbt,
nodename,
diff --git a/extras/mini-os/gntmap.c b/extras/mini-os/gntmap.c
new file mode 100644
index 0000000000..abbd91ab33
--- /dev/null
+++ b/extras/mini-os/gntmap.c
@@ -0,0 +1,252 @@
+/*
+ * Manages grant mappings from other domains.
+ *
+ * Diego Ongaro <diego.ongaro@citrix.com>, July 2008
+ *
+ * Files of type FTYPE_GNTMAP contain a gntmap, which is an array of
+ * (host address, grant handle) pairs. Grant handles come from a hypervisor map
+ * operation and are needed for the corresponding unmap.
+ *
+ * This is a rather naive implementation in terms of performance. If we start
+ * using it frequently, there's definitely some low-hanging fruit here.
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <os.h>
+#include <xmalloc.h>
+#include <errno.h>
+#include <xen/grant_table.h>
+#include <inttypes.h>
+#include "gntmap.h"
+
+#define DEFAULT_MAX_GRANTS 128
+
+struct gntmap_entry {
+ unsigned long host_addr; /* address the grant is mapped at; 0 marks the slot as free */
+ grant_handle_t handle; /* handle saved from the map operation, passed back on unmap */
+};
+
+/*
+ * Report whether a table slot currently holds a mapping.
+ *
+ * A slot is free exactly when its host_addr is 0.
+ * Returns 1 for an occupied slot and 0 for a free one.
+ */
+static inline int
+gntmap_entry_used(struct gntmap_entry *entry)
+{
+    if (entry->host_addr == 0)
+        return 0;
+    return 1;
+}
+
+/*
+ * Return the first slot of map that holds no mapping.
+ *
+ * Returns NULL when every slot is occupied, which includes a map whose
+ * table has no slots at all. With GNTMAP_DEBUG defined the exhaustion is
+ * also reported on the console.
+ */
+static struct gntmap_entry*
+gntmap_find_free_entry(struct gntmap *map)
+{
+    struct gntmap_entry *slot;
+    int left;
+
+    /* Walk the table front to back; slot is only touched while left > 0. */
+    for (slot = map->entries, left = map->nentries; left > 0; slot++, left--) {
+        if (gntmap_entry_used(slot))
+            continue;
+        return slot;
+    }
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_find_free_entry(map=%p): all %d entries full\n",
+           map, map->nentries);
+#endif
+    return NULL;
+}
+
+/*
+ * Look up the slot whose mapping lives at host address addr.
+ *
+ * Free slots are marked by host_addr == 0, so they are skipped explicitly:
+ * otherwise a lookup for address 0 would hand back a free slot and the
+ * caller would go on to unmap a grant that was never mapped.
+ *
+ * Returns the matching occupied slot, or NULL if nothing is mapped at addr
+ * through this map.
+ */
+static struct gntmap_entry*
+gntmap_find_entry(struct gntmap *map, unsigned long addr)
+{
+    int i;
+
+    for (i = 0; i < map->nentries; i++) {
+        struct gntmap_entry *ent = &map->entries[i];
+
+        if (gntmap_entry_used(ent) && ent->host_addr == addr)
+            return ent;
+    }
+    return NULL;
+}
+
+/*
+ * Allocate the slot table of map with room for count grants.
+ *
+ * The size can only be chosen while the table does not exist yet, i.e.
+ * while map->nentries is 0. All slots start out free (zero-filled).
+ *
+ * Returns 0 on success, -EBUSY if the table has already been sized,
+ * -EINVAL if count is not positive, and -ENOMEM if the allocation fails.
+ */
+int
+gntmap_set_max_grants(struct gntmap *map, int count)
+{
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_set_max_grants(map=%p, count=%d)\n", map, count);
+#endif
+
+    if (map->nentries != 0)
+        return -EBUSY;
+
+    /*
+     * A negative count would be converted to a huge size for the memset
+     * below and leave a negative nentries behind; a zero count would
+     * allocate a useless table that a later call could overwrite and leak.
+     */
+    if (count <= 0)
+        return -EINVAL;
+
+    map->entries = xmalloc_array(struct gntmap_entry, count);
+    if (map->entries == NULL)
+        return -ENOMEM;
+
+    memset(map->entries, 0, sizeof(struct gntmap_entry) * (size_t) count);
+    map->nentries = count;
+    return 0;
+}
+
+/*
+ * Ask the hypervisor to map grant ref of domain domid at host_addr and
+ * record the result in entry.
+ *
+ * The mapping is read-only unless writable is non-zero. entry is written
+ * only on success. On failure a message is printed and the hypercall's
+ * non-zero return code is returned if there is one, otherwise the status
+ * reported for the operation.
+ */
+static int
+_gntmap_map_grant_ref(struct gntmap_entry *entry,
+                      unsigned long host_addr,
+                      uint32_t domid,
+                      uint32_t ref,
+                      int writable)
+{
+    struct gnttab_map_grant_ref map_op;
+    int hyp_rc;
+
+    map_op.flags = GNTMAP_host_map;
+    if (writable == 0)
+        map_op.flags |= GNTMAP_readonly;
+    map_op.host_addr = (uint64_t) host_addr;
+    map_op.dom = (domid_t) domid;
+    map_op.ref = (grant_ref_t) ref;
+
+    hyp_rc = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map_op, 1);
+    if (hyp_rc == 0 && map_op.status == GNTST_okay) {
+        /* Keep the handle: it is what the later unmap has to present. */
+        entry->host_addr = host_addr;
+        entry->handle = map_op.handle;
+        return 0;
+    }
+
+    printk("GNTTABOP_map_grant_ref failed: "
+           "returned %d, status %" PRId16 "\n",
+           hyp_rc, map_op.status);
+    if (hyp_rc != 0)
+        return hyp_rc;
+    return map_op.status;
+}
+
+/*
+ * Ask the hypervisor to undo the mapping recorded in entry.
+ *
+ * On success the slot is marked free again (host_addr reset to 0) and 0 is
+ * returned. On failure a message is printed, the slot is left untouched,
+ * and the hypercall's non-zero return code is returned if there is one,
+ * otherwise the status reported for the operation.
+ */
+static int
+_gntmap_unmap_grant_ref(struct gntmap_entry *entry)
+{
+    struct gnttab_unmap_grant_ref unmap_op;
+    int hyp_rc;
+
+    unmap_op.handle = entry->handle;
+    unmap_op.dev_bus_addr = 0;
+    unmap_op.host_addr = (uint64_t) entry->host_addr;
+
+    hyp_rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1);
+    if (hyp_rc == 0 && unmap_op.status == GNTST_okay) {
+        entry->host_addr = 0;
+        return 0;
+    }
+
+    printk("GNTTABOP_unmap_grant_ref failed: "
+           "returned %d, status %" PRId16 "\n",
+           hyp_rc, unmap_op.status);
+    if (hyp_rc != 0)
+        return hyp_rc;
+    return unmap_op.status;
+}
+
+/*
+ * Unmap count consecutive pages that were mapped through map, starting at
+ * start_address.
+ *
+ * Pages are handled in ascending address order and processing stops at the
+ * first problem; pages unmapped before that point stay unmapped.
+ *
+ * Returns 0 on success, -EINVAL if one of the pages is not known to map,
+ * or the error reported by the failed unmap operation.
+ */
+int
+gntmap_munmap(struct gntmap *map, unsigned long start_address, int count)
+{
+    int page;
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_munmap(map=%p, start_address=%lx, count=%d)\n",
+           map, start_address, count);
+#endif
+
+    for (page = 0; page < count; page++) {
+        struct gntmap_entry *slot;
+        int err;
+
+        slot = gntmap_find_entry(map, start_address + PAGE_SIZE * page);
+        if (slot == NULL) {
+            printk("gntmap: tried to munmap unknown page\n");
+            return -EINVAL;
+        }
+
+        err = _gntmap_unmap_grant_ref(slot);
+        if (err != 0)
+            return err;
+    }
+
+    return 0;
+}
+
+/*
+ * Map count grants into one contiguous range of host addresses.
+ *
+ * Page i of the range maps grant refs[i] of the domain found at
+ * domids[i * domids_stride]; a stride of 0 therefore uses domids[0] for
+ * every page. Mappings are read-only unless writable is non-zero.
+ *
+ * If the slot table of map has not been sized yet it is created with
+ * DEFAULT_MAX_GRANTS slots; the result of that attempt is deliberately
+ * ignored because -EBUSY simply means the table already exists.
+ *
+ * Returns the start address of the range, or NULL if the arguments are
+ * unusable (count of 0, NULL domids or refs), the request is larger than
+ * the slot table, no address range could be reserved, or any single
+ * mapping failed. In the last case the pages mapped so far are unmapped
+ * again before returning.
+ *
+ * NOTE(review): on failure the range obtained from allocate_ondemand() is
+ * not handed back; no release call is visible in this file - confirm
+ * whether one is needed.
+ */
+void*
+gntmap_map_grant_refs(struct gntmap *map,
+                      uint32_t count,
+                      uint32_t *domids,
+                      int domids_stride,
+                      uint32_t *refs,
+                      int writable)
+{
+    unsigned long addr;
+    struct gntmap_entry *ent;
+    uint32_t *dom;
+    uint32_t i;
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_map_grant_refs(map=%p, count=%" PRIu32 ", "
+           "domids=%p [%" PRIu32 "...], domids_stride=%d, "
+           "refs=%p [%" PRIu32 "...], writable=%d)\n",
+           map, count,
+           domids, domids == NULL ? 0 : domids[0], domids_stride,
+           refs, refs == NULL ? 0 : refs[0], writable);
+#endif
+
+    /* The loop below dereferences both arrays, so reject NULL up front. */
+    if (count == 0 || domids == NULL || refs == NULL)
+        return NULL;
+
+    (void) gntmap_set_max_grants(map, DEFAULT_MAX_GRANTS);
+
+    /*
+     * A request that exceeds the table can never succeed; fail before
+     * reserving address space. This also keeps every index below within
+     * the range of int, which gntmap_munmap() takes as its count.
+     */
+    if (map->nentries <= 0 || count > (uint32_t) map->nentries)
+        return NULL;
+
+    addr = allocate_ondemand((unsigned long) count, 1);
+    if (addr == 0)
+        return NULL;
+
+    /* Step through domids with a pointer so that any stride keeps working. */
+    dom = domids;
+    for (i = 0; i < count; i++, dom += domids_stride) {
+        ent = gntmap_find_free_entry(map);
+        if (ent == NULL ||
+            _gntmap_map_grant_ref(ent,
+                                  addr + (unsigned long) i * PAGE_SIZE,
+                                  *dom,
+                                  refs[i],
+                                  writable) != 0) {
+
+            /* Roll back the i pages that were mapped before the failure. */
+            (void) gntmap_munmap(map, addr, (int) i);
+            return NULL;
+        }
+    }
+
+    return (void*) addr;
+}
+
+/*
+ * Put map into its initial state: no slot table and zero slots.
+ * The table itself is created later by gntmap_set_max_grants().
+ */
+void
+gntmap_init(struct gntmap *map)
+{
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_init(map=%p)\n", map);
+#endif
+    map->entries = NULL;
+    map->nentries = 0;
+}
+
+/*
+ * Tear down map: unmap every grant that is still mapped, free the slot
+ * table and return map to the state gntmap_init() leaves it in.
+ *
+ * Failures of individual unmap operations are ignored so that the
+ * remaining slots are still processed.
+ */
+void
+gntmap_fini(struct gntmap *map)
+{
+    struct gntmap_entry *slot;
+    int left;
+
+#ifdef GNTMAP_DEBUG
+    printk("gntmap_fini(map=%p)\n", map);
+#endif
+
+    /* Walk the table front to back; slot is only touched while left > 0. */
+    for (slot = map->entries, left = map->nentries; left > 0; slot++, left--) {
+        if (!gntmap_entry_used(slot))
+            continue;
+        (void) _gntmap_unmap_grant_ref(slot);
+    }
+
+    xfree(map->entries);
+    map->nentries = 0;
+    map->entries = NULL;
+}
diff --git a/extras/mini-os/include/fs.h b/extras/mini-os/include/fs.h
index d60f0c964b..cd8262e54d 100644
--- a/extras/mini-os/include/fs.h
+++ b/extras/mini-os/include/fs.h
@@ -5,6 +5,9 @@
#include <mini-os/semaphore.h>
#include <mini-os/types.h>
+#define FSIF_RING_SIZE_ORDER 1
+#define FSIF_RING_SIZE_PAGES (1<<FSIF_RING_SIZE_ORDER)
+
struct fs_import
{
domid_t dom_id; /* dom id of the exporting domain */
@@ -14,7 +17,7 @@ struct fs_import
unsigned int nr_entries; /* Number of entries in rings & request
array */
struct fsif_front_ring ring; /* frontend ring (contains shared ring) */
- int gnt_ref; /* grant reference to the shared ring */
+ u32 gnt_refs[FSIF_RING_SIZE_PAGES]; /* grant references to the shared ring */
evtchn_port_t local_port; /* local event channel port */
char *backend; /* XenBus location of the backend */
struct fs_request *requests; /* Table of requests */
diff --git a/extras/mini-os/include/gntmap.h b/extras/mini-os/include/gntmap.h
new file mode 100644
index 0000000000..fde53f39b1
--- /dev/null
+++ b/extras/mini-os/include/gntmap.h
@@ -0,0 +1,35 @@
+#ifndef __GNTMAP_H__
+#define __GNTMAP_H__
+
+#include <os.h>
+
+/*
+ * Please consider struct gntmap opaque. If instead you choose to disregard
+ * this message, I insist that you keep an eye out for raptors.
+ */
+struct gntmap {
+ int nentries; /* number of slots in entries; 0 until the table has been allocated */
+ struct gntmap_entry *entries; /* slot table; NULL until allocated and again after gntmap_fini() */
+};
+/*
+ * Size the slot table of map for count grants. This only works while the
+ * table does not exist yet. Returns 0 on success or a negative errno value
+ * (-EBUSY if the table was already sized, -ENOMEM if allocation failed).
+ */
+int
+gntmap_set_max_grants(struct gntmap *map, int count);
+/*
+ * Unmap count consecutive pages starting at start_address. Stops at the
+ * first failure. Returns 0 on success, -EINVAL if a page is not known to
+ * map, or the error of the failed unmap operation.
+ */
+int
+gntmap_munmap(struct gntmap *map, unsigned long start_address, int count);
+/*
+ * Map count grants into one contiguous address range. Page i maps grant
+ * refs[i] of domain domids[i * domids_stride], read-only unless writable
+ * is non-zero. Returns the start of the range, or NULL on failure.
+ */
+void*
+gntmap_map_grant_refs(struct gntmap *map,
+ uint32_t count,
+ uint32_t *domids,
+ int domids_stride,
+ uint32_t *refs,
+ int writable);
+/*
+ * Initialise map to the empty state (no slot table). Call before any other
+ * gntmap function is used on map.
+ */
+void
+gntmap_init(struct gntmap *map);
+/*
+ * Unmap everything still mapped through map and free its slot table.
+ */
+void
+gntmap_fini(struct gntmap *map);
+
+#endif /* !__GNTMAP_H__ */
diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h
index b508c38f51..8822dd14a7 100644
--- a/extras/mini-os/include/lib.h
+++ b/extras/mini-os/include/lib.h
@@ -59,6 +59,7 @@
#include <stddef.h>
#include <xen/xen.h>
#include <xen/event_channel.h>
+#include "gntmap.h"
#ifdef HAVE_LIBC
#include <stdio.h>
@@ -138,6 +139,7 @@ enum fd_type {
FTYPE_XENBUS,
FTYPE_XC,
FTYPE_EVTCHN,
+ FTYPE_GNTMAP,
FTYPE_SOCKET,
FTYPE_TAP,
FTYPE_BLK,
@@ -168,6 +170,7 @@ extern struct file {
int bound;
} ports[MAX_EVTCHN_PORTS];
} evtchn;
+ struct gntmap gntmap;
struct {
struct netfront_dev *dev;
} tap;
diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c
index 0e88a76e71..457bde3c8d 100644
--- a/extras/mini-os/lib/sys.c
+++ b/extras/mini-os/lib/sys.c
@@ -84,6 +84,7 @@
#define NOFILE 32
extern int xc_evtchn_close(int fd);
extern int xc_interface_close(int fd);
+extern int xc_gnttab_close(int fd);
pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
struct file files[NOFILE] = {
@@ -230,8 +231,8 @@ int read(int fd, void *buf, size_t nbytes)
}
case FTYPE_FILE: {
ssize_t ret;
- if (nbytes > PAGE_SIZE)
- nbytes = PAGE_SIZE;
+ if (nbytes > PAGE_SIZE * FSIF_NR_READ_GNTS)
+ nbytes = PAGE_SIZE * FSIF_NR_READ_GNTS;
ret = fs_read(fs_import, files[fd].file.fd, buf, nbytes, files[fd].file.offset);
if (ret > 0) {
files[fd].file.offset += ret;
@@ -291,8 +292,8 @@ int write(int fd, const void *buf, size_t nbytes)
return nbytes;
case FTYPE_FILE: {
ssize_t ret;
- if (nbytes > PAGE_SIZE)
- nbytes = PAGE_SIZE;
+ if (nbytes > PAGE_SIZE * FSIF_NR_WRITE_GNTS)
+ nbytes = PAGE_SIZE * FSIF_NR_WRITE_GNTS;
ret = fs_write(fs_import, files[fd].file.fd, (void *) buf, nbytes, files[fd].file.offset);
if (ret > 0) {
files[fd].file.offset += ret;
@@ -401,6 +402,9 @@ int close(int fd)
case FTYPE_EVTCHN:
xc_evtchn_close(fd);
return 0;
+ case FTYPE_GNTMAP:
+ xc_gnttab_close(fd);
+ return 0;
case FTYPE_TAP:
shutdown_netfront(files[fd].tap.dev);
files[fd].type = FTYPE_NONE;
diff --git a/extras/mini-os/minios.mk b/extras/mini-os/minios.mk
index b7b7db8f2a..7ee19b3a86 100644
--- a/extras/mini-os/minios.mk
+++ b/extras/mini-os/minios.mk
@@ -21,6 +21,7 @@ DEF_CFLAGS += -g
#DEF_CFLAGS += -DFS_DEBUG
#DEF_CFLAGS += -DLIBC_DEBUG
DEF_CFLAGS += -DGNT_DEBUG
+DEF_CFLAGS += -DGNTMAP_DEBUG
else
DEF_CFLAGS += -O3
endif
diff --git a/extras/mini-os/pcifront.c b/extras/mini-os/pcifront.c
index a924a6e8d9..5b68d86b56 100644
--- a/extras/mini-os/pcifront.c
+++ b/extras/mini-os/pcifront.c
@@ -57,6 +57,7 @@ struct pcifront_dev *init_pcifront(char *_nodename)
int retry=0;
char* msg;
char* nodename = _nodename ? _nodename : "device/pci/0";
+ int dom;
struct pcifront_dev *dev;
@@ -64,12 +65,18 @@ struct pcifront_dev *init_pcifront(char *_nodename)
printk("******************* PCIFRONT for %s **********\n\n\n", nodename);
+ snprintf(path, sizeof(path), "%s/backend-id", nodename);
+ dom = xenbus_read_integer(path);
+ if (dom == -1) {
+ printk("no backend\n");
+ return NULL;
+ }
+
dev = malloc(sizeof(*dev));
memset(dev, 0, sizeof(*dev));
dev->nodename = strdup(nodename);
+ dev->dom = dom;
- snprintf(path, sizeof(path), "%s/backend-id", nodename);
- dev->dom = xenbus_read_integer(path);
evtchn_alloc_unbound(dev->dom, pcifront_handler, dev, &dev->evtchn);
dev->info = (struct xen_pci_sharedinfo*) alloc_page();
diff --git a/stubdom/Makefile b/stubdom/Makefile
index b82f668038..6a39c39e7d 100644
--- a/stubdom/Makefile
+++ b/stubdom/Makefile
@@ -3,17 +3,22 @@ MINI_OS = $(XEN_ROOT)/extras/mini-os
export XEN_OS=MiniOS
-CONFIG_QEMU=ioemu
-
export stubdom=y
export debug=y
include $(XEN_ROOT)/Config.mk
+override CONFIG_QEMU=ioemu
+
IOEMU_OPTIONS=--disable-sdl --disable-opengl --disable-gfx-check --disable-vnc-tls --disable-brlapi --disable-kqemu
+ZLIB_URL?=http://www.zlib.net
ZLIB_VERSION=1.2.3
+LIBPCI_URL?=http://www.kernel.org/pub/software/utils/pciutils
LIBPCI_VERSION=2.2.9
+NEWLIB_URL?=ftp://sources.redhat.com/pub/newlib
NEWLIB_VERSION=1.16.0
+LWIP_URL?=http://download.savannah.gnu.org/releases/lwip
LWIP_VERSION=1.3.0
+GRUB_URL?=http://alpha.gnu.org/gnu/grub
GRUB_VERSION=0.97
WGET=wget -c
@@ -75,7 +80,7 @@ endif
##############
newlib-$(NEWLIB_VERSION).tar.gz:
- $(WGET) ftp://sources.redhat.com/pub/newlib/$@
+ $(WGET) $(NEWLIB_URL)/$@
newlib-$(NEWLIB_VERSION): newlib-$(NEWLIB_VERSION).tar.gz
tar xzf $<
@@ -97,7 +102,7 @@ $(NEWLIB_STAMPFILE): mk-headers newlib-$(NEWLIB_VERSION)
############
zlib-$(ZLIB_VERSION).tar.gz:
- $(WGET) http://www.zlib.net/$@
+ $(WGET) $(ZLIB_URL)/$@
ZLIB_STAMPFILE=$(CROSS_ROOT)/$(GNU_TARGET_ARCH)-xen-elf/lib/libz.a
.PHONY: cross-zlib
@@ -114,7 +119,7 @@ $(ZLIB_STAMPFILE): zlib-$(ZLIB_VERSION).tar.gz $(NEWLIB_STAMPFILE)
##############
pciutils-$(LIBPCI_VERSION).tar.bz2:
- $(WGET) http://www.kernel.org/pub/software/utils/pciutils/$@
+ $(WGET) $(LIBPCI_URL)/$@
pciutils-$(LIBPCI_VERSION): pciutils-$(LIBPCI_VERSION).tar.bz2
tar xjf $<
@@ -132,7 +137,7 @@ $(LIBPCI_STAMPFILE): pciutils-$(LIBPCI_VERSION) $(NEWLIB_STAMPFILE) $(ZLIB_STAMP
$(MAKE) CC="$(CC) $(TARGET_CPPFLAGS) $(TARGET_CFLAGS) -I$(realpath $(MINI_OS)/include)" lib/libpci.a && \
$(INSTALL_DATA) lib/libpci.a $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/lib/ && \
$(INSTALL_DIR) $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci && \
- $(INSTALL_DATA) lib/{config,header,pci,types}.h $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \
+ $(INSTALL_DATA) lib/config.h lib/header.h lib/pci.h lib/types.h $(CROSS_PREFIX)/$(GNU_TARGET_ARCH)-xen-elf/include/pci/ \
)
######
@@ -140,7 +145,7 @@ $(LIBPCI_STAMPFILE): pciutils-$(LIBPCI_VERSION) $(NEWLIB_STAMPFILE) $(ZLIB_STAMP
######
lwip-$(LWIP_VERSION).tar.gz:
- $(WGET) http://download.savannah.gnu.org/releases/lwip/$@
+ $(WGET) $(LWIP_URL)/$@
lwip: lwip-$(LWIP_VERSION).tar.gz
tar xzf $<
@@ -154,7 +159,6 @@ lwip: lwip-$(LWIP_VERSION).tar.gz
.PHONY: $(CROSS_ROOT)
$(CROSS_ROOT): cross-newlib cross-zlib cross-libpci
-.PHONY: mk-headers
mk-headers:
mkdir -p include/xen && \
ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
@@ -191,6 +195,7 @@ endif
[ ! -h ioemu/config-host.h ] || rm -f ioemu/config-host.h
[ ! -h ioemu/config-host.mak ] || rm -f ioemu/config-host.mak
$(MAKE) -C $(MINI_OS) links
+ touch mk-headers
TARGETS_MINIOS=$(addprefix mini-os-,$(TARGETS))
$(TARGETS_MINIOS): mini-os-%:
@@ -247,7 +252,7 @@ c: $(CROSS_ROOT)
######
grub-$(GRUB_VERSION).tar.gz:
- $(WGET) ftp://alpha.gnu.org/gnu/grub/$@
+ $(WGET) $(GRUB_URL)/$@
grub-upstream: grub-$(GRUB_VERSION).tar.gz
tar xzf $<
@@ -291,20 +296,24 @@ pv-grub: mini-os-grub libxc grub
#########
ifeq ($(STUBDOM_SUPPORTED),1)
-install: install-ioemu install-grub
+install: install-readme install-ioemu install-grub
else
install:
endif
+install-readme:
+ $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
+ $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.stubdom
+
install-ioemu: ioemu-stubdom
$(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/bin"
$(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin"
$(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_PROG) mini-os-ioemu/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz"
+ $(INSTALL_DATA) mini-os-ioemu/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz"
install-grub: pv-grub
$(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_PROG) mini-os-grub/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/pv-grub.gz"
+ $(INSTALL_DATA) mini-os-grub/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/pv-grub.gz"
#######
# clean
@@ -320,7 +329,8 @@ clean:
$(MAKE) -C caml clean
$(MAKE) -C c clean
$(MAKE) -C grub clean
- rm -fr libxc ioemu mini-os include
+ [ ! -d libxc ] || $(MAKE) -C libxc clean
+ [ ! -d ioemu ] || $(MAKE) -C ioemu clean
# clean the cross-compilation result
.PHONY: crossclean
@@ -328,6 +338,8 @@ crossclean: clean
rm -fr $(CROSS_ROOT)
rm -fr newlib-build
rm -fr zlib-$(ZLIB_VERSION) pciutils-$(LIBPCI_VERSION)
+ rm -fr libxc ioemu
+ rm -f mk-headers
# clean patched sources
.PHONY: patchclean
diff --git a/stubdom/README b/stubdom/README
index 8a47525491..a70e31a2b4 100644
--- a/stubdom/README
+++ b/stubdom/README
@@ -1,13 +1,3 @@
-To compile
-==========
-
-Just run make -j 4, that will download / patch / compile
-Then make install to install the result.
-
-Also, run make and make install in $XEN_ROOT/tools/fs-back
-
-
-
IOEMU stubdom
=============
@@ -16,6 +6,14 @@ Also, run make and make install in $XEN_ROOT/tools/fs-back
General Configuration
=====================
+Due to a race between the creation of the IOEMU stubdomain itself and allocation
+of video memory for the HVM domain, you must make sure no ballooning is needed,
+for instance by using the hypervisor dom0_mem= option.
+
+
+There is a sample configuration set in xmexample.hvm-stubdom and
+xmexample.hvm-dm
+
In your HVM config "hvmconfig",
- use /usr/lib/xen/bin/stubdom-dm as dm script:
diff --git a/stubdom/stubdom-dm b/stubdom/stubdom-dm
index 49df178fcb..a800bc4746 100644
--- a/stubdom/stubdom-dm
+++ b/stubdom/stubdom-dm
@@ -55,7 +55,7 @@ term() {
kill %1
(
[ -n "$vncpid" ] && kill -9 $vncpid
- xm destroy stubdom-$domname
+ xm destroy $domname-dm
#xm destroy $domname
) &
# We need to exit immediately so as to let xend do the commands above
@@ -67,12 +67,12 @@ trap term SIGHUP
############
# stubdomain
# Wait for any previous stubdom to terminate
-while xm list | grep stubdom-$domname
+while xm list | grep $domname-dm
do
sleep 1
done
-creation="xm create -c stubdom-$domname target=$domid memory=32 extra=\"$extra\""
+creation="xm create -c $domname-dm target=$domid memory=32 extra=\"$extra\""
(while true ; do sleep 60 ; done) | /bin/sh -c "$creation" &
#xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
diff --git a/tools/Makefile b/tools/Makefile
index 8920ae9a9b..bcc9fb8d0a 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -55,13 +55,14 @@ install: subdirs-install
clean distclean: subdirs-clean
ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
-IOEMU_CONFIGURE_CROSS ?= --cross-prefix=$(CROSS_COMPILE) \
+IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \
+ --cross-prefix=$(CROSS_COMPILE) \
--interp-prefix=$(CROSS_SYS_ROOT)
endif
ioemu/config-host.mak:
- cd ioemu && XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) sh configure --prefix=/usr \
- $(IOEMU_CONFIGURE_CROSS)
+ cd ioemu && XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) sh configure \
+ --prefix=$(PREFIX) $(IOEMU_CONFIGURE_CROSS)
subdir-all-ioemu subdir-install-ioemu: ioemu/config-host.mak
@@ -78,6 +79,12 @@ ioemu-dir-find:
rm -rf ioemu-remote ioemu-remote.tmp; \
mkdir ioemu-remote.tmp; rmdir ioemu-remote.tmp; \
$(GIT) clone $(CONFIG_QEMU) ioemu-remote.tmp; \
+ if [ "$(QEMU_TAG)" ]; then \
+ cd ioemu-remote.tmp; \
+ $(GIT) branch -D dummy >/dev/null 2>&1 ||:; \
+ $(GIT) checkout -b dummy $(QEMU_TAG); \
+ cd ..; \
+ fi; \
mv ioemu-remote.tmp ioemu-remote; \
fi; \
rm -f ioemu-dir; \
@@ -90,7 +97,7 @@ ioemu-dir-find:
esac; \
export XEN_ROOT; \
cd ioemu-dir; \
- ./xen-setup
+ ./xen-setup $(IOEMU_CONFIGURE_CROSS)
subdir-all-ioemu-dir subdir-install-ioemu-dir: ioemu-dir-find
diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile
index 871e2c4b60..21c9556f60 100644
--- a/tools/blktap/Makefile
+++ b/tools/blktap/Makefile
@@ -8,3 +8,6 @@ SUBDIRS-y += drivers
.PHONY: all clean install
all clean install: %: subdirs-%
+install:
+ $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
+ $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.blktap
diff --git a/tools/blktap/lib/Makefile b/tools/blktap/lib/Makefile
index 4b3f036ee7..aedf7dcd2e 100644
--- a/tools/blktap/lib/Makefile
+++ b/tools/blktap/lib/Makefile
@@ -43,7 +43,7 @@ install: all
.PHONY: clean
clean:
- rm -rf *.a *.so* *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS
+ rm -rf *.a *.so* *.o *.opic *.rpm $(LIB) *~ $(DEPS) xen TAGS
libblktap.so.$(MAJOR).$(MINOR): $(OBJS_PIC)
$(CC) $(CFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,$(SONAME) $(SHLIB_CFLAGS) \
diff --git a/tools/console/Makefile b/tools/console/Makefile
index 3d7e0fe307..10e909a0b6 100644
--- a/tools/console/Makefile
+++ b/tools/console/Makefile
@@ -21,7 +21,7 @@ clean:
xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) \
- $(UTIL_LIBS) $(SOCKET_LIBS)
+ $(UTIL_LIBS) $(SOCKET_LIBS) -lrt
xenconsole: $(patsubst %.c,%.o,$(wildcard client/*.c))
$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) \
diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
index 16a0b6c27e..78aea83ed7 100644
--- a/tools/console/daemon/io.c
+++ b/tools/console/daemon/io.c
@@ -622,9 +622,9 @@ static struct domain *create_domain(int domid)
{
struct domain *dom;
char *s;
- struct timeval tv;
+ struct timespec ts;
- if (gettimeofday(&tv, NULL) < 0) {
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) {
dolog(LOG_ERR, "Cannot get time of day %s:%s:L%d",
__FILE__, __FUNCTION__, __LINE__);
return NULL;
@@ -666,7 +666,7 @@ static struct domain *create_domain(int domid)
dom->buffer.capacity = 0;
dom->buffer.max_capacity = 0;
dom->event_count = 0;
- dom->next_period = (tv.tv_sec * 1000) + (tv.tv_usec / 1000) + RATE_LIMIT_PERIOD;
+ dom->next_period = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD;
dom->next = NULL;
dom->ring_ref = -1;
@@ -971,7 +971,7 @@ void handle_io(void)
struct domain *d, *n;
int max_fd = -1;
struct timeval timeout;
- struct timeval tv;
+ struct timespec ts;
long long now, next_timeout = 0;
FD_ZERO(&readfds);
@@ -985,9 +985,9 @@ void handle_io(void)
max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
}
- if (gettimeofday(&tv, NULL) < 0)
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
return;
- now = (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+ now = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000);
/* Re-calculate any event counter allowances & unblock
domains with new allowance */
diff --git a/tools/examples/Makefile b/tools/examples/Makefile
index 8f967084a5..39310394f5 100644
--- a/tools/examples/Makefile
+++ b/tools/examples/Makefile
@@ -8,11 +8,18 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig.xendomains
# Xen configuration dir and configs to go there.
XEN_CONFIG_DIR = /etc/xen
+XEN_READMES = README
+XEN_READMES += README.incompatibilities
XEN_CONFIGS = xend-config.sxp
XEN_CONFIGS += xm-config.xml
XEN_CONFIGS += xmexample1
XEN_CONFIGS += xmexample2
+XEN_CONFIGS += xmexample3
XEN_CONFIGS += xmexample.hvm
+XEN_CONFIGS += xmexample.hvm-stubdom
+XEN_CONFIGS += xmexample.hvm-dm
+XEN_CONFIGS += xmexample.pv-grub
+XEN_CONFIGS += xmexample.nbd
XEN_CONFIGS += xmexample.vti
XEN_CONFIGS += xend-pci-quirks.sxp
XEN_CONFIGS += xend-pci-permissive.sxp
@@ -59,7 +66,16 @@ all:
build:
.PHONY: install
-install: all install-initd install-configs install-scripts $(HOTPLUGS)
+install: all install-readmes install-initd install-configs install-scripts $(HOTPLUGS)
+
+.PHONY: install-readmes
+install-readmes:
+ [ -d $(DESTDIR)$(XEN_CONFIG_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_CONFIG_DIR)
+ set -e; for i in $(XEN_READMES); \
+ do [ -e $(DESTDIR)$(XEN_CONFIG_DIR)/$$i ] || \
+ $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \
+ done
.PHONY: install-initd
install-initd:
diff --git a/tools/examples/README b/tools/examples/README
index 00ef8dc352..e2e8f434df 100644
--- a/tools/examples/README
+++ b/tools/examples/README
@@ -44,4 +44,8 @@ xmexample3 - an advanced configuration script for 'xm create'
xmexample.nbd - configuration script that uses NBD filesystems
xmexample.hvm - a configuration script for creating a hvm domain with
'xm create'
+xmexample.hvm-stubdom - a configuration script for creating a hvm domain with
+ 'xm create' that utilizes a stubdomain for device model
+xmexample.pv-grub - a configuration script for creating a domain with 'xm create'
+ which boots PV-GRUB.
xmexample.vti - a configuration script for creating a domain on vti
diff --git a/tools/examples/xend-config.sxp b/tools/examples/xend-config.sxp
index 5c1b6ec446..89f9fcae96 100644
--- a/tools/examples/xend-config.sxp
+++ b/tools/examples/xend-config.sxp
@@ -245,3 +245,7 @@
# Rotation count of qemu-dm log file.
#(qemu-dm-logrotate-count 10)
+
+# Path where persistent domain configuration is stored.
+# Default is /var/lib/xend/domains/
+#(xend-domains-path /var/lib/xend/domains)
diff --git a/tools/examples/xmexample.hvm b/tools/examples/xmexample.hvm
index 94a0fd160a..63df017974 100644
--- a/tools/examples/xmexample.hvm
+++ b/tools/examples/xmexample.hvm
@@ -158,11 +158,6 @@ vnc=1
#vncunused=1
#----------------------------------------------------------------------------
-# enable spawning vncviewer for domain's console
-# (only valid when vnc=1), default = 0
-#vncconsole=0
-
-#----------------------------------------------------------------------------
# set password for domain's VNC console
# default is depents on vncpasswd in xend-config.sxp
vncpasswd=''
diff --git a/tools/examples/stubdom-ExampleHVMDomain b/tools/examples/xmexample.hvm-dm
index de1619ba32..de1619ba32 100644
--- a/tools/examples/stubdom-ExampleHVMDomain
+++ b/tools/examples/xmexample.hvm-dm
diff --git a/tools/examples/xmexample.hvm-stubdom b/tools/examples/xmexample.hvm-stubdom
index 117273c5e5..94d6c1b103 100644
--- a/tools/examples/xmexample.hvm-stubdom
+++ b/tools/examples/xmexample.hvm-stubdom
@@ -7,7 +7,7 @@
#============================================================================
#
# This is a version using a stubdomain for device model, see
-# stubdom-ExampleHVMDomain and stubdom/README for more details
+# xmexample.hvm-dm and README.stubdom for more details
# The differences with xmexample.hvm are marked with "STUBDOM"
#----------------------------------------------------------------------------
@@ -30,7 +30,7 @@ memory = 128
# shadow_memory = 8
# A name for your domain. All domains must have different names.
-name = "ExampleHVMDomain"
+name = "xmexample.hvm"
# 128-bit UUID for the domain. The default behavior is to generate a new UUID
# on each call to 'xm create'.
diff --git a/tools/examples/xmexample.pv-grub b/tools/examples/xmexample.pv-grub
new file mode 100644
index 0000000000..a856028ac2
--- /dev/null
+++ b/tools/examples/xmexample.pv-grub
@@ -0,0 +1,212 @@
+# -*- mode: python; -*-
+#============================================================================
+# Python configuration setup for 'xm create'.
+# This script sets the parameters used when a domain is created using 'xm create'.
+# You use a separate script for each domain you want to create, or
+# you can set the parameters for the domain on the xm command line.
+#============================================================================
+
+#----------------------------------------------------------------------------
+# PV GRUB image file.
+kernel = "/usr/lib/xen/boot/pv-grub.gz"
+
+# Optional provided menu.lst.
+#ramdisk = "/boot/guests/menu.lst"
+
+# Sets path to menu.lst
+extra = "(hd0,0)/boot/grub/menu.lst"
+# can be a TFTP-served path (DHCP will automatically be run)
+# extra = "(nd)/netboot/menu.lst"
+# can be configured automatically by GRUB's DHCP option 150 (see grub manual)
+# extra = ""
+
+# Initial memory allocation (in megabytes) for the new domain.
+#
+# WARNING: Creating a domain with insufficient memory may cause out of
+# memory errors. The domain needs enough memory to boot kernel
+# and modules. Allocating less than 32MBs is not recommended.
+memory = 64
+
+# A name for your domain. All domains must have different names.
+name = "ExampleDomain"
+
+# 128-bit UUID for the domain. The default behavior is to generate a new UUID
+# on each call to 'xm create'.
+#uuid = "06ed00fe-1162-4fc4-b5d8-11993ee4a8b9"
+
+# List of which CPUS this domain is allowed to use, default Xen picks
+#cpus = "" # leave to Xen to pick
+#cpus = "0" # all vcpus run on CPU0
+#cpus = "0-3,5,^1" # all vcpus run on cpus 0,2,3,5
+#cpus = ["2", "3"] # VCPU0 runs on CPU2, VCPU1 runs on CPU3
+
+# Number of Virtual CPUS to use, default is 1
+#vcpus = 1
+
+#----------------------------------------------------------------------------
+# Define network interfaces.
+
+# By default, no network interfaces are configured. You may have one created
+# with sensible defaults using an empty vif clause:
+#
+# vif = [ '' ]
+#
+# or optionally override backend, bridge, ip, mac, script, type, or vifname:
+#
+# vif = [ 'mac=00:16:3e:00:00:11, bridge=xenbr0' ]
+#
+# or more than one interface may be configured:
+#
+# vif = [ '', 'bridge=xenbr1' ]
+
+vif = [ '' ]
+
+#----------------------------------------------------------------------------
+# Define the disk devices you want the domain to have access to, and
+# what you want them accessible as.
+# Each disk entry is of the form phy:UNAME,DEV,MODE
+# where UNAME is the device, DEV is the device name the domain will see,
+# and MODE is r for read-only, w for read-write.
+
+disk = [ 'phy:hda1,hda1,w' ]
+
+#----------------------------------------------------------------------------
+# Define frame buffer device.
+#
+# By default, no frame buffer device is configured.
+#
+# To create one using the SDL backend and sensible defaults:
+#
+# vfb = [ 'type=sdl' ]
+#
+# This uses environment variables XAUTHORITY and DISPLAY. You
+# can override that:
+#
+# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+#
+# To create one using the VNC backend and sensible defaults:
+#
+# vfb = [ 'type=vnc' ]
+#
+# The backend listens on 127.0.0.1 port 5900+N by default, where N is
+# the domain ID. You can override both address and N:
+#
+# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ]
+#
+# Or you can bind the first unused port above 5900:
+#
+# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+#
+# You can override the password:
+#
+# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+#
+# Empty password disables authentication. Defaults to the vncpasswd
+# configured in xend-config.sxp.
+
+#----------------------------------------------------------------------------
+# Define to which TPM instance the user domain should communicate.
+# The vtpm entry is of the form 'instance=INSTANCE,backend=DOM'
+# where INSTANCE indicates the instance number of the TPM the VM
+# should be talking to and DOM provides the domain where the backend
+# is located.
+# Note that no two virtual machines should try to connect to the same
+# TPM instance. The handling of all TPM instances does require
+# some management effort insofar as a VM configuration file (and thus
+# a VM) should be associated with a TPM instance throughout the lifetime
+# of the VM / VM configuration file. The instance number must be
+# greater than or equal to 1.
+#vtpm = [ 'instance=1,backend=0' ]
+
+#----------------------------------------------------------------------------
+# Set the kernel command line for the new domain.
+# You only need to define the IP parameters and hostname if the domain's
+# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP.
+# You can use 'extra' to set the runlevel and custom environment
+# variables used by custom rc scripts (e.g. VMID=, usr= ).
+
+# Set if you want dhcp to allocate the IP address.
+#dhcp="dhcp"
+# Set netmask.
+#netmask=
+# Set default gateway.
+#gateway=
+# Set the hostname.
+#hostname= "vm%d" % vmid
+
+# Set root device.
+root = "/dev/hda1 ro"
+
+# Root device for nfs.
+#root = "/dev/nfs"
+# The nfs server.
+#nfs_server = '192.0.2.1'
+# Root directory on the nfs server.
+#nfs_root = '/full/path/to/root/directory'
+
+#----------------------------------------------------------------------------
+# Configure the behaviour when a domain exits. There are three 'reasons'
+# for a domain to stop: poweroff, reboot, and crash. For each of these you
+# may specify:
+#
+# "destroy", meaning that the domain is cleaned up as normal;
+# "restart", meaning that a new domain is started in place of the old
+# one;
+# "preserve", meaning that no clean-up is done until the domain is
+# manually destroyed (using xm destroy, for example); or
+# "rename-restart", meaning that the old domain is not cleaned up, but is
+# renamed and a new domain started in its place.
+#
+# In the event a domain stops due to a crash, you have the additional options:
+#
+# "coredump-destroy", meaning dump the crashed domain's core and then destroy;
+# "coredump-restart", meaning dump the crashed domain's core and then restart.
+#
+# The default is
+#
+# on_poweroff = 'destroy'
+# on_reboot = 'restart'
+# on_crash = 'restart'
+#
+# For backwards compatibility we also support the deprecated option restart
+#
+# restart = 'onreboot' means on_poweroff = 'destroy'
+# on_reboot = 'restart'
+# on_crash = 'destroy'
+#
+# restart = 'always' means on_poweroff = 'restart'
+# on_reboot = 'restart'
+# on_crash = 'restart'
+#
+# restart = 'never' means on_poweroff = 'destroy'
+# on_reboot = 'destroy'
+# on_crash = 'destroy'
+
+#on_poweroff = 'destroy'
+#on_reboot = 'restart'
+#on_crash = 'restart'
+
+#-----------------------------------------------------------------------------
+# Configure PVSCSI devices:
+#
+#vscsi=[ 'PDEV, VDEV' ]
+#
+# PDEV gives the physical SCSI device to be attached to the specified guest
+# domain, using one of the following identifier formats.
+# - XX:XX:XX:XX (4-tuples with decimal notation which shows
+# "host:channel:target:lun")
+# - /dev/sdxx or sdx
+# - /dev/stxx or stx
+# - /dev/sgxx or sgx
+# - result of 'scsi_id -gu -s'.
+# ex. # scsi_id -gu -s /block/sdb
+# 36000b5d0006a0000006a0257004c0000
+#
+# VDEV gives the virtual SCSI device as a 4-tuple (XX:XX:XX:XX) under
+# which the specified guest domain recognizes it.
+#
+
+#vscsi = [ '/dev/sdx, 0:0:0:0' ]
+
+#============================================================================
+
diff --git a/tools/examples/xmexample.vti b/tools/examples/xmexample.vti
index 9493bae130..3169e52d03 100644
--- a/tools/examples/xmexample.vti
+++ b/tools/examples/xmexample.vti
@@ -95,11 +95,6 @@ vnc=0
#vncunused=1
#----------------------------------------------------------------------------
-# enable spawning vncviewer for domain's console
-# (only valid when vnc=1), default = 0
-#vncconsole=0
-
-#----------------------------------------------------------------------------
# set password for domain's VNC console
# default is depents on vncpasswd in xend-config.sxp
vncpasswd=''
diff --git a/tools/examples/xmexample3 b/tools/examples/xmexample3
index 8610fcc483..99281904fd 100644
--- a/tools/examples/xmexample3
+++ b/tools/examples/xmexample3
@@ -207,4 +207,26 @@ extra = "4 VMID=%d" % vmid
#on_reboot = 'restart'
#on_crash = 'restart'
+#-----------------------------------------------------------------------------
+# Configure PVSCSI devices:
+#
+#vscsi=[ 'PDEV, VDEV' ]
+#
+# PDEV specifies the physical SCSI device to attach to the guest domain,
+# using one of the following identifier formats:
+# - XX:XX:XX:XX (4-tuples with decimal notation which shows
+# "host:channel:target:lun")
+# - /dev/sdxx or sdx
+# - /dev/stxx or stx
+# - /dev/sgxx or sgx
+# - result of 'scsi_id -gu -s'.
+# ex. # scsi_id -gu -s /block/sdb
+# 36000b5d0006a0000006a0257004c0000
+#
+# VDEV specifies the virtual SCSI device as a 4-tuple (XX:XX:XX:XX), i.e.
+# the address at which the guest domain sees the device.
+#
+
+#vscsi = [ '/dev/sdx, 0:0:0:0' ]
+
#============================================================================
diff --git a/tools/firmware/extboot/Makefile b/tools/firmware/extboot/Makefile
index eec5831c3a..def1fbb861 100644
--- a/tools/firmware/extboot/Makefile
+++ b/tools/firmware/extboot/Makefile
@@ -1,8 +1,6 @@
XEN_ROOT = ../../..
include $(XEN_ROOT)/tools/firmware/Rules.mk
-CFLAGS += -I$(XEN_ROOT)/tools/libxc -I.
-
.PHONY: all
all: extboot.bin
diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile
index 826338560e..5988da6816 100644
--- a/tools/firmware/hvmloader/Makefile
+++ b/tools/firmware/hvmloader/Makefile
@@ -33,14 +33,14 @@ SRCS += 32bitbios_support.c smp.c cacheattr.c
OBJS = $(patsubst %.c,%.o,$(SRCS))
.PHONY: all
-all: hvmloader
+all: subdirs-all
+ $(MAKE) hvmloader
hvmloader.o: roms.h
smbios.o: CFLAGS += -D__SMBIOS_DATE__="\"$(shell date +%m/%d/%Y)\""
-hvmloader: subdirs-all $(OBJS)
- $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) \
- -o hvmloader.tmp $(OBJS) acpi/acpi.a
+hvmloader: $(OBJS) acpi/acpi.a
+ $(LD) $(LDFLAGS_DIRECT) -N -Ttext $(LOADADDR) -o hvmloader.tmp $^
$(OBJCOPY) hvmloader.tmp hvmloader
rm -f hvmloader.tmp
diff --git a/tools/firmware/hvmloader/acpi/acpi2_0.h b/tools/firmware/hvmloader/acpi/acpi2_0.h
index 5846cb2fac..cf0ca01617 100644
--- a/tools/firmware/hvmloader/acpi/acpi2_0.h
+++ b/tools/firmware/hvmloader/acpi/acpi2_0.h
@@ -381,7 +381,7 @@ struct acpi_20_madt_intsrcovr {
#pragma pack ()
-int acpi_build_tables(uint8_t *);
+void acpi_build_tables(void);
#endif /* _ACPI_2_0_H_ */
diff --git a/tools/firmware/hvmloader/acpi/build.c b/tools/firmware/hvmloader/acpi/build.c
index 75e1a60e55..e753bf3276 100644
--- a/tools/firmware/hvmloader/acpi/build.c
+++ b/tools/firmware/hvmloader/acpi/build.c
@@ -248,8 +248,7 @@ static int construct_secondary_tables(uint8_t *buf, unsigned long *table_ptrs)
return align16(offset);
}
-/* Copy all the ACPI table to buffer. */
-int acpi_build_tables(uint8_t *buf)
+static void __acpi_build_tables(uint8_t *buf, int *low_sz, int *high_sz)
{
struct acpi_20_rsdp *rsdp;
struct acpi_20_rsdt *rsdt;
@@ -261,7 +260,9 @@ int acpi_build_tables(uint8_t *buf)
unsigned long secondary_tables[16];
int offset = 0, i;
- offset += construct_bios_info_table(&buf[offset]);
+ /*
+ * Fill in high-memory data structures, starting at @buf.
+ */
facs = (struct acpi_20_facs *)&buf[offset];
memcpy(facs, &Facs, sizeof(struct acpi_20_facs));
@@ -325,7 +326,18 @@ int acpi_build_tables(uint8_t *buf)
offsetof(struct acpi_header, checksum),
rsdt->header.length);
+ *high_sz = offset;
+
+ /*
+ * Fill in low-memory data structures: bios_info_table and RSDP.
+ */
+
+ buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS;
+ offset = 0;
+
+ offset += construct_bios_info_table(&buf[offset]);
rsdp = (struct acpi_20_rsdp *)&buf[offset];
+
memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp));
offset += align16(sizeof(struct acpi_20_rsdp));
rsdp->rsdt_address = (unsigned long)rsdt;
@@ -337,7 +349,28 @@ int acpi_build_tables(uint8_t *buf)
offsetof(struct acpi_20_rsdp, extended_checksum),
sizeof(struct acpi_20_rsdp));
- return offset;
+ *low_sz = offset;
+}
+
+void acpi_build_tables(void)
+{
+ int high_sz, low_sz;
+ uint8_t *buf;
+
+ /* Find out size of high-memory ACPI data area. */
+ buf = (uint8_t *)&_end;
+ __acpi_build_tables(buf, &low_sz, &high_sz);
+ memset(buf, 0, high_sz);
+
+ /* Allocate data area and set up ACPI tables there. */
+ buf = (uint8_t *)e820_malloc(high_sz);
+ __acpi_build_tables(buf, &low_sz, &high_sz);
+
+ printf(" - Lo data: %08lx-%08lx\n"
+ " - Hi data: %08lx-%08lx\n",
+ (unsigned long)ACPI_PHYSICAL_ADDRESS,
+ (unsigned long)ACPI_PHYSICAL_ADDRESS + low_sz - 1,
+ (unsigned long)buf, (unsigned long)buf + high_sz - 1);
}
/*
diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c
index 5f1f22e6dd..f0e5816caa 100644
--- a/tools/firmware/hvmloader/hvmloader.c
+++ b/tools/firmware/hvmloader/hvmloader.c
@@ -449,7 +449,7 @@ static void init_xen_platform_io_base(void)
int main(void)
{
- int acpi_sz = 0, vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz;
+ int vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz;
int extboot_sz = 0;
printf("HVM Loader\n");
@@ -508,8 +508,7 @@ int main(void)
if ( get_acpi_enabled() )
{
printf("Loading ACPI ...\n");
- acpi_sz = acpi_build_tables((uint8_t *)ACPI_PHYSICAL_ADDRESS);
- ASSERT((ACPI_PHYSICAL_ADDRESS + acpi_sz) <= 0xF0000);
+ acpi_build_tables();
}
cmos_write_memory_size();
@@ -531,10 +530,6 @@ int main(void)
printf(" %05x-%05x: SMBIOS tables\n",
SMBIOS_PHYSICAL_ADDRESS,
SMBIOS_PHYSICAL_ADDRESS + smbios_sz - 1);
- if ( acpi_sz )
- printf(" %05x-%05x: ACPI tables\n",
- ACPI_PHYSICAL_ADDRESS,
- ACPI_PHYSICAL_ADDRESS + acpi_sz - 1);
if ( rombios_sz )
printf(" %05x-%05x: Main BIOS\n",
ROMBIOS_PHYSICAL_ADDRESS,
diff --git a/tools/firmware/hvmloader/util.h b/tools/firmware/hvmloader/util.h
index 455b2f66e6..4d85e2cef9 100644
--- a/tools/firmware/hvmloader/util.h
+++ b/tools/firmware/hvmloader/util.h
@@ -145,4 +145,6 @@ void smp_initialise(void);
#define isdigit(c) ((c) >= '0' && (c) <= '9')
+extern char _start[], _end[];
+
#endif /* __HVMLOADER_UTIL_H__ */
diff --git a/tools/firmware/rombios/32bit/32bitbios.c b/tools/firmware/rombios/32bit/32bitbios.c
index 14dba9bf33..551a9ffdc0 100644
--- a/tools/firmware/rombios/32bit/32bitbios.c
+++ b/tools/firmware/rombios/32bit/32bitbios.c
@@ -47,5 +47,7 @@ uint32_t jumptable[IDX_LAST+1] __attribute__((section (".biosjumptable"))) =
TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm),
+ TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector),
+
TABLE_ENTRY(IDX_LAST , 0) /* keep last */
};
diff --git a/tools/firmware/rombios/32bit/Makefile b/tools/firmware/rombios/32bit/Makefile
index f42d7f39b5..cdad7561b9 100644
--- a/tools/firmware/rombios/32bit/Makefile
+++ b/tools/firmware/rombios/32bit/Makefile
@@ -4,21 +4,22 @@ include $(XEN_ROOT)/tools/firmware/Rules.mk
SOURCES = util.c
TARGET = 32bitbios_flat.h
-CFLAGS += -I../ -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS
SUBDIRS = tcgbios
MODULES = tcgbios/tcgbiosext.o
.PHONY: all
-all: $(TARGET)
+all: subdirs-all
+ $(MAKE) $(TARGET)
.PHONY: clean
clean: subdirs-clean
rm -rf *.o $(TARGET)
-$(TARGET): subdirs-all 32bitbios.o util.o
- $(LD) $(LDFLAGS_DIRECT) -s -r 32bitbios.o $(MODULES) util.o -o 32bitbios_all.o
+$(TARGET): 32bitbios.o $(MODULES) util.o
+ $(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
@nm 32bitbios_all.o | \
egrep '^ +U ' >/dev/null && { \
echo "There are undefined symbols in the BIOS:"; \
diff --git a/tools/firmware/rombios/32bit/tcgbios/Makefile b/tools/firmware/rombios/32bit/tcgbios/Makefile
index e272f288f6..1b3cf2b94a 100644
--- a/tools/firmware/rombios/32bit/tcgbios/Makefile
+++ b/tools/firmware/rombios/32bit/tcgbios/Makefile
@@ -5,7 +5,7 @@ TARGET = tcgbiosext.o
FILES = tcgbios tpm_drivers
OBJECTS = $(foreach f,$(FILES),$(f).o)
-CFLAGS += -I../ -I../../ -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS
.PHONY: all clean
diff --git a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
index 263607471b..b06af22f00 100644
--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c
@@ -24,10 +24,9 @@
#include "rombios_compat.h"
#include "tpm_drivers.h"
+#include "util.h"
#include "tcgbios.h"
#include "32bitprotos.h"
-#include "util.h"
-
/* local structure and variables */
struct ptti_cust {
@@ -135,7 +134,7 @@ static inline uint32_t bswap(uint32_t a)
*******************************************************/
typedef struct {
- struct acpi_20_tcpa *tcpa_ptr;
+ struct acpi_20_tcpa_clisrv *tcpa_ptr;
unsigned char *lasa_last_ptr;
uint16_t entry_count;
uint16_t flags;
@@ -260,45 +259,19 @@ uint8_t acpi_validate_entry(struct acpi_header *hdr)
}
-/*
- * Search for the RSDP ACPI table in the memory starting at addr and
- * ending at addr + len - 1.
- */
-static struct acpi_20_rsdp *find_rsdp(const void *start, unsigned int len)
-{
- char *rsdp = (char *)start;
- char *end = rsdp + len;
- /* scan memory in steps of 16 bytes */
- while (rsdp < end) {
- /* check for expected string */
- if (!strncmp( rsdp, "RSD PTR ", 8))
- return (struct acpi_20_rsdp *)rsdp;
- rsdp += 0x10;
- }
- return 0;
-}
-
void tcpa_acpi_init(void)
{
struct acpi_20_rsdt *rsdt;
- struct acpi_20_tcpa *tcpa = (void *)0;
+ struct acpi_20_tcpa_clisrv *tcpa = (void *)0;
struct acpi_20_rsdp *rsdp;
uint32_t length;
uint16_t off;
int found = 0;
- uint16_t ebda_seg;
- if (MA_IsTPMPresent() == 0) {
+ if (MA_IsTPMPresent() == 0)
return;
- }
-
- /* RSDP in EBDA? */
- ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
- rsdp = find_rsdp((void *)(ebda_seg << 16), 1024);
-
- if (!rsdp)
- rsdp = find_rsdp((void *)(ACPI_SEGMENT << 4), 0x20000);
+ rsdp = find_rsdp();
if (rsdp) {
uint32_t ctr = 0;
/* get RSDT from RSDP */
@@ -307,7 +280,7 @@ void tcpa_acpi_init(void)
off = 36;
while ((off + 3) < length) {
/* try all pointers to structures */
- tcpa = (struct acpi_20_tcpa *)rsdt->entry[ctr];
+ tcpa = (struct acpi_20_tcpa_clisrv *)rsdt->entry[ctr];
/* valid TCPA ACPI table ? */
if (ACPI_2_0_TCPA_SIGNATURE == tcpa->header.signature
&& acpi_validate_entry(&tcpa->header) == 0) {
@@ -398,7 +371,7 @@ static
unsigned char *tcpa_get_lasa_base_ptr(void)
{
unsigned char *lasa = 0;
- struct acpi_20_tcpa *tcpa = tcpa_acpi.tcpa_ptr;
+ struct acpi_20_tcpa_clisrv *tcpa = tcpa_acpi.tcpa_ptr;
if (tcpa != 0) {
uint32_t class = tcpa->platform_class;
if (class == TCPA_ACPI_CLASS_CLIENT) {
@@ -416,7 +389,7 @@ static
uint32_t tcpa_get_laml(void)
{
uint32_t laml = 0;
- struct acpi_20_tcpa *tcpa = tcpa_acpi.tcpa_ptr;
+ struct acpi_20_tcpa_clisrv *tcpa = tcpa_acpi.tcpa_ptr;
if (tcpa != 0) {
uint32_t class = tcpa->platform_class;
if (class == TCPA_ACPI_CLASS_CLIENT) {
diff --git a/tools/firmware/rombios/32bit/tcgbios/tcgbios.h b/tools/firmware/rombios/32bit/tcgbios/tcgbios.h
index bf2b133c52..f16b586b28 100644
--- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.h
+++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.h
@@ -1,7 +1,6 @@
#ifndef TCGBIOS_H
#define TCGBIOS_H
-
/* TCPA ACPI definitions */
#define TCPA_ACPI_CLASS_CLIENT 0
#define TCPA_ACPI_CLASS_SERVER 1
@@ -117,15 +116,8 @@
/* address of locality 0 (TIS) */
#define TPM_TIS_BASE_ADDRESS 0xfed40000
-#define ASCII32(a,b,c,d) ((((Bit32u)a) << 0) | (((Bit32u)b) << 8) | \
- (((Bit32u)c) << 16) | (((Bit32u)d) << 24) )
-#define ACPI_2_0_TCPA_SIGNATURE ASCII32('T','C','P','A') /* "TCPA" */
-
-
#define STATUS_FLAG_SHUTDOWN (1 << 0)
-#define ACPI_SEGMENT 0xE000
-
/* Input and Output blocks for the TCG BIOS commands */
struct hleei_short
@@ -232,37 +224,6 @@ struct pcpes
uint32_t event;
} __attribute__((packed));
-
-struct acpi_header
-{
- uint32_t signature;
- uint32_t length;
- uint8_t revision;
- uint8_t checksum;
- uint8_t oem_id[6];
- uint64_t oem_table_id;
- uint32_t oem_revision;
- uint32_t creator_id;
- uint32_t creator_revision;
-} __attribute__((packed));
-
-struct acpi_20_rsdt {
- struct acpi_header header;
- uint32_t entry[1];
-} __attribute__((packed));
-
-struct acpi_20_rsdp {
- uint64_t signature;
- uint8_t checksum;
- uint8_t oem_id[6];
- uint8_t revision;
- uint32_t rsdt_address;
- uint32_t length;
- uint64_t xsdt_address;
- uint8_t extended_checksum;
- uint8_t reserved[3];
-} __attribute__((packed));
-
struct acpi_20_tcpa_client {
uint32_t laml;
uint64_t lasa;
@@ -275,7 +236,7 @@ struct acpi_20_tcpa_server {
/* more here */
} __attribute__((packed));
-struct acpi_20_tcpa {
+struct acpi_20_tcpa_clisrv {
struct acpi_header header;
uint16_t platform_class;
union {
diff --git a/tools/firmware/rombios/32bit/util.c b/tools/firmware/rombios/32bit/util.c
index ad60b9eedb..a47bb71cde 100644
--- a/tools/firmware/rombios/32bit/util.c
+++ b/tools/firmware/rombios/32bit/util.c
@@ -19,6 +19,7 @@
*/
#include <stdarg.h>
#include <stdint.h>
+#include "rombios_compat.h"
#include "util.h"
static void putchar(char c);
@@ -92,11 +93,11 @@ int strcmp(const char *cs, const char *ct)
int strncmp(const char *s1, const char *s2, uint32_t n)
{
- uint32_t ctr;
- for (ctr = 0; ctr < n; ctr++)
- if (s1[ctr] != s2[ctr])
- return (int)(s1[ctr] - s2[ctr]);
- return 0;
+ uint32_t ctr;
+ for (ctr = 0; ctr < n; ctr++)
+ if (s1[ctr] != s2[ctr])
+ return (int)(s1[ctr] - s2[ctr]);
+ return 0;
}
void *memcpy(void *dest, const void *src, unsigned n)
@@ -402,3 +403,64 @@ void mssleep(uint32_t waittime)
y = x;
}
}
+
+/*
+ * Search for the RSDP ACPI table in the memory starting at 'start' and
+ * ending at 'start' + 'len' - 1.
+ */
+static struct acpi_20_rsdp *__find_rsdp(const void *start, unsigned int len)
+{
+ char *rsdp = (char *)start;
+ char *end = rsdp + len;
+ /* scan memory in steps of 16 bytes */
+ while (rsdp < end) {
+ /* check for expected string */
+ if (!strncmp(rsdp, "RSD PTR ", 8))
+ return (struct acpi_20_rsdp *)rsdp;
+ rsdp += 0x10;
+ }
+ return 0;
+}
+
+struct acpi_20_rsdp *find_rsdp(void)
+{
+ struct acpi_20_rsdp *rsdp;
+ uint16_t ebda_seg;
+
+ ebda_seg = *(uint16_t *)ADDR_FROM_SEG_OFF(0x40, 0xe);
+ rsdp = __find_rsdp((void *)(ebda_seg << 16), 1024);
+ if (!rsdp)
+ rsdp = __find_rsdp((void *)0xE0000, 0x20000);
+
+ return rsdp;
+}
+
+uint32_t get_s3_waking_vector(void)
+{
+ struct acpi_20_rsdp *rsdp = find_rsdp();
+ struct acpi_20_xsdt *xsdt;
+ struct acpi_20_fadt *fadt;
+ struct acpi_20_facs *facs;
+ uint32_t vector;
+
+ if (!rsdp)
+ return 0;
+
+ xsdt = (struct acpi_20_xsdt *)(long)rsdp->xsdt_address;
+ if (!xsdt)
+ return 0;
+
+ fadt = (struct acpi_20_fadt *)(long)xsdt->entry[0];
+ if (!fadt || (fadt->header.signature != ACPI_2_0_FADT_SIGNATURE))
+ return 0;
+
+ facs = (struct acpi_20_facs *)(long)fadt->x_firmware_ctrl;
+ if (!facs)
+ return 0;
+
+ vector = facs->x_firmware_waking_vector;
+ if (!vector)
+ vector = facs->firmware_waking_vector;
+
+ return vector;
+}
diff --git a/tools/firmware/rombios/32bit/util.h b/tools/firmware/rombios/32bit/util.h
index 6d05b502d5..e245be6569 100644
--- a/tools/firmware/rombios/32bit/util.h
+++ b/tools/firmware/rombios/32bit/util.h
@@ -1,6 +1,8 @@
#ifndef UTIL_H
#define UTIL_H
+#include "../hvmloader/acpi/acpi2_0.h"
+
void outb(uint16_t addr, uint8_t val);
void outw(uint16_t addr, uint16_t val);
void outl(uint16_t addr, uint32_t val);
@@ -39,5 +41,6 @@ static inline uint32_t mmio_readl(uint32_t *addr)
return *(volatile uint32_t *)addr;
}
+struct acpi_20_rsdp *find_rsdp(void);
#endif
diff --git a/tools/firmware/rombios/32bitgateway.c b/tools/firmware/rombios/32bitgateway.c
index 4da5a39c45..9592dfbca1 100644
--- a/tools/firmware/rombios/32bitgateway.c
+++ b/tools/firmware/rombios/32bitgateway.c
@@ -356,6 +356,9 @@ Upcall:
call _store_returnaddress ; store away
pop ax
+ ; XXX GDT munging requires ROM to be writable!
+ call _enable_rom_write_access
+
rol bx, #2
mov si, #jmptable
seg cs
@@ -382,6 +385,8 @@ Upcall:
mov bp,sp
push eax ; preserve work register
+ call _disable_rom_write_access
+
call _get_returnaddress
mov 2[bp], ax ; 16bit return address onto stack
@@ -408,3 +413,10 @@ ASM_END
#include "32bitgateway.h"
#include "tcgbios.c"
+
+Bit32u get_s3_waking_vector()
+{
+ ASM_START
+ DoUpcall(IDX_GET_S3_WAKING_VECTOR)
+ ASM_END
+}
diff --git a/tools/firmware/rombios/32bitprotos.h b/tools/firmware/rombios/32bitprotos.h
index 69ec87dc34..f0c401476a 100644
--- a/tools/firmware/rombios/32bitprotos.h
+++ b/tools/firmware/rombios/32bitprotos.h
@@ -17,8 +17,8 @@
#define IDX_TCPA_IPL 10
#define IDX_TCPA_INITIALIZE_TPM 11
#define IDX_TCPA_MEASURE_POST 12
-
-#define IDX_LAST 13 /* keep last! */
+#define IDX_GET_S3_WAKING_VECTOR 13
+#define IDX_LAST 14 /* keep last! */
#ifdef GCC_PROTOS
#define PARMS(x...) x
@@ -42,4 +42,6 @@ void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) );
void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
+Bit32u get_s3_waking_vector( PARMS(void) );
+
#endif
diff --git a/tools/firmware/rombios/Makefile b/tools/firmware/rombios/Makefile
index be2fef672a..8ea8cb7560 100644
--- a/tools/firmware/rombios/Makefile
+++ b/tools/firmware/rombios/Makefile
@@ -4,7 +4,8 @@ include $(XEN_ROOT)/tools/Rules.mk
SUBDIRS := 32bit
.PHONY: all
-all: subdirs-all BIOS-bochs-latest
+all: subdirs-all
+ $(MAKE) BIOS-bochs-latest
.PHONY: clean
clean: subdirs-clean
diff --git a/tools/firmware/rombios/rombios.c b/tools/firmware/rombios/rombios.c
index c3b3828749..547d5cff87 100644
--- a/tools/firmware/rombios/rombios.c
+++ b/tools/firmware/rombios/rombios.c
@@ -738,7 +738,9 @@ typedef struct {
// EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot
// device tables are at 0x9ff00 -- 0x9ffff
typedef struct {
- unsigned char filler1[0x3D];
+ unsigned char ebda_size;
+ unsigned char cmos_shutdown_status;
+ unsigned char filler1[0x3B];
// FDPT - Can be splitted in data members if needed
unsigned char fdpt0[0x10];
@@ -757,6 +759,7 @@ typedef struct {
upcall_t upcall;
} ebda_data_t;
+ #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
#define EbdaData ((ebda_data_t *) 0)
// for access to the int13ext structure
@@ -1464,20 +1467,31 @@ copy_e820_table()
}
void
-disable_rom_write_access()
+set_rom_write_access(action)
+ Bit16u action;
{
Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob;
ASM_START
- mov si,.disable_rom_write_access.off[bp]
+ mov si,.set_rom_write_access.off[bp]
push ds
mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4)
mov ds,ax
mov dx,[si]
pop ds
- mov ax,#PFFLAG_ROM_LOCK
+ mov ax,.set_rom_write_access.action[bp]
out dx,al
ASM_END
}
+
+void enable_rom_write_access()
+{
+ set_rom_write_access(0);
+}
+
+void disable_rom_write_access()
+{
+ set_rom_write_access(PFFLAG_ROM_LOCK);
+}
#endif /* HVMASSIST */
@@ -2325,78 +2339,38 @@ debugger_off()
outb(0xfedc, 0x00);
}
-/* according to memory layout defined in acpi_build_tables(),
- acpi FACS table is located in ACPI_PHYSICAL_ADDRESS(0xEA000) */
-#define ACPI_FACS_ADDRESS 0xEA000
-#define ACPI_FACS_OFFSET 0x10
-/* S3 resume status in CMOS 0Fh shutdown status byte*/
-
-Bit32u facs_get32(offs)
-Bit16u offs;
-{
-ASM_START
- push bp
- mov bp, sp
-
- push ds
- mov ax, #(ACPI_FACS_ADDRESS >> 4)
- mov ds, ax
-
- mov bx, 4[bp]
- mov ax, [bx]
- mov dx, 2[bx]
- pop ds
-
- pop bp
-ASM_END
-}
-
-
void
s3_resume()
{
Bit32u s3_wakeup_vector;
- extern Bit16u s3_wakeup_ip;
- extern Bit16u s3_wakeup_cs;
- extern Bit8u s3_resume_flag;
+ Bit16u s3_wakeup_ip, s3_wakeup_cs;
+ Bit8u cmos_shutdown_status;
ASM_START
push ds
- mov ax, #0xF000
+ push ax
+ mov ax, #EBDA_SEG
mov ds, ax
+ mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET]
+ mov .s3_resume.cmos_shutdown_status[bp], al
+ pop ax
+ pop ds
ASM_END
- if (s3_resume_flag!=CMOS_SHUTDOWN_S3){
- goto s3_out;
- }
- s3_resume_flag = 0;
-
- /* get x_firmware_waking_vector */
- s3_wakeup_vector = facs_get32(ACPI_FACS_OFFSET+24);
- if (!s3_wakeup_vector) {
- /* get firmware_waking_vector */
- s3_wakeup_vector = facs_get32(ACPI_FACS_OFFSET+12);
- if (!s3_wakeup_vector) {
- goto s3_out;
- }
- }
+ if (cmos_shutdown_status != CMOS_SHUTDOWN_S3)
+ return;
+
+ s3_wakeup_vector = get_s3_waking_vector();
+ if (!s3_wakeup_vector)
+ return;
- /* setup wakeup vector */
s3_wakeup_ip = s3_wakeup_vector & 0xF;
s3_wakeup_cs = s3_wakeup_vector >> 4;
ASM_START
- jmpf [_s3_wakeup_ip]
-
-; S3 data
-_s3_wakeup_ip: dw 0x0a
-_s3_wakeup_cs: dw 0x0
-_s3_resume_flag: db 0 ; set at POST time by CMOS[0xF] shutdown status
-ASM_END
-
-s3_out:
-ASM_START
- pop ds
+ push .s3_resume.s3_wakeup_cs[bp]
+ push .s3_resume.s3_wakeup_ip[bp]
+ retf
ASM_END
}
@@ -9865,52 +9839,9 @@ post:
;; Examine CMOS shutdown status.
mov al, bl
-
- ;; 0xFE S3 resume
- cmp AL, #0xFE
- jnz not_s3_resume
-
- ;; set S3 resume flag
- mov dx, #0xF000
+ mov dx, #EBDA_SEG
mov ds, dx
- mov [_s3_resume_flag], AL
- jmp normal_post
-
-not_s3_resume:
-
- ;; 0x00, 0x09, 0x0D+ = normal startup
- cmp AL, #0x00
- jz normal_post
- cmp AL, #0x0d
- jae normal_post
- cmp AL, #0x09
- je normal_post
-
- ;; 0x05 = eoi + jmp via [0x40:0x67] jump
- cmp al, #0x05
- je eoi_jmp_post
-
- ;; Examine CMOS shutdown status.
- ;; 0x01,0x02,0x03,0x04,0x06,0x07,0x08, 0x0a, 0x0b, 0x0c = Unimplemented shutdown status.
- push bx
- call _shutdown_status_panic
-
-#if 0
- HALT(__LINE__)
- ;
- ;#if 0
- ; 0xb0, 0x20, /* mov al, #0x20 */
- ; 0xe6, 0x20, /* out 0x20, al ;send EOI to PIC */
- ;#endif
- ;
- pop es
- pop ds
- popa
- iret
-#endif
-
-normal_post:
- ; case 0: normal startup
+ mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL
cli
mov ax, #0xfffe
@@ -9929,8 +9860,6 @@ normal_post:
call _log_bios_start
- call _clobber_entry_point
-
;; set all interrupts to default handler
mov bx, #0x0000 ;; offset index
mov cx, #0x0100 ;; counter (256 interrupts)
@@ -10123,8 +10052,11 @@ post_default_ints:
out 0xa1, AL ;slave pic: unmask IRQ 12, 13, 14
#ifdef HVMASSIST
+ call _enable_rom_write_access
+ call _clobber_entry_point
call _copy_e820_table
call smbios_init
+ call _disable_rom_write_access
#endif
call _init_boot_vectors
@@ -10175,10 +10107,6 @@ post_default_ints:
call tcpa_post_part2
#endif
-#ifdef HVMASSIST
- call _disable_rom_write_access
-#endif
-
;; Start the boot sequence. See the comments in int19_relocated
;; for why we use INT 18h instead of INT 19h here.
int #0x18
diff --git a/tools/fs-back/fs-backend.c b/tools/fs-back/fs-backend.c
index cdf3edbf0b..f0d2758627 100644
--- a/tools/fs-back/fs-backend.c
+++ b/tools/fs-back/fs-backend.c
@@ -16,7 +16,7 @@ static struct fs_export *fs_exports = NULL;
static int export_id = 0;
static int mount_id = 0;
-void dispatch_response(struct mount *mount, int priv_req_id)
+static void dispatch_response(struct fs_mount *mount, int priv_req_id)
{
int i;
struct fs_op *op;
@@ -41,7 +41,7 @@ void dispatch_response(struct mount *mount, int priv_req_id)
add_id_to_freelist(priv_req_id, mount->freelist);
}
-static void handle_aio_events(struct mount *mount)
+static void handle_aio_events(struct fs_mount *mount)
{
int fd, ret, count, i, notify;
evtchn_port_t port;
@@ -103,7 +103,7 @@ read_event_channel:
}
-void allocate_request_array(struct mount *mount)
+static void allocate_request_array(struct fs_mount *mount)
{
int i, nr_entries = mount->nr_entries;
struct fs_request *requests;
@@ -123,10 +123,10 @@ void allocate_request_array(struct mount *mount)
}
-void* handle_mount(void *data)
+static void *handle_mount(void *data)
{
int more, notify;
- struct mount *mount = (struct mount *)data;
+ struct fs_mount *mount = (struct fs_mount *)data;
printf("Starting a thread for mount: %d\n", mount->mount_id);
allocate_request_array(mount);
@@ -147,7 +147,8 @@ moretodo:
int i;
struct fs_op *op;
- printf("Got a request at %d\n", cons);
+ printf("Got a request at %d (of %d)\n",
+ cons, RING_SIZE(&mount->ring));
req = RING_GET_REQUEST(&mount->ring, cons);
printf("Request type=%d\n", req->type);
for(i=0;;i++)
@@ -193,11 +194,12 @@ moretodo:
static void handle_connection(int frontend_dom_id, int export_id, char *frontend)
{
- struct mount *mount;
+ struct fs_mount *mount;
struct fs_export *export;
int evt_port;
pthread_t handling_thread;
struct fsif_sring *sring;
+ uint32_t dom_ids[MAX_RING_SIZE];
int i;
printf("Handling connection from dom=%d, for export=%d\n",
@@ -216,13 +218,13 @@ static void handle_connection(int frontend_dom_id, int export_id, char *frontend
return;
}
- mount = (struct mount*)malloc(sizeof(struct mount));
+ mount = (struct fs_mount*)malloc(sizeof(struct fs_mount));
mount->dom_id = frontend_dom_id;
mount->export = export;
mount->mount_id = mount_id++;
xenbus_read_mount_request(mount, frontend);
printf("Frontend found at: %s (gref=%d, evtchn=%d)\n",
- mount->frontend, mount->gref, mount->remote_evtchn);
+ mount->frontend, mount->grefs[0], mount->remote_evtchn);
xenbus_write_backend_node(mount);
mount->evth = -1;
mount->evth = xc_evtchn_open();
@@ -235,11 +237,15 @@ static void handle_connection(int frontend_dom_id, int export_id, char *frontend
mount->gnth = -1;
mount->gnth = xc_gnttab_open();
assert(mount->gnth != -1);
- sring = xc_gnttab_map_grant_ref(mount->gnth,
- mount->dom_id,
- mount->gref,
- PROT_READ | PROT_WRITE);
- BACK_RING_INIT(&mount->ring, sring, XC_PAGE_SIZE);
+ for(i=0; i<mount->shared_ring_size; i++)
+ dom_ids[i] = mount->dom_id;
+ sring = xc_gnttab_map_grant_refs(mount->gnth,
+ mount->shared_ring_size,
+ dom_ids,
+ mount->grefs,
+ PROT_READ | PROT_WRITE);
+
+ BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * XC_PAGE_SIZE);
mount->nr_entries = mount->ring.nr_ents;
for (i = 0; i < MAX_FDS; i++)
mount->fds[i] = -1;
@@ -287,7 +293,7 @@ next_select:
} while (1);
}
-struct fs_export* create_export(char *name, char *export_path)
+static struct fs_export* create_export(char *name, char *export_path)
{
struct fs_export *curr_export, **last_export;
diff --git a/tools/fs-back/fs-backend.h b/tools/fs-back/fs-backend.h
index 2ad96f88d4..b2a6be6f4a 100644
--- a/tools/fs-back/fs-backend.h
+++ b/tools/fs-back/fs-backend.h
@@ -13,6 +13,7 @@
#define EXPORTS_NODE ROOT_NODE"/"EXPORTS_SUBNODE
#define WATCH_NODE EXPORTS_NODE"/requests"
#define MAX_FDS 16
+#define MAX_RING_SIZE 16
struct fs_export
{
@@ -26,22 +27,24 @@ struct fs_request
{
int active;
void *page; /* Pointer to mapped grant */
+ int count;
struct fsif_request req_shadow;
struct aiocb aiocb;
};
-struct mount
+struct fs_mount
{
struct fs_export *export;
int dom_id;
char *frontend;
int mount_id; /* = backend id */
- grant_ref_t gref;
+ grant_ref_t grefs[MAX_RING_SIZE];
evtchn_port_t remote_evtchn;
int evth; /* Handle to the event channel */
evtchn_port_t local_evtchn;
int gnth;
+ int shared_ring_size; /* in pages */
struct fsif_back_ring ring;
int nr_entries;
struct fs_request *requests;
@@ -56,17 +59,17 @@ extern struct xs_handle *xsh;
bool xenbus_create_request_node(void);
int xenbus_register_export(struct fs_export *export);
int xenbus_get_watch_fd(void);
-void xenbus_read_mount_request(struct mount *mount, char *frontend);
-void xenbus_write_backend_node(struct mount *mount);
-void xenbus_write_backend_ready(struct mount *mount);
+void xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
+void xenbus_write_backend_node(struct fs_mount *mount);
+void xenbus_write_backend_ready(struct fs_mount *mount);
/* File operations, implemented in fs-ops.c */
struct fs_op
{
int type; /* Type of request (from fsif.h) this handlers
are responsible for */
- void (*dispatch_handler)(struct mount *mount, struct fsif_request *req);
- void (*response_handler)(struct mount *mount, struct fs_request *req);
+ void (*dispatch_handler)(struct fs_mount *mount, struct fsif_request *req);
+ void (*response_handler)(struct fs_mount *mount, struct fs_request *req);
};
/* This NULL terminated array of all file requests handlers */
diff --git a/tools/fs-back/fs-ops.c b/tools/fs-back/fs-ops.c
index 6315a4d975..424b054779 100644
--- a/tools/fs-back/fs-ops.c
+++ b/tools/fs-back/fs-ops.c
@@ -10,7 +10,7 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/vfs.h>
+#include <sys/statvfs.h>
#include <sys/mount.h>
#include <unistd.h>
#include "fs-backend.h"
@@ -23,7 +23,7 @@
#define BUFFER_SIZE 1024
-unsigned short get_request(struct mount *mount, struct fsif_request *req)
+static unsigned short get_request(struct fs_mount *mount, struct fsif_request *req)
{
unsigned short id = get_id_from_freelist(mount->freelist);
@@ -34,7 +34,7 @@ unsigned short get_request(struct mount *mount, struct fsif_request *req)
return id;
}
-int get_fd(struct mount *mount)
+static int get_fd(struct fs_mount *mount)
{
int i;
@@ -45,7 +45,7 @@ int get_fd(struct mount *mount)
}
-void dispatch_file_open(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_open(struct fs_mount *mount, struct fsif_request *req)
{
char *file_name, full_path[BUFFER_SIZE];
int fd;
@@ -93,7 +93,7 @@ void dispatch_file_open(struct mount *mount, struct fsif_request *req)
rsp->ret_val = (uint64_t)fd;
}
-void dispatch_file_close(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_close(struct fs_mount *mount, struct fsif_request *req)
{
int ret;
RING_IDX rsp_idx;
@@ -122,19 +122,25 @@ void dispatch_file_close(struct mount *mount, struct fsif_request *req)
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
}
-void dispatch_file_read(struct mount *mount, struct fsif_request *req)
+
+#define MAX_GNTS 16
+static void dispatch_file_read(struct fs_mount *mount, struct fsif_request *req)
{
void *buf;
- int fd;
+ int fd, i, count;
uint16_t req_id;
unsigned short priv_id;
struct fs_request *priv_req;
/* Read the request */
- buf = xc_gnttab_map_grant_ref(mount->gnth,
- mount->dom_id,
- req->u.fread.gref,
- PROT_WRITE);
+ assert(req->u.fread.len > 0);
+ count = (req->u.fread.len - 1) / XC_PAGE_SIZE + 1;
+ assert(count <= FSIF_NR_READ_GNTS);
+ buf = xc_gnttab_map_domain_grant_refs(mount->gnth,
+ count,
+ mount->dom_id,
+ req->u.fread.grefs,
+ PROT_WRITE);
req_id = req->id;
printf("File read issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n",
@@ -149,6 +155,7 @@ void dispatch_file_read(struct mount *mount, struct fsif_request *req)
printf("Private id is: %d\n", priv_id);
priv_req = &mount->requests[priv_id];
priv_req->page = buf;
+ priv_req->count = count;
/* Dispatch AIO read request */
bzero(&priv_req->aiocb, sizeof(struct aiocb));
@@ -164,14 +171,16 @@ out:
mount->ring.req_cons++;
}
-void end_file_read(struct mount *mount, struct fs_request *priv_req)
+static void end_file_read(struct fs_mount *mount, struct fs_request *priv_req)
{
RING_IDX rsp_idx;
fsif_response_t *rsp;
uint16_t req_id;
/* Release the grant */
- assert(xc_gnttab_munmap(mount->gnth, priv_req->page, 1) == 0);
+ assert(xc_gnttab_munmap(mount->gnth,
+ priv_req->page,
+ priv_req->count) == 0);
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -182,19 +191,23 @@ void end_file_read(struct mount *mount, struct fs_request *priv_req)
rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
}
-void dispatch_file_write(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_write(struct fs_mount *mount, struct fsif_request *req)
{
void *buf;
- int fd;
+ int fd, count, i;
uint16_t req_id;
unsigned short priv_id;
struct fs_request *priv_req;
/* Read the request */
- buf = xc_gnttab_map_grant_ref(mount->gnth,
- mount->dom_id,
- req->u.fwrite.gref,
- PROT_READ);
+ assert(req->u.fwrite.len > 0);
+ count = (req->u.fwrite.len - 1) / XC_PAGE_SIZE + 1;
+ assert(count <= FSIF_NR_WRITE_GNTS);
+ buf = xc_gnttab_map_domain_grant_refs(mount->gnth,
+ count,
+ mount->dom_id,
+ req->u.fwrite.grefs,
+ PROT_READ);
req_id = req->id;
printf("File write issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n",
@@ -209,6 +222,7 @@ void dispatch_file_write(struct mount *mount, struct fsif_request *req)
printf("Private id is: %d\n", priv_id);
priv_req = &mount->requests[priv_id];
priv_req->page = buf;
+ priv_req->count = count;
/* Dispatch AIO write request */
bzero(&priv_req->aiocb, sizeof(struct aiocb));
@@ -224,14 +238,16 @@ void dispatch_file_write(struct mount *mount, struct fsif_request *req)
mount->ring.req_cons++;
}
-void end_file_write(struct mount *mount, struct fs_request *priv_req)
+static void end_file_write(struct fs_mount *mount, struct fs_request *priv_req)
{
RING_IDX rsp_idx;
fsif_response_t *rsp;
uint16_t req_id;
/* Release the grant */
- assert(xc_gnttab_munmap(mount->gnth, priv_req->page, 1) == 0);
+ assert(xc_gnttab_munmap(mount->gnth,
+ priv_req->page,
+ priv_req->count) == 0);
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
@@ -242,7 +258,7 @@ void end_file_write(struct mount *mount, struct fs_request *priv_req)
rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
}
-void dispatch_stat(struct mount *mount, struct fsif_request *req)
+static void dispatch_stat(struct fs_mount *mount, struct fsif_request *req)
{
struct fsif_stat_response *buf;
struct stat stat;
@@ -251,12 +267,6 @@ void dispatch_stat(struct mount *mount, struct fsif_request *req)
RING_IDX rsp_idx;
fsif_response_t *rsp;
- /* Read the request */
- buf = xc_gnttab_map_grant_ref(mount->gnth,
- mount->dom_id,
- req->u.fstat.gref,
- PROT_WRITE);
-
req_id = req->id;
if (req->u.fstat.fd < MAX_FDS)
fd = mount->fds[req->u.fstat.fd];
@@ -272,38 +282,35 @@ void dispatch_stat(struct mount *mount, struct fsif_request *req)
/* Stat, and create the response */
ret = fstat(fd, &stat);
printf("Mode=%o, uid=%d, a_time=%ld\n",
- stat.st_mode, stat.st_uid, stat.st_atime);
- buf->stat_mode = stat.st_mode;
- buf->stat_uid = stat.st_uid;
- buf->stat_gid = stat.st_gid;
+ stat.st_mode, stat.st_uid, (long)stat.st_atime);
+
+ /* Get a response from the ring */
+ rsp_idx = mount->ring.rsp_prod_pvt++;
+ printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
+ rsp->id = req_id;
+ rsp->fstat.stat_ret = (uint32_t)ret;
+ rsp->fstat.stat_mode = stat.st_mode;
+ rsp->fstat.stat_uid = stat.st_uid;
+ rsp->fstat.stat_gid = stat.st_gid;
#ifdef BLKGETSIZE
if (S_ISBLK(stat.st_mode)) {
unsigned long sectors;
if (ioctl(fd, BLKGETSIZE, &sectors)) {
perror("getting device size\n");
- buf->stat_size = 0;
+ rsp->fstat.stat_size = 0;
} else
- buf->stat_size = sectors << 9;
+ rsp->fstat.stat_size = sectors << 9;
} else
#endif
- buf->stat_size = stat.st_size;
- buf->stat_atime = stat.st_atime;
- buf->stat_mtime = stat.st_mtime;
- buf->stat_ctime = stat.st_ctime;
-
- /* Release the grant */
- assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0);
-
- /* Get a response from the ring */
- rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
- rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
- rsp->id = req_id;
- rsp->ret_val = (uint64_t)ret;
+ rsp->fstat.stat_size = stat.st_size;
+ rsp->fstat.stat_atime = stat.st_atime;
+ rsp->fstat.stat_mtime = stat.st_mtime;
+ rsp->fstat.stat_ctime = stat.st_ctime;
}
-void dispatch_truncate(struct mount *mount, struct fsif_request *req)
+static void dispatch_truncate(struct fs_mount *mount, struct fsif_request *req)
{
int fd, ret;
uint16_t req_id;
@@ -335,7 +342,7 @@ void dispatch_truncate(struct mount *mount, struct fsif_request *req)
rsp->ret_val = (uint64_t)ret;
}
-void dispatch_remove(struct mount *mount, struct fsif_request *req)
+static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req)
{
char *file_name, full_path[BUFFER_SIZE];
int ret;
@@ -374,7 +381,7 @@ void dispatch_remove(struct mount *mount, struct fsif_request *req)
}
-void dispatch_rename(struct mount *mount, struct fsif_request *req)
+static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req)
{
char *buf, *old_file_name, *new_file_name;
char old_full_path[BUFFER_SIZE], new_full_path[BUFFER_SIZE];
@@ -421,7 +428,7 @@ void dispatch_rename(struct mount *mount, struct fsif_request *req)
}
-void dispatch_create(struct mount *mount, struct fsif_request *req)
+static void dispatch_create(struct fs_mount *mount, struct fsif_request *req)
{
char *file_name, full_path[BUFFER_SIZE];
int ret;
@@ -459,7 +466,17 @@ void dispatch_create(struct mount *mount, struct fsif_request *req)
else
{
printf("Issuing create for file: %s\n", full_path);
- ret = creat(full_path, mode);
+ ret = get_fd(mount);
+ if (ret >= 0) {
+ int real_fd = creat(full_path, mode);
+ if (real_fd < 0)
+ ret = -1;
+ else
+ {
+ mount->fds[ret] = real_fd;
+ printf("Got FD: %d for real %d\n", ret, real_fd);
+ }
+ }
}
printf("Got ret %d (errno=%d)\n", ret, errno);
@@ -471,7 +488,7 @@ void dispatch_create(struct mount *mount, struct fsif_request *req)
rsp->ret_val = (uint64_t)ret;
}
-void dispatch_list(struct mount *mount, struct fsif_request *req)
+static void dispatch_list(struct fs_mount *mount, struct fsif_request *req)
{
char *file_name, *buf, full_path[BUFFER_SIZE];
uint32_t offset, nr_files, error_code;
@@ -541,7 +558,7 @@ error_out:
rsp->ret_val = ret_val;
}
-void dispatch_chmod(struct mount *mount, struct fsif_request *req)
+static void dispatch_chmod(struct fs_mount *mount, struct fsif_request *req)
{
int fd, ret;
RING_IDX rsp_idx;
@@ -572,13 +589,13 @@ void dispatch_chmod(struct mount *mount, struct fsif_request *req)
rsp->ret_val = (uint64_t)ret;
}
-void dispatch_fs_space(struct mount *mount, struct fsif_request *req)
+static void dispatch_fs_space(struct fs_mount *mount, struct fsif_request *req)
{
char *file_name, full_path[BUFFER_SIZE];
RING_IDX rsp_idx;
fsif_response_t *rsp;
uint16_t req_id;
- struct statfs stat;
+ struct statvfs stat;
int64_t ret;
printf("Dispatching fs space operation (gref=%d).\n", req->u.fspace.gref);
@@ -596,7 +613,7 @@ void dispatch_fs_space(struct mount *mount, struct fsif_request *req)
mount->export->export_path, file_name);
assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
printf("Issuing fs space for %s\n", full_path);
- ret = statfs(full_path, &stat);
+ ret = statvfs(full_path, &stat);
if(ret >= 0)
ret = stat.f_bsize * stat.f_bfree;
@@ -613,7 +630,7 @@ void dispatch_fs_space(struct mount *mount, struct fsif_request *req)
rsp->ret_val = (uint64_t)ret;
}
-void dispatch_file_sync(struct mount *mount, struct fsif_request *req)
+static void dispatch_file_sync(struct fs_mount *mount, struct fsif_request *req)
{
int fd;
uint16_t req_id;
@@ -643,7 +660,7 @@ void dispatch_file_sync(struct mount *mount, struct fsif_request *req)
mount->ring.req_cons++;
}
-void end_file_sync(struct mount *mount, struct fs_request *priv_req)
+static void end_file_sync(struct fs_mount *mount, struct fs_request *priv_req)
{
RING_IDX rsp_idx;
fsif_response_t *rsp;
diff --git a/tools/fs-back/fs-xenbus.c b/tools/fs-back/fs-xenbus.c
index 3a3009465f..6a86e245e0 100644
--- a/tools/fs-back/fs-xenbus.c
+++ b/tools/fs-back/fs-xenbus.c
@@ -109,10 +109,11 @@ int xenbus_get_watch_fd(void)
return xs_fileno(xsh);
}
-void xenbus_read_mount_request(struct mount *mount, char *frontend)
+void xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
{
char node[1024];
char *s;
+ int i;
assert(xsh != NULL);
#if 0
@@ -125,10 +126,18 @@ void xenbus_read_mount_request(struct mount *mount, char *frontend)
s = xs_read(xsh, XBT_NULL, node, NULL);
assert(strcmp(s, STATE_READY) == 0);
free(s);
- snprintf(node, sizeof(node), "%s/ring-ref", frontend);
+ snprintf(node, sizeof(node), "%s/ring-size", frontend);
s = xs_read(xsh, XBT_NULL, node, NULL);
- mount->gref = atoi(s);
+ mount->shared_ring_size = atoi(s);
+ assert(mount->shared_ring_size <= MAX_RING_SIZE);
free(s);
+ for(i=0; i<mount->shared_ring_size; i++)
+ {
+ snprintf(node, sizeof(node), "%s/ring-ref-%d", frontend, i);
+ s = xs_read(xsh, XBT_NULL, node, NULL);
+ mount->grefs[i] = atoi(s);
+ free(s);
+ }
snprintf(node, sizeof(node), "%s/event-channel", frontend);
s = xs_read(xsh, XBT_NULL, node, NULL);
mount->remote_evtchn = atoi(s);
@@ -150,7 +159,7 @@ static int get_self_id(void)
}
-void xenbus_write_backend_node(struct mount *mount)
+void xenbus_write_backend_node(struct fs_mount *mount)
{
char node[1024], backend_node[1024];
int self_id;
@@ -167,7 +176,7 @@ void xenbus_write_backend_node(struct mount *mount)
xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
}
-void xenbus_write_backend_ready(struct mount *mount)
+void xenbus_write_backend_ready(struct fs_mount *mount)
{
char node[1024];
int self_id;
diff --git a/tools/include/xen-sys/MiniOS/privcmd.h b/tools/include/xen-sys/MiniOS/privcmd.h
index 97e0ceb5a0..db0f00e3ec 100644
--- a/tools/include/xen-sys/MiniOS/privcmd.h
+++ b/tools/include/xen-sys/MiniOS/privcmd.h
@@ -10,9 +10,7 @@ typedef struct privcmd_hypercall
} privcmd_hypercall_t;
typedef struct privcmd_mmap_entry {
- u64 va;
u64 mfn;
- u64 npages;
} privcmd_mmap_entry_t;
#endif /* __MINIOS_PUBLIC_PRIVCMD_H__ */
diff --git a/tools/ioemu/block-vbd.c b/tools/ioemu/block-vbd.c
index a3465b748a..58015bdf00 100644
--- a/tools/ioemu/block-vbd.c
+++ b/tools/ioemu/block-vbd.c
@@ -273,6 +273,10 @@ static BlockDriverAIOCB *vbd_aio_flush(BlockDriverState *bs,
BDRVVbdState *s = bs->opaque;
VbdAIOCB *acb = NULL;
+ if (s->info.mode == O_RDONLY) {
+ cb(opaque, 0);
+ return NULL;
+ }
if (s->info.barrier == 1) {
acb = vbd_aio_setup(bs, 0, NULL, 0,
s->info.flush == 1 ? vbd_nop_cb : cb, opaque);
diff --git a/tools/ioemu/hw/pass-through.c b/tools/ioemu/hw/pass-through.c
index 0e17af915e..3903a0a1fc 100644
--- a/tools/ioemu/hw/pass-through.c
+++ b/tools/ioemu/hw/pass-through.c
@@ -138,6 +138,13 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
struct pt_reg_tbl *cfg_entry,
uint16_t *value, uint16_t dev_value, uint16_t valid_mask);
+/* pt_reg_info_tbl declaration
+ * - only for emulated register (either a part or whole bit).
+ * - for passthrough register that need special behavior (like interacting with
+ * other component), set emu_mask to all 0 and specify r/w func properly.
+ * - do NOT use ALL F for init_val, otherwise the tbl will not be registered.
+ */
+
/* Header Type0 reg static infomation table */
static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
/* Command reg */
@@ -564,6 +571,13 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = {
},
};
+/* pt_reg_grp_info_tbl declaration
+ * - only for emulated or zero-hardwired register group.
+ * - for register group with dynamic size, just set grp_size to 0xFF and
+ * specify size_init func properly.
+ * - no need to specify emu_reg_tbl for zero-hardwired type.
+ */
+
/* emul reg group static infomation table */
static const struct pt_reg_grp_info_tbl pt_emu_reg_grp_tbl[] = {
/* Header Type0 reg group */
@@ -821,7 +835,7 @@ void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
assigned_device->bases[i].e_size= e_size;
PT_LOG("e_phys=%08x maddr=%lx type=%d len=%d index=%d first_map=%d\n",
- e_phys, assigned_device->bases[i].access.maddr,
+ e_phys, (unsigned long)assigned_device->bases[i].access.maddr,
type, e_size, i, first_map);
if ( e_size == 0 )
@@ -843,7 +857,7 @@ void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
}
}
- /* map only valid guest address (include 0) */
+ /* map only valid guest address */
if (e_phys != -1)
{
/* Create new mapping */
@@ -860,7 +874,7 @@ void pt_iomem_map(PCIDevice *d, int i, uint32_t e_phys, uint32_t e_size,
ret = remove_msix_mapping(assigned_device, i);
if ( ret != 0 )
- PT_LOG("Error: remove MSX-X mmio mapping failed!\n");
+ PT_LOG("Error: remove MSI-X mmio mapping failed!\n");
}
}
@@ -996,8 +1010,11 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
int index = 0;
int ret = 0;
- PT_LOG("write(%x.%x): address=%04x val=0x%08x len=%d\n",
- (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
+ PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
+ pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7),
+ address, val, len);
+#endif
/* check offset range */
if (address >= 0xFF)
@@ -1049,7 +1066,10 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
if (reg_grp->grp_type == GRP_TYPE_HARDWIRED)
{
/* ignore silently */
- PT_LOG("Access to 0 Hardwired register.\n");
+ PT_LOG("Access to 0 Hardwired register. "
+ "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
+ pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F),
+ (d->devfn & 0x7), address, len);
goto exit;
}
}
@@ -1067,22 +1087,22 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
break;
}
- /* check libpci error */
+ /* check libpci result */
valid_mask = (0xFFFFFFFF >> ((4 - len) << 3));
if ((read_val & valid_mask) == valid_mask)
{
- PT_LOG("libpci read error. No emulation. "
+ PT_LOG("Warning: Return ALL F from libpci read. "
"[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
address, len);
- goto exit;
}
/* pass directly to libpci for passthrough type register group */
if (reg_grp_entry == NULL)
goto out;
- /* adjust the write value to appropriate CFC-CFF window */
+ /* adjust the read and write value to appropriate CFC-CFF window */
+ read_val <<= ((address & 3) << 3);
val <<= ((address & 3) << 3);
emul_len = len;
@@ -1131,7 +1151,8 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
if (ret < 0)
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Invalid write emulation "
+ "return value[%d]. I/O emulator exit.\n", ret);
exit(1);
}
@@ -1186,9 +1207,6 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
int emul_len = 0;
int ret = 0;
- PT_LOG("read(%x.%x): address=%04x len=%d\n",
- (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, len);
-
/* check offset range */
if (address >= 0xFF)
{
@@ -1246,15 +1264,14 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
break;
}
- /* check libpci error */
+ /* check libpci result */
valid_mask = (0xFFFFFFFF >> ((4 - len) << 3));
if ((val & valid_mask) == valid_mask)
{
- PT_LOG("libpci read error. No emulation. "
+ PT_LOG("Warning: Return ALL F from libpci read. "
"[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
address, len);
- goto exit;
}
/* just return the I/O device register value for
@@ -1309,7 +1326,8 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
if (ret < 0)
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Invalid read emulation "
+ "return value[%d]. I/O emulator exit.\n", ret);
exit(1);
}
@@ -1332,6 +1350,13 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
val >>= ((address & 3) << 3);
exit:
+
+#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
+ PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
+ pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7),
+ address, val, len);
+#endif
+
return val;
}
@@ -1389,7 +1414,7 @@ static int pt_register_regions(struct pt_dev *assigned_device)
return 0;
}
-static int pt_unregister_regions(struct pt_dev *assigned_device)
+static void pt_unregister_regions(struct pt_dev *assigned_device)
{
int i, type, ret;
uint32_t e_size;
@@ -1488,7 +1513,9 @@ static int pt_bar_reg_parse(
/* check 64bit BAR */
index = pt_bar_offset_to_index(reg->offset);
if ((index > 0) && (index < PCI_ROM_SLOT) &&
- (d->config[bar_64] & PCI_BASE_ADDRESS_MEM_TYPE_64))
+ ((d->config[bar_64] & (PCI_BASE_ADDRESS_SPACE |
+ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+ (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64)))
{
region = &ptdev->bases[index-1];
if (region->bar_flag != PT_BAR_FLAG_UPPER)
@@ -1503,6 +1530,13 @@ static int pt_bar_reg_parse(
if (!r->size)
goto out;
+ /* for ExpROM BAR */
+ if (index == PCI_ROM_SLOT)
+ {
+ bar_flag = PT_BAR_FLAG_MEM;
+ goto out;
+ }
+
/* check BAR I/O indicator */
if (d->config[reg->offset] & PCI_BASE_ADDRESS_SPACE_IO)
bar_flag = PT_BAR_FLAG_IO;
@@ -1540,7 +1574,7 @@ static void pt_bar_mapping(struct pt_dev *ptdev, int io_enable, int mem_enable)
/* copy region address to temporary */
r_addr = r->addr;
- /* clear region address in case I/O Space or Memory Space disable */
+ /* need unmapping in case I/O Space or Memory Space disable */
if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable ) ||
((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable ))
r_addr = -1;
@@ -1556,8 +1590,10 @@ static void pt_bar_mapping(struct pt_dev *ptdev, int io_enable, int mem_enable)
/* check overlapped address */
ret = pt_chk_bar_overlap(dev->bus, dev->devfn, r_addr, r_size);
if (ret > 0)
- PT_LOG("Base Address[%d] is overlapped. "
- "[Address:%08xh][Size:%04xh]\n", i, r_addr, r_size);
+ PT_LOG("ptdev[%02x:%02x.%x][Region:%d][Address:%08xh][Size:%08xh] "
+ "is overlapped.\n", pci_bus_num(dev->bus),
+ (dev->devfn >> 3) & 0x1F, (dev->devfn & 0x7),
+ i, r_addr, r_size);
/* check whether we need to update the mapping or not */
if (r_addr != ptdev->bases[i].e_physbase)
@@ -1776,14 +1812,16 @@ static uint32_t pt_status_reg_init(struct pt_dev *ptdev,
else
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Couldn't find pt_reg_tbl for "
+ "Capabilities Pointer register. I/O emulator exit.\n");
exit(1);
}
}
else
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Couldn't find pt_reg_grp_tbl for Header. "
+ "I/O emulator exit.\n");
exit(1);
}
@@ -1815,7 +1853,8 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
if (index < 0)
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Invalid BAR index[%d]. "
+ "I/O emulator exit.\n", index);
exit(1);
}
@@ -1962,9 +2001,8 @@ static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
ptdev->msi = malloc(sizeof(struct pt_msi_info));
if ( !ptdev->msi )
{
- PT_LOG("error allocation pt_msi_info\n");
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("error allocation pt_msi_info. I/O emulator exit.\n");
exit(1);
}
memset(ptdev->msi, 0, sizeof(struct pt_msi_info));
@@ -1983,7 +2021,8 @@ static uint8_t pt_msix_size_init(struct pt_dev *ptdev,
if (ret == -1)
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Invalid pt_msix_init return value[%d]. "
+ "I/O emulator exit.\n", ret);
exit(1);
}
@@ -2060,7 +2099,8 @@ static int pt_bar_reg_read(struct pt_dev *ptdev,
if (index < 0)
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Invalid BAR index[%d]. "
+ "I/O emulator exit.\n", index);
exit(1);
}
@@ -2074,8 +2114,8 @@ static int pt_bar_reg_read(struct pt_dev *ptdev,
bar_emu_mask = PT_BAR_IO_EMU_MASK;
break;
case PT_BAR_FLAG_UPPER:
- *value = 0;
- goto out;
+ bar_emu_mask = PT_BAR_ALLF;
+ break;
default:
break;
}
@@ -2085,7 +2125,6 @@ static int pt_bar_reg_read(struct pt_dev *ptdev,
*value = ((*value & ~valid_emu_mask) |
(cfg_entry->data & valid_emu_mask));
-out:
return 0;
}
@@ -2201,12 +2240,13 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
uint32_t r_size = 0;
int index = 0;
- /* get BAR index */
+ /* get BAR index */
index = pt_bar_offset_to_index(reg->offset);
if (index < 0)
{
/* exit I/O emulator */
- PT_LOG("I/O emulator exit()\n");
+ PT_LOG("Internal error: Invalid BAR index[%d]. "
+ "I/O emulator exit.\n", index);
exit(1);
}
@@ -2216,89 +2256,113 @@ static int pt_bar_reg_write(struct pt_dev *ptdev,
/* align resource size (memory type only) */
PT_GET_EMUL_SIZE(base->bar_flag, r_size);
- /* check guest write value */
- if (*value == PT_BAR_ALLF)
+ /* set emulate mask and read-only mask depend on BAR flag */
+ switch (ptdev->bases[index].bar_flag)
{
- /* set register with resource size alligned to page size */
- cfg_entry->data = ~(r_size - 1);
- /* avoid writing ALL F to I/O device register */
- *value = dev_value;
+ case PT_BAR_FLAG_MEM:
+ bar_emu_mask = PT_BAR_MEM_EMU_MASK;
+ bar_ro_mask = PT_BAR_MEM_RO_MASK | (r_size - 1);
+ break;
+ case PT_BAR_FLAG_IO:
+ bar_emu_mask = PT_BAR_IO_EMU_MASK;
+ bar_ro_mask = PT_BAR_IO_RO_MASK | (r_size - 1);
+ break;
+ case PT_BAR_FLAG_UPPER:
+ bar_emu_mask = PT_BAR_ALLF;
+ bar_ro_mask = 0; /* all upper 32bit are R/W */
+ break;
+ default:
+ break;
}
- else
+
+ /* modify emulate register */
+ writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
+ cfg_entry->data = ((*value & writable_mask) |
+ (cfg_entry->data & ~writable_mask));
+
+ /* check whether we need to update the virtual region address or not */
+ switch (ptdev->bases[index].bar_flag)
{
- /* set emulate mask and read-only mask depend on BAR flag */
- switch (ptdev->bases[index].bar_flag)
+ case PT_BAR_FLAG_MEM:
+ /* nothing to do */
+ break;
+ case PT_BAR_FLAG_IO:
+ new_addr = cfg_entry->data;
+ last_addr = new_addr + r_size - 1;
+ /* check invalid address */
+ if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000)
{
- case PT_BAR_FLAG_MEM:
- bar_emu_mask = PT_BAR_MEM_EMU_MASK;
- bar_ro_mask = PT_BAR_MEM_RO_MASK;
- break;
- case PT_BAR_FLAG_IO:
- new_addr = *value;
- last_addr = new_addr + r_size - 1;
/* check 64K range */
- if (last_addr <= new_addr || !new_addr || last_addr >= 0x10000)
+ if ((last_addr >= 0x10000) &&
+ (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask)))
{
PT_LOG("Guest attempt to set Base Address over the 64KB. "
- "[%02x:%02x.%x][Offset:%02xh][Range:%08xh-%08xh]\n",
+ "[%02x:%02x.%x][Offset:%02xh][Address:%08xh][Size:%08xh]\n",
pci_bus_num(d->bus),
((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
- reg->offset, new_addr, last_addr);
- /* just remove mapping */
- r->addr = -1;
- goto exit;
+ reg->offset, new_addr, r_size);
}
- bar_emu_mask = PT_BAR_IO_EMU_MASK;
- bar_ro_mask = PT_BAR_IO_RO_MASK;
- break;
- case PT_BAR_FLAG_UPPER:
- if (*value)
+ /* just remove mapping */
+ r->addr = -1;
+ goto exit;
+ }
+ break;
+ case PT_BAR_FLAG_UPPER:
+ if (cfg_entry->data)
+ {
+ if (cfg_entry->data != (PT_BAR_ALLF & ~bar_ro_mask))
{
PT_LOG("Guest attempt to set high MMIO Base Address. "
- "Ignore mapping. "
- "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n",
+ "Ignore mapping. "
+ "[%02x:%02x.%x][Offset:%02xh][High Address:%08xh]\n",
pci_bus_num(d->bus),
((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
- reg->offset, *value);
- /* clear lower address */
- d->io_regions[index-1].addr = -1;
+ reg->offset, cfg_entry->data);
}
- else
+ /* clear lower address */
+ d->io_regions[index-1].addr = -1;
+ }
+ else
+ {
+ /* find lower 32bit BAR */
+ prev_offset = (reg->offset - 4);
+ reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
+ if (reg_grp_entry)
{
- /* find lower 32bit BAR */
- prev_offset = (reg->offset - 4);
- reg_grp_entry = pt_find_reg_grp(ptdev, prev_offset);
- if (reg_grp_entry)
- {
- reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
- if (reg_entry)
- /* restore lower address */
- d->io_regions[index-1].addr = reg_entry->data;
- else
- return -1;
- }
+ reg_entry = pt_find_reg(reg_grp_entry, prev_offset);
+ if (reg_entry)
+ /* restore lower address */
+ d->io_regions[index-1].addr = reg_entry->data;
else
return -1;
}
- cfg_entry->data = 0;
- r->addr = -1;
- goto exit;
+ else
+ return -1;
}
- /* modify emulate register */
- writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
- cfg_entry->data = ((*value & writable_mask) |
- (cfg_entry->data & ~writable_mask));
- /* update the corresponding virtual region address */
- r->addr = cfg_entry->data;
+ /* always keep the emulate register value to 0,
+ * because hvmloader does not support high MMIO for now.
+ */
+ cfg_entry->data = 0;
- /* create value for writing to I/O device register */
- throughable_mask = ~bar_emu_mask & valid_mask;
- *value = ((*value & throughable_mask) |
- (dev_value & ~throughable_mask));
+ /* never mapping the 'empty' upper region,
+ * because we'll do it enough for the lower region.
+ */
+ r->addr = -1;
+ goto exit;
+ default:
+ break;
}
+ /* update the corresponding virtual region address */
+ r->addr = cfg_entry->data;
+
exit:
+ /* create value for writing to I/O device register */
+ throughable_mask = ~bar_emu_mask & valid_mask;
+ *value = ((*value & throughable_mask) |
+ (dev_value & ~throughable_mask));
+
return 0;
}
@@ -2314,6 +2378,8 @@ static int pt_exp_rom_bar_reg_write(struct pt_dev *ptdev,
uint32_t writable_mask = 0;
uint32_t throughable_mask = 0;
uint32_t r_size = 0;
+ uint32_t bar_emu_mask = 0;
+ uint32_t bar_ro_mask = 0;
r = &d->io_regions[PCI_ROM_SLOT];
r_size = r->size;
@@ -2321,28 +2387,22 @@ static int pt_exp_rom_bar_reg_write(struct pt_dev *ptdev,
/* align memory type resource size */
PT_GET_EMUL_SIZE(base->bar_flag, r_size);
- /* check guest write value */
- if (*value == PT_BAR_ALLF)
- {
- /* set register with resource size alligned to page size */
- cfg_entry->data = ~(r_size - 1);
- /* avoid writing ALL F to I/O device register */
- *value = dev_value;
- }
- else
- {
- /* modify emulate register */
- writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
- cfg_entry->data = ((*value & writable_mask) |
- (cfg_entry->data & ~writable_mask));
- /* update the corresponding virtual region address */
- r->addr = cfg_entry->data;
+ /* set emulate mask and read-only mask */
+ bar_emu_mask = reg->emu_mask;
+ bar_ro_mask = reg->ro_mask | (r_size - 1);
- /* create value for writing to I/O device register */
- throughable_mask = ~reg->emu_mask & valid_mask;
- *value = ((*value & throughable_mask) |
- (dev_value & ~throughable_mask));
- }
+ /* modify emulate register */
+ writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask;
+ cfg_entry->data = ((*value & writable_mask) |
+ (cfg_entry->data & ~writable_mask));
+
+ /* update the corresponding virtual region address */
+ r->addr = cfg_entry->data;
+
+ /* create value for writing to I/O device register */
+ throughable_mask = ~bar_emu_mask & valid_mask;
+ *value = ((*value & throughable_mask) |
+ (dev_value & ~throughable_mask));
return 0;
}
@@ -2484,8 +2544,6 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
uint16_t old_ctrl = cfg_entry->data;
PCIDevice *pd = (PCIDevice *)ptdev;
- PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
-
/* Currently no support for multi-vector */
if ((*value & PCI_MSI_FLAGS_QSIZE) != 0x0)
PT_LOG("try to set more than 1 vector ctrl %x\n", *value);
@@ -2527,8 +2585,6 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
else
ptdev->msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
- PT_LOG("[after] wr_val:%xh\n", *value);
-
return 0;
}
@@ -2542,8 +2598,6 @@ static int pt_msgaddr32_reg_write(struct pt_dev *ptdev,
uint32_t throughable_mask = 0;
uint32_t old_addr = cfg_entry->data;
- PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
-
/* modify emulate register */
writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
cfg_entry->data = ((*value & writable_mask) |
@@ -2564,8 +2618,6 @@ static int pt_msgaddr32_reg_write(struct pt_dev *ptdev,
pt_msi_update(ptdev);
}
- PT_LOG("[after] wr_val:%xh\n", *value);
-
return 0;
}
@@ -2579,8 +2631,6 @@ static int pt_msgaddr64_reg_write(struct pt_dev *ptdev,
uint32_t throughable_mask = 0;
uint32_t old_addr = cfg_entry->data;
- PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
-
/* check whether the type is 64 bit or not */
if (!(ptdev->msi->flags & PCI_MSI_FLAGS_64BIT))
{
@@ -2609,8 +2659,6 @@ static int pt_msgaddr64_reg_write(struct pt_dev *ptdev,
pt_msi_update(ptdev);
}
- PT_LOG("[after] wr_val:%xh\n", *value);
-
return 0;
}
@@ -2627,8 +2675,6 @@ static int pt_msgdata_reg_write(struct pt_dev *ptdev,
uint32_t flags = ptdev->msi->flags;
uint32_t offset = reg->offset;
- PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
-
/* check the offset whether matches the type or not */
if (!((offset == PCI_MSI_DATA_64) && (flags & PCI_MSI_FLAGS_64BIT)) &&
!((offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT)))
@@ -2658,8 +2704,6 @@ static int pt_msgdata_reg_write(struct pt_dev *ptdev,
pt_msi_update(ptdev);
}
- PT_LOG("[after] wr_val:%xh\n", *value);
-
return 0;
}
@@ -2673,8 +2717,6 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
uint16_t throughable_mask = 0;
uint16_t old_ctrl = cfg_entry->data;
- PT_LOG("[before] dev_val:%xh wr_val:%xh\n", dev_value, *value);
-
/* modify emulate register */
writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
cfg_entry->data = ((*value & writable_mask) |
@@ -2692,8 +2734,6 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE);
- PT_LOG("[after] wr_val:%xh\n", *value);
-
return 0;
}
@@ -2785,8 +2825,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
int pirq = pci_dev->irq;
machine_irq = pci_dev->irq;
- rc = xc_physdev_map_pirq(xc_handle, domid, MAP_PIRQ_TYPE_GSI,
- machine_irq, &pirq);
+ rc = xc_physdev_map_pirq(xc_handle, domid, machine_irq, &pirq);
if ( rc )
{
diff --git a/tools/ioemu/hw/pass-through.h b/tools/ioemu/hw/pass-through.h
index 5cce8522a6..d8344f26f9 100644
--- a/tools/ioemu/hw/pass-through.h
+++ b/tools/ioemu/hw/pass-through.h
@@ -47,12 +47,20 @@
/* because the current version of libpci (2.2.0) doesn't define these ID,
* so we define Capability ID here.
*/
+#ifndef PCI_CAP_ID_HOTPLUG
/* SHPC Capability List Item reg group */
#define PCI_CAP_ID_HOTPLUG 0x0C
+#endif
+
+#ifndef PCI_CAP_ID_SSVID
/* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */
#define PCI_CAP_ID_SSVID 0x0D
+#endif
+
+#ifndef PCI_MSI_FLAGS_MASK_BIT
/* interrupt masking & reporting supported */
#define PCI_MSI_FLAGS_MASK_BIT 0x0100
+#endif
#define PT_INVALID_REG 0xFFFFFFFF /* invalid register value */
#define PT_BAR_ALLF 0xFFFFFFFF /* BAR ALLF value */
diff --git a/tools/ioemu/hw/pc.c b/tools/ioemu/hw/pc.c
index fc25a4224e..999b4f4e9e 100644
--- a/tools/ioemu/hw/pc.c
+++ b/tools/ioemu/hw/pc.c
@@ -31,9 +31,6 @@
#define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
#define LINUX_BOOT_FILENAME "linux_boot.bin"
-/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. */
-#define ACPI_DATA_SIZE 0x10000
-
static fdctrl_t *floppy_controller;
static RTCState *rtc_state;
#ifndef CONFIG_DM
@@ -542,6 +539,7 @@ static void load_linux(const char *kernel_filename,
uint16_t seg[6];
uint16_t real_seg;
int setup_size, kernel_size, initrd_size, cmdline_size;
+ unsigned long end_low_ram;
uint32_t initrd_max;
uint8_t header[1024];
target_phys_addr_t real_addr, reloc_prot_addr, prot_addr, cmdline_addr, initrd_addr;
@@ -595,15 +593,14 @@ static void load_linux(const char *kernel_filename,
(size_t)cmdline_addr,
(size_t)prot_addr);
- /* highest address for loading the initrd */
- if (protocol >= 0x203)
- initrd_max = ldl_p(header+0x22c);
- else
- initrd_max = 0x37ffffff;
-
- if (initrd_max >= ram_size-ACPI_DATA_SIZE)
- initrd_max = ram_size-ACPI_DATA_SIZE-1;
+ /* Special pages are placed at end of low RAM: pick an arbitrary one and
+ * subtract a suitably large amount of padding (64kB) to skip BIOS data. */
+ xc_get_hvm_param(xc_handle, domid, HVM_PARAM_BUFIOREQ_PFN, &end_low_ram);
+ end_low_ram = (end_low_ram << 12) - (64*1024);
+ /* highest address for loading the initrd */
+ initrd_max = (protocol >= 0x203) ? ldl_p(header+0x22c) : 0x37ffffff;
+ initrd_max = MIN(initrd_max, (uint32_t)end_low_ram);
/* kernel command line */
ncmdline = strlen(kernel_cmdline);
diff --git a/tools/ioemu/hw/pci.c b/tools/ioemu/hw/pci.c
index b68dd3f91a..affce42f72 100644
--- a/tools/ioemu/hw/pci.c
+++ b/tools/ioemu/hw/pci.c
@@ -664,9 +664,10 @@ int pt_chk_bar_overlap(PCIBus *bus, int devfn, uint32_t addr, uint32_t size)
r = &devices->io_regions[j];
if ((addr < (r->addr + r->size)) && ((addr + size) > r->addr))
{
- printf("Overlapped to device[%02x:%02x.%x] region:%d addr:%08x"
- " size:%08x\n", bus->bus_num, (devices->devfn >> 3) & 0x1F,
- (devices->devfn & 0x7), j, r->addr, r->size);
+ printf("Overlapped to device[%02x:%02x.%x][Region:%d]"
+ "[Address:%08xh][Size:%08xh]\n", bus->bus_num,
+ (devices->devfn >> 3) & 0x1F, (devices->devfn & 0x7),
+ j, r->addr, r->size);
ret = 1;
goto out;
}
diff --git a/tools/ioemu/hw/pt-msi.c b/tools/ioemu/hw/pt-msi.c
index 1936677335..68a952f059 100644
--- a/tools/ioemu/hw/pt-msi.c
+++ b/tools/ioemu/hw/pt-msi.c
@@ -37,8 +37,7 @@ int pt_msi_setup(struct pt_dev *dev)
return -1;
}
- if ( xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI,
- AUTO_ASSIGN, &pirq,
+ if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
dev->pci_dev->dev << 3 | dev->pci_dev->func,
dev->pci_dev->bus, 0, 1) )
{
@@ -120,8 +119,7 @@ static int pt_msix_update_one(struct pt_dev *dev, int entry_nr)
/* Check if this entry is already mapped */
if ( entry->pirq == -1 )
{
- ret = xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI,
- AUTO_ASSIGN, &pirq,
+ ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
dev->pci_dev->dev << 3 | dev->pci_dev->func,
dev->pci_dev->bus, entry_nr, 0);
if ( ret )
diff --git a/tools/ioemu/hw/serial.c b/tools/ioemu/hw/serial.c
index c6758315a2..30d9fbe620 100644
--- a/tools/ioemu/hw/serial.c
+++ b/tools/ioemu/hw/serial.c
@@ -728,7 +728,6 @@ static int serial_load(QEMUFile *f, void *opaque, int version_id)
qemu_get_8s(f,&s->lsr);
qemu_get_8s(f,&s->msr);
qemu_get_8s(f,&s->scr);
- qemu_get_8s(f,&s->fcr);
if (version_id >= 2)
qemu_get_8s(f,&fcr);
diff --git a/tools/ioemu/hw/vga.c b/tools/ioemu/hw/vga.c
index eb5b969caa..6b1f7439bc 100644
--- a/tools/ioemu/hw/vga.c
+++ b/tools/ioemu/hw/vga.c
@@ -1548,8 +1548,8 @@ static void vga_draw_graphic(VGAState *s, int full_update)
} else {
/* ENODATA just means we have changed mode and will succeed
* next time */
- if (err != -ENODATA)
- fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err);
+ if (errno != ENODATA)
+ fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d, %d)\n", s->lfb_addr + y, npages, err, errno);
}
}
diff --git a/tools/ioemu/vl.c b/tools/ioemu/vl.c
index 129e0d4475..9d526057e4 100644
--- a/tools/ioemu/vl.c
+++ b/tools/ioemu/vl.c
@@ -7136,8 +7136,10 @@ int main(int argc, char **argv)
sigaddset(&set, aio_sig_num);
sigprocmask(SIG_BLOCK, &set, NULL);
}
+#endif
QEMU_LIST_INIT (&vm_change_state_head);
+#ifndef CONFIG_STUBDOM
#ifndef _WIN32
{
struct sigaction act;
diff --git a/tools/libaio/src/Makefile b/tools/libaio/src/Makefile
index f926670e67..da981b7178 100644
--- a/tools/libaio/src/Makefile
+++ b/tools/libaio/src/Makefile
@@ -1,7 +1,7 @@
XEN_ROOT = ../../..
include $(XEN_ROOT)/tools/Rules.mk
-prefix=/usr
+prefix=$(PREFIX)
includedir=$(prefix)/include
libdir=$(prefix)/lib
diff --git a/tools/libxc/xc_dom_boot.c b/tools/libxc/xc_dom_boot.c
index 30906ee993..f117d9ed38 100644
--- a/tools/libxc/xc_dom_boot.c
+++ b/tools/libxc/xc_dom_boot.c
@@ -4,7 +4,7 @@
* This is the code which actually boots a fresh
* prepared domain image as xen guest domain.
*
- * ==> this is the only domain bilder code piece
+ * ==> this is the only domain builder code piece
* where xen hypercalls are allowed <==
*
* This code is licenced under the GPL.
@@ -153,7 +153,7 @@ void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
int page_shift = XC_DOM_PAGE_SHIFT(dom);
privcmd_mmap_entry_t *entries;
void *ptr;
- int i, rc;
+ int i;
int err;
entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
@@ -165,9 +165,13 @@ void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
return NULL;
}
- ptr = mmap(NULL, count << page_shift, PROT_READ | PROT_WRITE,
- MAP_SHARED, dom->guest_xc, 0);
- if ( ptr == MAP_FAILED )
+ for ( i = 0; i < count; i++ )
+ entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
+
+ ptr = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid,
+ count << page_shift, PROT_READ | PROT_WRITE, 1 << page_shift,
+ entries, count);
+ if ( ptr == NULL )
{
err = errno;
xc_dom_panic(XC_INTERNAL_ERROR,
@@ -177,22 +181,6 @@ void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
return NULL;
}
- for ( i = 0; i < count; i++ )
- {
- entries[i].va = (uintptr_t) ptr + (i << page_shift);
- entries[i].mfn = xc_dom_p2m_host(dom, pfn + i);
- entries[i].npages = 1;
- }
-
- rc = xc_map_foreign_ranges(dom->guest_xc, dom->guest_domid,
- entries, count);
- if ( rc < 0 )
- {
- xc_dom_panic(XC_INTERNAL_ERROR,
- "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
- " [xenctl, rc=%d]\n", __FUNCTION__, pfn, count, rc);
- return NULL;
- }
return ptr;
}
diff --git a/tools/libxc/xc_domain_save.c b/tools/libxc/xc_domain_save.c
index 8a16b928bc..687e0d84fa 100644
--- a/tools/libxc/xc_domain_save.c
+++ b/tools/libxc/xc_domain_save.c
@@ -568,16 +568,19 @@ static xen_pfn_t *xc_map_m2p(int xc_handle,
unsigned long m2p_chunks, m2p_size;
xen_pfn_t *m2p;
xen_pfn_t *extent_start;
- int i, rc;
+ int i;
+ m2p = NULL;
m2p_size = M2P_SIZE(max_mfn);
m2p_chunks = M2P_CHUNKS(max_mfn);
xmml.max_extents = m2p_chunks;
- if ( !(extent_start = malloc(m2p_chunks * sizeof(xen_pfn_t))) )
+
+ extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t));
+ if ( !extent_start )
{
ERROR("failed to allocate space for m2p mfns");
- return NULL;
+ goto err0;
}
set_xen_guest_handle(xmml.extent_start, extent_start);
@@ -585,41 +588,36 @@ static xen_pfn_t *xc_map_m2p(int xc_handle,
(xmml.nr_extents != m2p_chunks) )
{
ERROR("xc_get_m2p_mfns");
- return NULL;
- }
-
- if ( (m2p = mmap(NULL, m2p_size, prot,
- MAP_SHARED, xc_handle, 0)) == MAP_FAILED )
- {
- ERROR("failed to mmap m2p");
- return NULL;
+ goto err1;
}
- if ( !(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t))) )
+ entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t));
+ if (entries == NULL)
{
ERROR("failed to allocate space for mmap entries");
- return NULL;
+ goto err1;
}
for ( i = 0; i < m2p_chunks; i++ )
- {
- entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
entries[i].mfn = extent_start[i];
- entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
- }
- if ( (rc = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
- entries, m2p_chunks)) < 0 )
+ m2p = xc_map_foreign_ranges(xc_handle, DOMID_XEN,
+ m2p_size, prot, M2P_CHUNK_SIZE,
+ entries, m2p_chunks);
+ if (m2p == NULL)
{
- ERROR("xc_mmap_foreign_ranges failed (rc = %d)", rc);
- return NULL;
+ ERROR("xc_mmap_foreign_ranges failed");
+ goto err2;
}
m2p_mfn0 = entries[0].mfn;
- free(extent_start);
+err2:
free(entries);
+err1:
+ free(extent_start);
+err0:
return m2p;
}
diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
index 8bf5549188..752c4e76dc 100644
--- a/tools/libxc/xc_hvm_build.c
+++ b/tools/libxc/xc_hvm_build.c
@@ -115,42 +115,32 @@ static int loadelfimage(
struct elf_binary *elf, int xch, uint32_t dom, unsigned long *parray)
{
privcmd_mmap_entry_t *entries = NULL;
- int pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ size_t pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
int i, rc = -1;
/* Map address space for initial elf image. */
- entries = malloc(pages * sizeof(privcmd_mmap_entry_t));
+ entries = calloc(pages, sizeof(privcmd_mmap_entry_t));
if ( entries == NULL )
goto err;
- elf->dest = mmap(NULL, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE,
- MAP_SHARED, xch, 0);
- if ( elf->dest == MAP_FAILED )
- goto err;
for ( i = 0; i < pages; i++ )
- {
- entries[i].va = (uintptr_t)elf->dest + (i << PAGE_SHIFT);
entries[i].mfn = parray[(elf->pstart >> PAGE_SHIFT) + i];
- entries[i].npages = 1;
- }
- rc = xc_map_foreign_ranges(xch, dom, entries, pages);
- if ( rc < 0 )
+ elf->dest = xc_map_foreign_ranges(
+ xch, dom, pages << PAGE_SHIFT, PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
+ entries, pages);
+ if ( elf->dest == NULL )
goto err;
/* Load the initial elf image. */
elf_load_binary(elf);
rc = 0;
- err:
- if ( elf->dest )
- {
- munmap(elf->dest, pages << PAGE_SHIFT);
- elf->dest = NULL;
- }
+ munmap(elf->dest, pages << PAGE_SHIFT);
+ elf->dest = NULL;
- if ( entries )
- free(entries);
+ err:
+ free(entries);
return rc;
}
@@ -239,7 +229,7 @@ static int setup_guest(int xc_handle,
if ( ((count | cur_pages) & (SUPERPAGE_NR_PFNS - 1)) == 0 )
{
long done;
- xen_pfn_t sp_extents[2048 >> SUPERPAGE_PFN_SHIFT];
+ xen_pfn_t sp_extents[count >> SUPERPAGE_PFN_SHIFT];
struct xen_memory_reservation sp_req = {
.nr_extents = count >> SUPERPAGE_PFN_SHIFT,
.extent_order = SUPERPAGE_PFN_SHIFT,
diff --git a/tools/libxc/xc_linux.c b/tools/libxc/xc_linux.c
index d407299ff8..2480b3ce72 100644
--- a/tools/libxc/xc_linux.c
+++ b/tools/libxc/xc_linux.c
@@ -118,16 +118,41 @@ void *xc_map_foreign_range(int xc_handle, uint32_t dom,
return addr;
}
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
- privcmd_mmap_entry_t *entries, int nr)
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+ size_t size, int prot, size_t chunksize,
+ privcmd_mmap_entry_t entries[], int nentries)
{
privcmd_mmap_t ioctlx;
+ int i, rc;
+ void *addr;
+
+ addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+ if ( addr == MAP_FAILED )
+ goto mmap_failed;
- ioctlx.num = nr;
+ for ( i = 0; i < nentries; i++ )
+ {
+ entries[i].va = (unsigned long)addr + (i * chunksize);
+ entries[i].npages = chunksize >> PAGE_SHIFT;
+ }
+
+ ioctlx.num = nentries;
ioctlx.dom = dom;
ioctlx.entry = entries;
- return ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+ rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+ if ( rc )
+ goto ioctl_failed;
+
+ return addr;
+
+ioctl_failed:
+ rc = munmap(addr, size);
+ if ( rc == -1 )
+ ERROR("%s: error in error path\n", __FUNCTION__);
+
+mmap_failed:
+ return NULL;
}
static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data)
diff --git a/tools/libxc/xc_minios.c b/tools/libxc/xc_minios.c
index c57b7a5b7d..b1b3c09a48 100644
--- a/tools/libxc/xc_minios.c
+++ b/tools/libxc/xc_minios.c
@@ -15,6 +15,7 @@
#include <os.h>
#include <mm.h>
#include <lib.h>
+#include <gntmap.h>
#include <events.h>
#include <wait.h>
#include <sys/mman.h>
@@ -76,17 +77,31 @@ void *xc_map_foreign_range(int xc_handle, uint32_t dom,
return map_frames_ex(&mfn, size / getpagesize(), 0, 1, 1, dom, 0, pt_prot);
}
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
- privcmd_mmap_entry_t *entries, int nr)
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+ size_t size, int prot, size_t chunksize,
+ privcmd_mmap_entry_t entries[], int nentries)
{
- int i;
- for (i = 0; i < nr; i++) {
- unsigned long mfn = entries[i].mfn;
- do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, L1_PROT);
- }
- return 0;
+ unsigned long mfns[size / PAGE_SIZE];
+ int i, j, n;
+ unsigned long pt_prot = 0;
+#ifdef __ia64__
+ /* TODO */
+#else
+ if (prot & PROT_READ)
+ pt_prot = L1_PROT_RO;
+ if (prot & PROT_WRITE)
+ pt_prot = L1_PROT;
+#endif
+
+ n = 0;
+ for (i = 0; i < nentries; i++)
+ for (j = 0; j < chunksize / PAGE_SIZE; j++)
+ mfns[n++] = entries[i].mfn + j;
+
+ return map_frames_ex(mfns, n, 1, 0, 1, dom, 0, pt_prot);
}
+
int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall)
{
multicall_entry_t call;
@@ -102,8 +117,8 @@ int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall)
errno = -ret;
return -1;
}
- if (call.result < 0) {
- errno = -call.result;
+ if ((long) call.result < 0) {
+ errno = - (long) call.result;
return -1;
}
return call.result;
@@ -244,8 +259,11 @@ int xc_evtchn_unbind(int xce_handle, evtchn_port_t port)
files[xce_handle].evtchn.ports[i].port = -1;
break;
}
- if (i == MAX_EVTCHN_PORTS)
+ if (i == MAX_EVTCHN_PORTS) {
printf("Warning: couldn't find port %"PRId32" for xc handle %x\n", port, xce_handle);
+ errno = -EINVAL;
+ return -1;
+ }
files[xce_handle].evtchn.ports[i].bound = 0;
unbind_evtchn(port);
return 0;
@@ -278,18 +296,24 @@ evtchn_port_or_error_t xc_evtchn_pending(int xce_handle)
{
int i;
unsigned long flags;
+ evtchn_port_t ret = -1;
+
local_irq_save(flags);
+ files[xce_handle].read = 0;
for (i = 0; i < MAX_EVTCHN_PORTS; i++) {
- evtchn_port_t port = files[xce_handle].evtchn.ports[i].port;
- if (port != -1 && files[xce_handle].evtchn.ports[i].pending) {
- files[xce_handle].evtchn.ports[i].pending = 0;
- local_irq_restore(flags);
- return port;
- }
+ evtchn_port_t port = files[xce_handle].evtchn.ports[i].port;
+ if (port != -1 && files[xce_handle].evtchn.ports[i].pending) {
+ if (ret == -1) {
+ ret = port;
+ files[xce_handle].evtchn.ports[i].pending = 0;
+ } else {
+ files[xce_handle].read = 1;
+ break;
+ }
+ }
}
- files[xce_handle].read = 0;
local_irq_restore(flags);
- return -1;
+ return ret;
}
int xc_evtchn_unmask(int xce_handle, evtchn_port_t port)
@@ -304,6 +328,88 @@ void discard_file_cache(int fd, int flush)
if (flush)
fsync(fd);
}
+
+int xc_gnttab_open(void)
+{
+ int xcg_handle;
+ xcg_handle = alloc_fd(FTYPE_GNTMAP);
+ gntmap_init(&files[xcg_handle].gntmap);
+ return xcg_handle;
+}
+
+int xc_gnttab_close(int xcg_handle)
+{
+ gntmap_fini(&files[xcg_handle].gntmap);
+ files[xcg_handle].type = FTYPE_NONE;
+ return 0;
+}
+
+void *xc_gnttab_map_grant_ref(int xcg_handle,
+ uint32_t domid,
+ uint32_t ref,
+ int prot)
+{
+ return gntmap_map_grant_refs(&files[xcg_handle].gntmap,
+ 1,
+ &domid, 0,
+ &ref,
+ prot & PROT_WRITE);
+}
+
+void *xc_gnttab_map_grant_refs(int xcg_handle,
+ uint32_t count,
+ uint32_t *domids,
+ uint32_t *refs,
+ int prot)
+{
+ return gntmap_map_grant_refs(&files[xcg_handle].gntmap,
+ count,
+ domids, 1,
+ refs,
+ prot & PROT_WRITE);
+}
+
+void *xc_gnttab_map_domain_grant_refs(int xcg_handle,
+ uint32_t count,
+ uint32_t domid,
+ uint32_t *refs,
+ int prot)
+{
+ return gntmap_map_grant_refs(&files[xcg_handle].gntmap,
+ count,
+ &domid, 0,
+ refs,
+ prot & PROT_WRITE);
+}
+
+int xc_gnttab_munmap(int xcg_handle,
+ void *start_address,
+ uint32_t count)
+{
+ int ret;
+ ret = gntmap_munmap(&files[xcg_handle].gntmap,
+ (unsigned long) start_address,
+ count);
+ if (ret < 0) {
+ errno = -ret;
+ return -1;
+ }
+ return ret;
+}
+
+int xc_gnttab_set_max_grants(int xcg_handle,
+ uint32_t count)
+{
+ int ret;
+ ret = gntmap_set_max_grants(&files[xcg_handle].gntmap,
+ count);
+ if (ret < 0) {
+ errno = -ret;
+ return -1;
+ }
+ return ret;
+}
+
/*
* Local variables:
* mode: C
diff --git a/tools/libxc/xc_netbsd.c b/tools/libxc/xc_netbsd.c
index 036f64879c..aab325f68a 100644
--- a/tools/libxc/xc_netbsd.c
+++ b/tools/libxc/xc_netbsd.c
@@ -11,7 +11,6 @@
#include "xc_private.h"
-#include <xen/memory.h>
#include <xen/sys/evtchn.h>
#include <unistd.h>
#include <fcntl.h>
@@ -114,23 +113,43 @@ void *xc_map_foreign_range(int xc_handle, uint32_t dom,
return addr;
}
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
- privcmd_mmap_entry_t *entries, int nr)
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+ size_t size, int prot, size_t chunksize,
+ privcmd_mmap_entry_t entries[], int nentries)
{
- privcmd_mmap_t ioctlx;
- int err;
+ privcmd_mmap_t ioctlx;
+ int i, rc;
+ void *addr;
- ioctlx.num = nr;
- ioctlx.dom = dom;
- ioctlx.entry = entries;
+ addr = mmap(NULL, size, prot, MAP_ANON | MAP_SHARED, -1, 0);
+ if (addr == MAP_FAILED)
+ goto mmap_failed;
- err = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
- if (err == 0)
- return 0;
- else
- return -errno;
+ for (i = 0; i < nentries; i++) {
+ entries[i].va = (uintptr_t)addr + (i * chunksize);
+ entries[i].npages = chunksize >> PAGE_SHIFT;
+ }
+
+ ioctlx.num = nentries;
+ ioctlx.dom = dom;
+ ioctlx.entry = entries;
+
+ rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+ if (rc)
+ goto ioctl_failed;
+
+ return addr;
+
+ioctl_failed:
+ rc = munmap(addr, size);
+ if (rc == -1)
+ ERROR("%s: error in error path\n", __FUNCTION__);
+
+mmap_failed:
+ return NULL;
}
+
static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data)
{
int err = ioctl(xc_handle, cmd, data);
diff --git a/tools/libxc/xc_physdev.c b/tools/libxc/xc_physdev.c
index 2b619664cb..b320f19dc8 100644
--- a/tools/libxc/xc_physdev.c
+++ b/tools/libxc/xc_physdev.c
@@ -22,7 +22,6 @@ int xc_physdev_pci_access_modify(int xc_handle,
int xc_physdev_map_pirq(int xc_handle,
int domid,
- int type,
int index,
int *pirq)
{
@@ -33,7 +32,7 @@ int xc_physdev_map_pirq(int xc_handle,
return -EINVAL;
map.domid = domid;
- map.type = type;
+ map.type = MAP_PIRQ_TYPE_GSI;
map.index = index;
map.pirq = *pirq;
@@ -47,7 +46,6 @@ int xc_physdev_map_pirq(int xc_handle,
int xc_physdev_map_pirq_msi(int xc_handle,
int domid,
- int type,
int index,
int *pirq,
int devfn,
@@ -62,7 +60,7 @@ int xc_physdev_map_pirq_msi(int xc_handle,
return -EINVAL;
map.domid = domid;
- map.type = type;
+ map.type = MAP_PIRQ_TYPE_MSI;
map.index = index;
map.pirq = *pirq;
map.msi_info.devfn = devfn;
diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h
index 2916903aef..6e49b749a0 100644
--- a/tools/libxc/xc_private.h
+++ b/tools/libxc/xc_private.h
@@ -184,8 +184,9 @@ static inline int do_sysctl(int xc_handle, struct xen_sysctl *sysctl)
return ret;
}
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
- privcmd_mmap_entry_t *entries, int nr);
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+ size_t size, int prot, size_t chunksize,
+ privcmd_mmap_entry_t entries[], int nentries);
void *map_domain_va_core(unsigned long domfd, int cpu, void *guest_va,
vcpu_guest_context_any_t *ctxt);
diff --git a/tools/libxc/xc_solaris.c b/tools/libxc/xc_solaris.c
index 86eee3c719..f88a928906 100644
--- a/tools/libxc/xc_solaris.c
+++ b/tools/libxc/xc_solaris.c
@@ -109,18 +109,41 @@ void *xc_map_foreign_range(int xc_handle, uint32_t dom,
return addr;
}
-int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
- privcmd_mmap_entry_t *entries, int nr)
+void *xc_map_foreign_ranges(int xc_handle, uint32_t dom,
+ size_t size, int prot, size_t chunksize,
+ privcmd_mmap_entry_t entries[], int nentries)
{
privcmd_mmap_t ioctlx;
+ int i, rc;
+ void *addr;
+
+ addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
+ if (addr == MAP_FAILED)
+ goto mmap_failed;
+
+ for (i = 0; i < nentries; i++) {
+ entries[i].va = (uintptr_t)addr + (i * chunksize);
+ entries[i].npages = chunksize >> PAGE_SHIFT;
+ }
- ioctlx.num = nr;
+ ioctlx.num = nentries;
ioctlx.dom = dom;
ioctlx.entry = entries;
- return ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+ rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx);
+ if (rc)
+ goto ioctl_failed;
+
+ioctl_failed:
+ rc = munmap(addr, size);
+ if (rc == -1)
+ ERROR("%s: error in error path\n", __FUNCTION__);
+
+mmap_failed:
+ return NULL;
}
+
static int do_privcmd(int xc_handle, unsigned int cmd, unsigned long data)
{
return ioctl(xc_handle, cmd, data);
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 2f6aafb0c8..0639d3fd23 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -907,13 +907,11 @@ int xc_gnttab_set_max_grants(int xcg_handle,
int xc_physdev_map_pirq(int xc_handle,
int domid,
- int type,
int index,
int *pirq);
int xc_physdev_map_pirq_msi(int xc_handle,
int domid,
- int type,
int index,
int *pirq,
int devfn,
diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub
index 46ed713664..48855adc45 100644
--- a/tools/pygrub/src/pygrub
+++ b/tools/pygrub/src/pygrub
@@ -21,7 +21,7 @@ import platform
import curses, _curses, curses.wrapper, curses.textpad, curses.ascii
import getopt
-sys.path = [ '/usr/lib/python' ] + sys.path
+sys.path = [ '/usr/lib/python', '/usr/lib64/python' ] + sys.path
import fsimage
import grub.GrubConf
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index d45aa85dc7..01e0542c9e 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -958,8 +958,7 @@ static PyObject *pyxc_physdev_map_pirq(PyObject *self,
if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iii", kwd_list,
&dom, &index, &pirq) )
return NULL;
- ret = xc_physdev_map_pirq(xc->xc_handle, dom, MAP_PIRQ_TYPE_GSI,
- index, &pirq);
+ ret = xc_physdev_map_pirq(xc->xc_handle, dom, index, &pirq);
if ( ret != 0 )
return pyxc_error_to_exception();
return PyLong_FromUnsignedLong(pirq);
diff --git a/tools/python/xen/util/pci.py b/tools/python/xen/util/pci.py
index 42a0f81d8a..36fd4ebe9e 100644
--- a/tools/python/xen/util/pci.py
+++ b/tools/python/xen/util/pci.py
@@ -44,6 +44,12 @@ PCI_STATUS = 0x6
PCI_CLASS_DEVICE = 0x0a
PCI_CLASS_BRIDGE_PCI = 0x0604
+PCI_HEADER_TYPE = 0x0e
+PCI_HEADER_TYPE_MASK = 0x7f
+PCI_HEADER_TYPE_NORMAL = 0
+PCI_HEADER_TYPE_BRIDGE = 1
+PCI_HEADER_TYPE_CARDBUS = 2
+
PCI_CAPABILITY_LIST = 0x34
PCI_CB_BRIDGE_CONTROL = 0x3e
PCI_BRIDGE_CTL_BUS_RESET= 0x40
@@ -57,6 +63,12 @@ PCI_EXP_DEVCAP_FLR = (0x1 << 28)
PCI_EXP_DEVCTL = 0x8
PCI_EXP_DEVCTL_FLR = (0x1 << 15)
+PCI_CAP_ID_PM = 0x01
+PCI_PM_CTRL = 4
+PCI_PM_CTRL_NO_SOFT_RESET = 0x0004
+PCI_PM_CTRL_STATE_MASK = 0x0003
+PCI_D3hot = 3
+
PCI_CAP_ID_AF = 0x13
PCI_AF_CAPs = 0x3
PCI_AF_CAPs_TP_FLR = 0x3
@@ -105,15 +117,22 @@ def parse_hex(val):
return None
def parse_pci_name(pci_name_string):
- # Format: xxxx:xx:xx:x
- s = pci_name_string
- s = s.split(':')
- dom = parse_hex(s[0])
- bus = parse_hex(s[1])
- s = s[2].split('.')
- dev = parse_hex(s[0])
- func = parse_hex(s[1])
- return (dom, bus, dev, func)
+ pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
+ r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
+ r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \
+ r"(?P<func>[0-7])$", pci_name_string)
+ if pci_match is None:
+ raise PciDeviceParseError(('Failed to parse pci device name: %s' %
+ pci_name_string))
+ pci_dev_info = pci_match.groupdict('0')
+
+ domain = parse_hex(pci_dev_info['domain'])
+ bus = parse_hex(pci_dev_info['bus'])
+ slot = parse_hex(pci_dev_info['slot'])
+ func = parse_hex(pci_dev_info['func'])
+
+ return (domain, bus, slot, func)
+
def find_sysfs_mnt():
global sysfs_mnt_point
@@ -169,14 +188,14 @@ def create_lspci_info():
# Execute 'lspci' command and parse the result.
# If the command does not exist, lspci_info will be kept blank ({}).
- for paragraph in os.popen(LSPCI_CMD + ' -vmmD').read().split('\n\n'):
+ for paragraph in os.popen(LSPCI_CMD + ' -vmm').read().split('\n\n'):
device_name = None
device_info = {}
for line in paragraph.split('\n'):
try:
(opt, value) = line.split(':\t')
if opt == 'Slot':
- device_name = value
+ device_name = PCI_DEV_FORMAT_STR % parse_pci_name(value)
else:
device_info[opt] = value
except:
@@ -246,18 +265,8 @@ def transform_list(target, src):
return result
def check_FLR_capability(dev_list):
- i = len(dev_list)
- if i == 0:
+ if len(dev_list) == 0:
return []
- i = i - 1;
- while i >= 0:
- dev = dev_list[i]
- if dev.bus == 0:
- if dev.dev_type == DEV_TYPE_PCIe_ENDPOINT and not dev.pcie_flr:
- del dev_list[i]
- elif dev.dev_type == DEV_TYPE_PCI and not dev.pci_af_flr:
- del dev_list[i]
- i = i - 1
pci_list = []
pci_dev_dict = {}
@@ -270,6 +279,8 @@ def check_FLR_capability(dev_list):
for pci in pci_list:
if isinstance(pci, types.StringTypes):
dev = pci_dev_dict[pci]
+ if dev.bus == 0:
+ continue
if dev.dev_type == DEV_TYPE_PCIe_ENDPOINT and not dev.pcie_flr:
coassigned_pci_list = dev.find_all_the_multi_functions()
need_transform = True
@@ -338,13 +349,6 @@ class PciDeviceAssignmentError(Exception):
return 'pci: impproper device assignment spcified: ' + \
self.message
-class PciDeviceFlrError(PciDeviceAssignmentError):
- def __init__(self,msg):
- self.message = msg
- def __str__(self):
- return 'Can not find a suitable FLR method for the device(s): ' + \
- self.message
-
class PciDevice:
def __init__(self, domain, bus, slot, func):
self.domain = domain
@@ -480,6 +484,27 @@ class PciDevice:
# Restore the config spaces
restore_pci_conf_space((pci_list, cfg_list))
+ def do_Dstate_transition(self):
+ pos = self.find_cap_offset(PCI_CAP_ID_PM)
+ if pos == 0:
+ return
+
+ (pci_list, cfg_list) = save_pci_conf_space([self.name])
+
+ # Enter D3hot without soft reset
+ pm_ctl = self.pci_conf_read32(pos + PCI_PM_CTRL)
+ pm_ctl |= PCI_PM_CTRL_NO_SOFT_RESET
+ pm_ctl &= ~PCI_PM_CTRL_STATE_MASK
+ pm_ctl |= PCI_D3hot
+ self.pci_conf_write32(pos + PCI_PM_CTRL, pm_ctl)
+ time.sleep(0.010)
+
+ # From D3hot to D0
+ self.pci_conf_write32(pos + PCI_PM_CTRL, 0)
+ time.sleep(0.010)
+
+ restore_pci_conf_space((pci_list, cfg_list))
+
def find_all_the_multi_functions(self):
sysfs_mnt = find_sysfs_mnt()
pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH).read()
@@ -650,13 +675,16 @@ class PciDevice:
time.sleep(0.200)
restore_pci_conf_space((pci_list, cfg_list))
else:
- funcs = self.find_all_the_multi_functions()
- self.devs_check_driver(funcs)
+ if self.bus == 0:
+ self.do_Dstate_transition()
+ else:
+ funcs = self.find_all_the_multi_functions()
+ self.devs_check_driver(funcs)
- parent = '%04x:%02x:%02x.%01x' % self.find_parent()
+ parent = '%04x:%02x:%02x.%01x' % self.find_parent()
- # Do Secondary Bus Reset.
- self.do_secondary_bus_reset(parent, funcs)
+ # Do Secondary Bus Reset.
+ self.do_secondary_bus_reset(parent, funcs)
# PCI devices
else:
# For PCI device on host bus, we test "PCI Advanced Capabilities".
@@ -669,9 +697,7 @@ class PciDevice:
restore_pci_conf_space((pci_list, cfg_list))
else:
if self.bus == 0:
- err_msg = 'pci: %s is not assignable: it is on bus 0, '+ \
- 'but it has no PCI Advanced Capabilities.'
- raise PciDeviceFlrError(err_msg % self.name)
+ self.do_Dstate_transition()
else:
devs = self.find_coassigned_devices(False)
# Remove the element 0 which is a bridge
@@ -690,12 +716,24 @@ class PciDevice:
self.name+SYSFS_PCI_DEV_CONFIG_PATH
try:
conf_file = open(path, 'rb')
+ conf_file.seek(PCI_HEADER_TYPE)
+ header_type = ord(conf_file.read(1)) & PCI_HEADER_TYPE_MASK
+ if header_type == PCI_HEADER_TYPE_CARDBUS:
+ return
conf_file.seek(PCI_STATUS_OFFSET)
status = ord(conf_file.read(1))
if status&PCI_STATUS_CAP_MASK:
conf_file.seek(PCI_CAP_OFFSET)
capa_pointer = ord(conf_file.read(1))
+ capa_count = 0
while capa_pointer:
+ if capa_pointer < 0x40:
+ raise PciDeviceParseError(
+ ('Broken capability chain: %s' % self.name))
+ capa_count += 1
+ if capa_count > 96:
+ raise PciDeviceParseError(
+ ('Looped capability chain: %s' % self.name))
conf_file.seek(capa_pointer)
capa_id = ord(conf_file.read(1))
capa_pointer = ord(conf_file.read(1))
diff --git a/tools/python/xen/util/utils.py b/tools/python/xen/util/utils.py
index b6c7d2ae40..e13d29bb2d 100644
--- a/tools/python/xen/util/utils.py
+++ b/tools/python/xen/util/utils.py
@@ -1,6 +1,50 @@
import traceback
import sys
+import os
def exception_string(e):
(ty,v,tb) = sys.exc_info()
return traceback.format_exception_only(ty,v)
+
+def daemonize(prog, args, stdin_tmpfile=None):
+ """Runs a program as a daemon with the list of arguments. Returns the PID
+ of the daemonized program, or returns 0 on error.
+ """
+ r, w = os.pipe()
+ pid = os.fork()
+
+ if pid == 0:
+ os.close(r)
+ w = os.fdopen(w, 'w')
+ os.setsid()
+ try:
+ pid2 = os.fork()
+ except:
+ pid2 = None
+ if pid2 == 0:
+ os.chdir("/")
+ null_fd = os.open("/dev/null", os.O_RDWR)
+ if stdin_tmpfile is not None:
+ os.dup2(stdin_tmpfile.fileno(), 0)
+ else:
+ os.dup2(null_fd, 0)
+ os.dup2(null_fd, 1)
+ os.dup2(null_fd, 2)
+ for fd in range(3, 256):
+ try:
+ os.close(fd)
+ except:
+ pass
+ os.execvp(prog, args)
+ os._exit(1)
+ else:
+ w.write(str(pid2 or 0))
+ w.close()
+ os._exit(0)
+ os.close(w)
+ r = os.fdopen(r)
+ daemon_pid = int(r.read())
+ r.close()
+ os.waitpid(pid, 0)
+ return daemon_pid
+
diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
index 5052009158..485b5035dc 100644
--- a/tools/python/xen/xend/XendDomainInfo.py
+++ b/tools/python/xen/xend/XendDomainInfo.py
@@ -599,14 +599,17 @@ class XendDomainInfo:
new_dev['func'])
bdf = xc.test_assign_device(self.domid, pci_str)
if bdf != 0:
+ if bdf == -1:
+ raise VmError("failed to assign device: maybe the platform"
+ " doesn't support VT-d, or VT-d isn't enabled"
+ " properly?")
bus = (bdf >> 16) & 0xff
devfn = (bdf >> 8) & 0xff
dev = (devfn >> 3) & 0x1f
func = devfn & 0x7
- raise VmError("Fail to hot insert device(%x:%x.%x): maybe VT-d is "
- "not enabled, or the device is not exist, or it "
- "has already been assigned to other domain"
- % (bus, dev, func))
+ raise VmError("fail to assign device(%x:%x.%x): maybe it has"
+ " already been assigned to other domain, or maybe"
+ " it doesn't exist." % (bus, dev, func))
bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'],
new_dev['bus'],
@@ -635,7 +638,10 @@ class XendDomainInfo:
self._waitForDevice(dev_type, devid)
except VmError, ex:
del self.info['devices'][dev_uuid]
- if dev_type == 'tap':
+ if dev_type == 'pci':
+ for dev in dev_config_dict['devs']:
+ XendAPIStore.deregister(dev['uuid'], 'DPCI')
+ elif dev_type == 'tap':
self.info['vbd_refs'].remove(dev_uuid)
else:
self.info['%s_refs' % dev_type].remove(dev_uuid)
@@ -2086,14 +2092,17 @@ class XendDomainInfo:
if hvm and pci_str:
bdf = xc.test_assign_device(self.domid, pci_str)
if bdf != 0:
+ if bdf == -1:
+ raise VmError("failed to assign device: maybe the platform"
+ " doesn't support VT-d, or VT-d isn't enabled"
+ " properly?")
bus = (bdf >> 16) & 0xff
devfn = (bdf >> 8) & 0xff
dev = (devfn >> 3) & 0x1f
func = devfn & 0x7
- raise VmError("Fail to assign device(%x:%x.%x): maybe VT-d is "
- "not enabled, or the device is not exist, or it "
- "has already been assigned to other domain"
- % (bus, dev, func))
+ raise VmError("fail to assign device(%x:%x.%x): maybe it has"
+ " already been assigned to other domain, or maybe"
+ " it doesn't exist." % (bus, dev, func))
# register the domain in the list
from xen.xend import XendDomain
@@ -2374,6 +2383,9 @@ class XendDomainInfo:
def destroy(self):
"""Cleanup VM and destroy domain. Nothrow guarantee."""
+ if self.domid is None:
+ return
+
log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
paths = self._prepare_phantom_paths()
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index 3b1fc4225c..1add987528 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -114,7 +114,7 @@ class ImageHandler:
self.display = vmConfig['platform'].get('display')
self.xauthority = vmConfig['platform'].get('xauthority')
- self.vncconsole = vmConfig['platform'].get('vncconsole')
+ self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0))
self.dmargs = self.parseDeviceModelArgs(vmConfig)
self.pid = None
rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
@@ -250,10 +250,6 @@ class ImageHandler:
def parseDeviceModelArgs(self, vmConfig):
ret = ["-domain-name", str(self.vm.info['name_label'])]
- # Tell QEMU how large the guest's memory allocation is
- # to help it when loading the initrd (if neccessary)
- ret += ["-m", str(self.getRequiredInitialReservation() / 1024)]
-
# Find RFB console device, and if it exists, make QEMU enable
# the VNC console.
if int(vmConfig['platform'].get('nographic', 0)) != 0:
@@ -777,6 +773,10 @@ class HVMImageHandler(ImageHandler):
ret.append("tap,vlan=%d,ifname=tap%d.%d,bridge=%s" %
(nics, self.vm.getDomid(), nics-1, bridge))
+ if nics == 0:
+ ret.append("-net")
+ ret.append("none")
+
return ret
def getDeviceModelArgs(self, restore = False):
diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py
index 9946377d9f..6dd51be529 100644
--- a/tools/python/xen/xend/server/pciif.py
+++ b/tools/python/xen/xend/server/pciif.py
@@ -375,17 +375,34 @@ class PciController(DevController):
raise VmError("pci: failed to locate device and "+
"parse it's resources - "+str(e))
if (dev.dev_type == DEV_TYPE_PCIe_ENDPOINT) and not dev.pcie_flr:
- funcs = dev.find_all_the_multi_functions()
- for f in funcs:
- if not f in pci_str_list:
- err_msg = 'pci: % must be co-assigned to guest with %s'
- raise VmError(err_msg % (f, dev.name))
+ if dev.bus == 0:
+ # We cope with this case by using the Dstate transition
+ # method for now.
+ err_msg = 'pci: %s: it is on bus 0, but has no PCIe' +\
+ ' FLR Capability. Will try the Dstate transition'+\
+ ' method if available.'
+ log.warn(err_msg % dev.name)
+ else:
+ funcs = dev.find_all_the_multi_functions()
+ for f in funcs:
+ if not f in pci_str_list:
+ (f_dom, f_bus, f_slot, f_func) = parse_pci_name(f)
+ f_pci_str = '0x%x,0x%x,0x%x,0x%x' % \
+ (f_dom, f_bus, f_slot, f_func)
+ # f has been assigned to other guest?
+ if xc.test_assign_device(0, f_pci_str) != 0:
+ err_msg = 'pci: %s must be co-assigned to' + \
+ ' the same guest with %s'
+ raise VmError(err_msg % (f, dev.name))
elif dev.dev_type == DEV_TYPE_PCI:
if dev.bus == 0:
if not dev.pci_af_flr:
- err_msg = 'pci: %s is not assignable: it is on ' + \
- 'bus 0, but lacks of FLR capability'
- raise VmError(err_msg % dev.name)
+ # We cope with this case by using the Dstate transition
+ # method for now.
+ err_msg = 'pci: %s: it is on bus 0, but has no PCI' +\
+ ' Advanced Capabilities for FLR. Will try the'+\
+ ' Dstate transition method if available.'
+ log.warn(err_msg % dev.name)
else:
# All devices behind the uppermost PCI/PCI-X bridge must be\
# co-assigned to the same guest.
@@ -395,8 +412,14 @@ class PciController(DevController):
for s in devs_str:
if not s in pci_str_list:
- err_msg = 'pci: %s must be co-assigned to guest with %s'
- raise VmError(err_msg % (s, dev.name))
+ (s_dom, s_bus, s_slot, s_func) = parse_pci_name(s)
+ s_pci_str = '0x%x,0x%x,0x%x,0x%x' % \
+ (s_dom, s_bus, s_slot, s_func)
+ # s has been assigned to other guest?
+ if xc.test_assign_device(0, s_pci_str) != 0:
+ err_msg = 'pci: %s must be co-assigned to the'+\
+ ' same guest with %s'
+ raise VmError(err_msg % (s, dev.name))
for (domain, bus, slot, func) in pci_dev_list:
self.setupOneDevice(domain, bus, slot, func)
diff --git a/tools/python/xen/xm/console.py b/tools/python/xen/xm/console.py
index f971644fe9..0b83f1139d 100644
--- a/tools/python/xen/xm/console.py
+++ b/tools/python/xen/xm/console.py
@@ -15,10 +15,69 @@
# Copyright (C) 2005 XenSource Ltd
#============================================================================
+import xen.util.auxbin
+import xen.lowlevel.xs
+import os
+import sys
+import signal
+from xen.util import utils
XENCONSOLE = "xenconsole"
-import xen.util.auxbin
-
def execConsole(domid):
xen.util.auxbin.execute(XENCONSOLE, [str(domid)])
+
+
+class OurXenstoreConnection:
+    """Small wrapper around a xen.lowlevel.xs handle for reading xenstore."""
+    def __init__(self):
+        self.handle = xen.lowlevel.xs.xs()
+    def read_eventually(self, path):
+        """Read path, blocking on a xenstore watch until the read yields
+        something other than None.
+
+        A 10 second signal.alarm() is armed for the duration of the wait
+        and is always disarmed (and the watch removed) on the way out.
+        """
+        watch = None
+        try:
+            signal.alarm(10)
+            watch = self.handle.watch(path, None)
+            while True:
+                result = self.handle.read('0', path)
+                if result is not None:
+                    return result
+                self.handle.read_watch()
+        finally:
+            # Also runs on the successful return above, so the alarm does
+            # not need to be cancelled separately before returning.
+            signal.alarm(0)
+            if watch is not None: self.handle.unwatch(path, watch)
+    def read_maybe(self, path):
+        """Single read of path; returns whatever the handle's read() gives
+        (callers in this file treat None as "not present")."""
+        return self.handle.read('0', path)
+
+def runVncViewer(domid, do_autopass, do_daemonize=False):
+    """Start vncviewer against the VNC display of domain domid.
+
+    domid        -- numeric domain id (formatted with %d into xenstore paths)
+    do_autopass  -- if true and the vfb backend has a vncpasswd entry, hand
+                    it to the viewer on stdin together with -autopass
+    do_daemonize -- if true, launch the viewer through utils.daemonize and
+                    return; otherwise exec vncviewer in place of this process
+    """
+    xs = OurXenstoreConnection()
+    d = '/local/domain/%d/' % domid
+    vnc_port = xs.read_eventually(d + 'console/vnc-port')
+    vfb_backend = xs.read_maybe(d + 'device/vfb/0/backend')
+    vnc_listen = None
+    vnc_password = None
+    vnc_password_tmpfile = None
+    cmdl = ['vncviewer']
+    if vfb_backend is not None:
+        vnc_listen = xs.read_maybe(vfb_backend + '/vnclisten')
+        if do_autopass:
+            vnc_password = xs.read_maybe(vfb_backend + '/vncpasswd')
+            if vnc_password is not None:
+                cmdl.append('-autopass')
+                vnc_password_tmpfile = os.tmpfile()
+                print >>vnc_password_tmpfile, vnc_password
+                # Push the password out of the buffer first, then rewind so
+                # the viewer reads it from the start of the file.
+                vnc_password_tmpfile.flush()
+                vnc_password_tmpfile.seek(0)
+    if vnc_listen is None:
+        vnc_listen = 'localhost'
+    # The viewer takes a display number, i.e. the TCP port minus 5900.
+    cmdl.append('%s:%d' % (vnc_listen, int(vnc_port) - 5900))
+    if do_daemonize:
+        pid = utils.daemonize('vncviewer', cmdl, vnc_password_tmpfile)
+        if pid == 0:
+            # Was "puts", which is not a Python name and raised NameError
+            # instead of reporting the failure.
+            print >>sys.stderr, 'failed to invoke vncviewer'
+            os._exit(-1)
+    else:
+        print 'invoking ', ' '.join(cmdl)
+        if vnc_password_tmpfile is not None:
+            # Make the password file the viewer's stdin.
+            os.dup2(vnc_password_tmpfile.fileno(), 0)
+        os.execvp('vncviewer', cmdl)
diff --git a/tools/python/xen/xm/create.dtd b/tools/python/xen/xm/create.dtd
index efb3cbfd3e..0d9b52405a 100644
--- a/tools/python/xen/xm/create.dtd
+++ b/tools/python/xen/xm/create.dtd
@@ -39,6 +39,7 @@
vbd*,
vif*,
vtpm*,
+ pci*,
console*,
platform*,
vcpu_param*,
@@ -80,6 +81,13 @@
<!ELEMENT vtpm (name*)>
<!ATTLIST vtpm backend CDATA #REQUIRED>
+<!ELEMENT pci EMPTY>
+<!ATTLIST pci domain CDATA #REQUIRED
+ bus CDATA #REQUIRED
+ slot CDATA #REQUIRED
+ func CDATA #REQUIRED
+ vslt CDATA #IMPLIED>
+
<!ELEMENT console (other_config*)>
<!ATTLIST console protocol (vt100|rfb|rdp) #REQUIRED>
diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
index 3652d8c775..9d00a81226 100644
--- a/tools/python/xen/xm/create.py
+++ b/tools/python/xen/xm/create.py
@@ -36,10 +36,12 @@ from xen.util import blkif
from xen.util import vscsi_util
import xen.util.xsm.xsm as security
from xen.xm.main import serverType, SERVER_XEN_API, get_single_vm
+from xen.util import utils
from xen.xm.opts import *
from main import server
+from main import domain_name_to_domid
import console
@@ -118,6 +120,14 @@ gopts.opt('console_autoconnect', short='c',
fn=set_true, default=0,
use="Connect to the console after the domain is created.")
+gopts.opt('vncviewer',
+ fn=set_true, default=0,
+ use="Connect to the VNC display after the domain is created.")
+
+gopts.opt('vncviewer-autopass',
+ fn=set_true, default=0,
+ use="Pass VNC password to viewer via stdin and -autopass.")
+
gopts.var('vncpasswd', val='NAME',
fn=set_value, default=None,
use="Password for VNC console on HVM domain.")
@@ -128,7 +138,7 @@ gopts.var('vncviewer', val='no|yes',
"The address of the vncviewer is passed to the domain on the "
"kernel command line using 'VNC_SERVER=<host>:<port>'. The port "
"used by vnc is 5500 + DISPLAY. A display value with a free port "
- "is chosen if possible.\nOnly valid when vnc=1.")
+ "is chosen if possible.\nOnly valid when vnc=1.\nDEPRECATED")
gopts.var('vncconsole', val='no|yes',
fn=set_bool, default=None,
@@ -1108,44 +1118,6 @@ def choose_vnc_display():
return None
vncpid = None
-def daemonize(prog, args):
- """Runs a program as a daemon with the list of arguments. Returns the PID
- of the daemonized program, or returns 0 on error.
- """
- r, w = os.pipe()
- pid = os.fork()
-
- if pid == 0:
- os.close(r)
- w = os.fdopen(w, 'w')
- os.setsid()
- try:
- pid2 = os.fork()
- except:
- pid2 = None
- if pid2 == 0:
- os.chdir("/")
- for fd in range(0, 256):
- try:
- os.close(fd)
- except:
- pass
- os.open("/dev/null", os.O_RDWR)
- os.dup2(0, 1)
- os.dup2(0, 2)
- os.execvp(prog, args)
- os._exit(1)
- else:
- w.write(str(pid2 or 0))
- w.close()
- os._exit(0)
- os.close(w)
- r = os.fdopen(r)
- daemon_pid = int(r.read())
- r.close()
- os.waitpid(pid, 0)
- return daemon_pid
-
def spawn_vnc(display):
"""Spawns a vncviewer that listens on the specified display. On success,
returns the port that the vncviewer is listening on and sets the global
@@ -1154,7 +1126,7 @@ def spawn_vnc(display):
vncargs = (["vncviewer", "-log", "*:stdout:0",
"-listen", "%d" % (VNC_BASE_PORT + display) ])
global vncpid
- vncpid = daemonize("vncviewer", vncargs)
+ vncpid = utils.daemonize("vncviewer", vncargs)
if vncpid == 0:
return 0
@@ -1362,6 +1334,11 @@ def main(argv):
elif not opts.is_xml:
dom = make_domain(opts, config)
+ if opts.vals.vncviewer:
+ domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
+ vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
+ console.runVncViewer(domid, vncviewer_autopass, True)
+
def do_console(domain_name):
cpid = os.fork()
if cpid != 0:
@@ -1373,13 +1350,7 @@ def do_console(domain_name):
if os.WEXITSTATUS(rv) != 0:
sys.exit(os.WEXITSTATUS(rv))
try:
- # Acquire the console of the created dom
- if serverType == SERVER_XEN_API:
- domid = server.xenapi.VM.get_domid(
- get_single_vm(domain_name))
- else:
- dom = server.xend.domain(domain_name)
- domid = int(sxp.child_value(dom, 'domid', '-1'))
+ domid = domain_name_to_domid(domain_name)
console.execConsole(domid)
except:
pass
diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
index b7383d2190..548fbc9b30 100644
--- a/tools/python/xen/xm/main.py
+++ b/tools/python/xen/xm/main.py
@@ -64,6 +64,9 @@ import inspect
from xen.xend import XendOptions
xoptions = XendOptions.instance()
+import signal
+signal.signal(signal.SIGINT, signal.SIG_DFL)
+
# getopt.gnu_getopt is better, but only exists in Python 2.3+. Use
# getopt.getopt if gnu_getopt is not available. This will mean that options
# may only be specified before positional arguments.
@@ -97,6 +100,8 @@ SUBCOMMAND_HELP = {
'console' : ('[-q|--quiet] <Domain>',
'Attach to <Domain>\'s console.'),
+ 'vncviewer' : ('[--[vncviewer-]autopass] <Domain>',
+ 'Attach to <Domain>\'s VNC server.'),
'create' : ('<ConfigFile> [options] [vars]',
'Create a domain based on <ConfigFile>.'),
'destroy' : ('<Domain>',
@@ -243,6 +248,10 @@ SUBCOMMAND_OPTIONS = {
'console': (
('-q', '--quiet', 'Do not print an error message if the domain does not exist'),
),
+ 'vncviewer': (
+ ('', '--autopass', 'Pass VNC password to viewer via stdin and -autopass'),
+ ('', '--vncviewer-autopass', '(consistency alias for --autopass)'),
+ ),
'dmesg': (
('-c', '--clear', 'Clear dmesg buffer as well as printing it'),
),
@@ -260,6 +269,8 @@ SUBCOMMAND_OPTIONS = {
'start': (
('-p', '--paused', 'Do not unpause domain after starting it'),
('-c', '--console_autoconnect', 'Connect to the console after the domain is created'),
+ ('', '--vncviewer', 'Connect to display via VNC after the domain is created'),
+ ('', '--vncviewer-autopass', 'Pass VNC password to viewer via stdin and -autopass'),
),
'resume': (
('-p', '--paused', 'Do not unpause domain after resuming it'),
@@ -277,6 +288,7 @@ SUBCOMMAND_OPTIONS = {
common_commands = [
"console",
+ "vncviewer",
"create",
"new",
"delete",
@@ -304,6 +316,7 @@ common_commands = [
domain_commands = [
"console",
+ "vncviewer",
"create",
"new",
"delete",
@@ -1185,14 +1198,20 @@ def xm_start(args):
paused = False
console_autoconnect = False
+ vncviewer = False
+ vncviewer_autopass = False
try:
- (options, params) = getopt.gnu_getopt(args, 'cp', ['console_autoconnect','paused'])
+ (options, params) = getopt.gnu_getopt(args, 'cp', ['console_autoconnect','paused','vncviewer','vncviewer-autopass'])
for (k, v) in options:
if k in ('-p', '--paused'):
paused = True
if k in ('-c', '--console_autoconnect'):
console_autoconnect = True
+ if k in ('--vncviewer'):
+ vncviewer = True
+ if k in ('--vncviewer-autopass'):
+ vncviewer_autopass = True
if len(params) != 1:
raise OptionError("Expects 1 argument")
@@ -1205,6 +1224,9 @@ def xm_start(args):
if console_autoconnect:
start_do_console(dom)
+ if console_autoconnect:
+ console.runVncViewer(domid, vncviewer_autopass, True)
+
try:
if serverType == SERVER_XEN_API:
server.xenapi.VM.start(get_single_vm(dom), paused)
@@ -1783,6 +1805,40 @@ def xm_console(args):
console.execConsole(domid)
+def domain_name_to_domid(domain_name):
+    """Return the numeric domain id for domain_name as an int.
+
+    Uses the Xen-API VM record when serverType is SERVER_XEN_API and the
+    xend domain sxp otherwise (where a missing 'domid' yields -1).
+    """
+    if serverType == SERVER_XEN_API:
+        # Coerce to int so both branches return the same type; callers such
+        # as console.runVncViewer format the result with '%d'.
+        domid = int(server.xenapi.VM.get_domid(
+            get_single_vm(domain_name)))
+    else:
+        dom = server.xend.domain(domain_name)
+        domid = int(sxp.child_value(dom, 'domid', '-1'))
+    return domid
+
+def xm_vncviewer(args):
+    """xm vncviewer [--autopass|--vncviewer-autopass] <Domain>
+
+    Resolve the domain name to a domid and run vncviewer against it in the
+    foreground (runVncViewer is called without do_daemonize).
+    """
+    autopass = False
+
+    try:
+        (options, params) = getopt.gnu_getopt(args, '', ['autopass','vncviewer-autopass'])
+    except getopt.GetoptError, opterr:
+        err(opterr)
+        # NOTE(review): relies on usage() not returning; otherwise
+        # 'options' is unbound below -- confirm.
+        usage('vncviewer')
+
+    for (k, v) in options:
+        if k in ['--autopass','--vncviewer-autopass']:
+            autopass = True
+        else:
+            # getopt only returns the long options listed above.
+            assert False
+
+    if len(params) != 1:
+        err('No domain given (or several parameters specified)')
+        usage('vncviewer')
+
+    dom = params[0]
+    domid = domain_name_to_domid(dom)
+
+    console.runVncViewer(domid, autopass)
+
+
def xm_uptime(args):
short_mode = 0
@@ -2102,7 +2158,23 @@ def xm_pci_list(args):
dom = params[0]
- devs = server.xend.domain.getDeviceSxprs(dom, 'pci')
+ devs = []
+ if serverType == SERVER_XEN_API:
+ for dpci_ref in server.xenapi.VM.get_DPCIs(get_single_vm(dom)):
+ ppci_ref = server.xenapi.DPCI.get_PPCI(dpci_ref)
+ ppci_record = server.xenapi.PPCI.get_record(ppci_ref)
+ dev = {
+ "domain": "0x%04x" % int(ppci_record["domain"]),
+ "bus": "0x%02x" % int(ppci_record["bus"]),
+ "slot": "0x%02x" % int(ppci_record["slot"]),
+ "func": "0x%01x" % int(ppci_record["func"]),
+ "vslt": "0x%02x" % \
+ int(server.xenapi.DPCI.get_hotplug_slot(dpci_ref))
+ }
+ devs.append(dev)
+
+ else:
+ devs = server.xend.domain.getDeviceSxprs(dom, 'pci')
if len(devs) == 0:
return
@@ -2362,7 +2434,34 @@ def parse_pci_configuration(args, state):
def xm_pci_attach(args):
arg_check(args, 'pci-attach', 2, 3)
(dom, pci) = parse_pci_configuration(args, 'Initialising')
- server.xend.domain.device_configure(dom, pci)
+
+ if serverType == SERVER_XEN_API:
+
+ pci_dev = sxp.children(pci, 'dev')[0]
+ domain = int(sxp.child_value(pci_dev, 'domain'), 16)
+ bus = int(sxp.child_value(pci_dev, 'bus'), 16)
+ slot = int(sxp.child_value(pci_dev, 'slot'), 16)
+ func = int(sxp.child_value(pci_dev, 'func'), 16)
+ vslt = int(sxp.child_value(pci_dev, 'vslt'), 16)
+ name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func)
+
+ target_ref = None
+ for ppci_ref in server.xenapi.PPCI.get_all():
+ if name == server.xenapi.PPCI.get_name(ppci_ref):
+ target_ref = ppci_ref
+ break
+ if target_ref is None:
+ raise OptionError("Device %s not found" % name)
+
+ dpci_record = {
+ "VM": get_single_vm(dom),
+ "PPCI": target_ref,
+ "hotplug_slot": vslt
+ }
+ server.xenapi.DPCI.create(dpci_record)
+
+ else:
+ server.xend.domain.device_configure(dom, pci)
def xm_scsi_attach(args):
xenapi_unsupported()
@@ -2462,7 +2561,29 @@ def xm_network_detach(args):
def xm_pci_detach(args):
arg_check(args, 'pci-detach', 2)
(dom, pci) = parse_pci_configuration(args, 'Closing')
- server.xend.domain.device_configure(dom, pci)
+
+ if serverType == SERVER_XEN_API:
+
+ pci_dev = sxp.children(pci, 'dev')[0]
+ domain = int(sxp.child_value(pci_dev, 'domain'), 16)
+ bus = int(sxp.child_value(pci_dev, 'bus'), 16)
+ slot = int(sxp.child_value(pci_dev, 'slot'), 16)
+ func = int(sxp.child_value(pci_dev, 'func'), 16)
+ vslt = int(sxp.child_value(pci_dev, 'vslt'), 16)
+ name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func)
+
+ target_ref = None
+ for dpci_ref in server.xenapi.VM.get_DPCIs(get_single_vm(dom)):
+ ppci_ref = server.xenapi.DPCI.get_PPCI(dpci_ref)
+ if name == server.xenapi.PPCI.get_name(ppci_ref):
+ target_ref = ppci_ref
+ server.xenapi.DPCI.destroy(dpci_ref)
+ break
+ if target_ref is None:
+ raise OptionError("Device %s not assigned" % name)
+
+ else:
+ server.xend.domain.device_configure(dom, pci)
def xm_scsi_detach(args):
xenapi_unsupported()
@@ -2617,6 +2738,7 @@ commands = {
"event-monitor": xm_event_monitor,
# console commands
"console": xm_console,
+ "vncviewer": xm_vncviewer,
# xenstat commands
"top": xm_top,
# domain commands
diff --git a/tools/python/xen/xm/shutdown.py b/tools/python/xen/xm/shutdown.py
index 2e6294a979..125dcc1e80 100644
--- a/tools/python/xen/xm/shutdown.py
+++ b/tools/python/xen/xm/shutdown.py
@@ -144,9 +144,10 @@ def main_all(opts, args):
def main_dom(opts, args):
if len(args) == 0: opts.err('No domain parameter given')
if len(args) > 1: opts.err('No multiple domain parameters allowed')
- dom = sxp.child_value(server.xend.domain(args[0]), 'name')
if serverType == SERVER_XEN_API:
- dom = get_single_vm(dom)
+ dom = get_single_vm(args[0])
+ else:
+ dom = sxp.child_value(server.xend.domain(args[0]), 'name')
mode = shutdown_mode(opts)
shutdown(opts, [ dom ], mode, opts.vals.wait)
diff --git a/tools/python/xen/xm/xenapi_create.py b/tools/python/xen/xm/xenapi_create.py
index df104bf72e..ceac76f1e1 100644
--- a/tools/python/xen/xm/xenapi_create.py
+++ b/tools/python/xen/xm/xenapi_create.py
@@ -369,6 +369,12 @@ class xenapi_create:
self.create_consoles(vm_ref, consoles)
+ # Now create pcis
+
+ pcis = vm.getElementsByTagName("pci")
+
+ self.create_pcis(vm_ref, pcis)
+
return vm_ref
except:
server.xenapi.VM.destroy(vm_ref)
@@ -389,7 +395,7 @@ class xenapi_create:
"device":
vbd.attributes["device"].value,
"bootable":
- vbd.attributes["bootable"].value == "True",
+ vbd.attributes["bootable"].value == "1",
"mode":
vbd.attributes["mode"].value,
"type":
@@ -493,6 +499,39 @@ class xenapi_create:
return server.xenapi.console.create(console_record)
+    def create_pcis(self, vm_ref, pcis):
+        """Call create_pci for each <pci> element; returns the list of results."""
+        log(DEBUG, "create_pcis")
+        return [self.create_pci(vm_ref, pci) for pci in pcis]
+
+    def create_pci(self, vm_ref, pci):
+        """Create a DPCI binding the PPCI named by a <pci> element to vm_ref.
+
+        The domain/bus/slot/func attributes are parsed as hex and matched
+        against PPCI.get_name() of every known PPCI.  Returns the result of
+        DPCI.create, or None when no PPCI has that name.
+        """
+        log(DEBUG, "create_pci")
+
+        domain = int(pci.attributes["domain"].value, 16)
+        bus = int(pci.attributes["bus"].value, 16)
+        slot = int(pci.attributes["slot"].value, 16)
+        func = int(pci.attributes["func"].value, 16)
+        name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func)
+
+        # The hotplug slot comes from "vslt", not "func".  The DTD declares
+        # vslt as #IMPLIED, so fall back to 0 when it is absent.
+        if pci.hasAttribute("vslt"):
+            vslt = int(pci.attributes["vslt"].value, 16)
+        else:
+            vslt = 0
+
+        target_ref = None
+        for ppci_ref in server.xenapi.PPCI.get_all():
+            if name == server.xenapi.PPCI.get_name(ppci_ref):
+                target_ref = ppci_ref
+                break
+        if target_ref is None:
+            log(DEBUG, "create_pci: pci device not found")
+            return None
+
+        dpci_record = {
+            "VM":
+                vm_ref,
+            "PPCI":
+                target_ref,
+            "hotplug_slot":
+                vslt
+        }
+
+        return server.xenapi.DPCI.create(dpci_record)
+
def get_child_by_name(exp, childname, default = None):
try:
return [child for child in sxp.children(exp)
@@ -521,6 +560,9 @@ class sxp2xml:
vfbs_sxp = map(lambda x: x[1], [device for device in devices
if device[1][0] == "vfb"])
+ pcis_sxp = map(lambda x: x[1], [device for device in devices
+ if device[1][0] == "pci"])
+
# Create XML Document
impl = getDOMImplementation()
@@ -597,13 +639,15 @@ class sxp2xml:
pv = document.createElement("pv")
pv.attributes["kernel"] \
= get_child_by_name(image, "kernel", "")
- pv.attributes["bootloader"] = ""
+ pv.attributes["bootloader"] \
+ = get_child_by_name(config, "bootloader", "")
pv.attributes["ramdisk"] \
= get_child_by_name(image, "ramdisk", "")
pv.attributes["args"] \
= "root=" + get_child_by_name(image, "root", "") \
+ " " + get_child_by_name(image, "args", "")
- pv.attributes["bootloader_args"] = ""
+ pv.attributes["bootloader_args"] \
+ = get_child_by_name(config, "bootloader_args","")
vm.appendChild(pv)
elif image[0] == "hvm":
@@ -654,6 +698,12 @@ class sxp2xml:
map(vm.appendChild, vtpms)
+ # And now the pcis
+
+ pcis = self.extract_pcis(pcis_sxp, document)
+
+ map(vm.appendChild, pcis)
+
# Last but not least the consoles...
consoles = self.extract_consoles(image, document)
@@ -821,7 +871,28 @@ class sxp2xml:
return vfb
- _eths = -1
+    def extract_pcis(self, pcis_sxp, document):
+        """Build one <pci> element per 'dev' child of each pci sxp."""
+        pci_nodes = []
+        for pci_sxp in pcis_sxp:
+            for dev_sxp in sxp.children(pci_sxp, "dev"):
+                node = document.createElement("pci")
+                # Each attribute falls back to "0" when the sxp lacks it.
+                for key in ("domain", "bus", "slot", "func", "vslt"):
+                    node.attributes[key] \
+                        = get_child_by_name(dev_sxp, key, "0")
+                pci_nodes.append(node)
+        return pci_nodes
def mk_other_config(self, key, value, document):
other_config = document.createElement("other_config")
@@ -914,6 +985,8 @@ class sxp2xml:
return platform_configs
+ _eths = -1
+
def getFreshEthDevice(self):
self._eths += 1
return "eth%i" % self._eths
diff --git a/tools/xenmon/Makefile b/tools/xenmon/Makefile
index 1a53d65d22..0b35f451d0 100644
--- a/tools/xenmon/Makefile
+++ b/tools/xenmon/Makefile
@@ -33,6 +33,8 @@ install: build
$(INSTALL_PROG) xenbaked $(DESTDIR)$(SBINDIR)/xenbaked
$(INSTALL_PROG) xentrace_setmask $(DESTDIR)$(SBINDIR)/xentrace_setmask
$(INSTALL_PROG) xenmon.py $(DESTDIR)$(SBINDIR)/xenmon.py
+ $(INSTALL_DIR) $(DESTDIR)$(DOCDIR)
+ $(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.xenmon
.PHONY: clean
clean:
diff --git a/tools/xenstat/libxenstat/Makefile b/tools/xenstat/libxenstat/Makefile
index f60ec7872e..1177b55ac0 100644
--- a/tools/xenstat/libxenstat/Makefile
+++ b/tools/xenstat/libxenstat/Makefile
@@ -15,7 +15,7 @@
XEN_ROOT=../../..
include $(XEN_ROOT)/tools/Rules.mk
-prefix=/usr
+prefix=$(PREFIX)
includedir=$(prefix)/include
libdir=$(prefix)/lib
diff --git a/tools/xenstat/libxenstat/src/xenstat.c b/tools/xenstat/libxenstat/src/xenstat.c
index 16a41dc1db..a5fbca1d5e 100644
--- a/tools/xenstat/libxenstat/src/xenstat.c
+++ b/tools/xenstat/libxenstat/src/xenstat.c
@@ -109,7 +109,7 @@ xenstat_handle *xenstat_init(void)
handle->xshandle = xs_daemon_open_readonly(); /* open handle to xenstore*/
if (handle->xshandle == NULL) {
- perror("unable to open xenstore\n");
+ perror("unable to open xenstore");
xc_interface_close(handle->xc_handle);
free(handle);
return NULL;
diff --git a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
index 5517039bd8..92b64e2589 100644
--- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
+++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c
@@ -71,7 +71,7 @@ static int bp_suspend(void)
return suspend_cancelled;
}
-int __xen_suspend(int fast_suspend, void (*resume_notifier)(void))
+int __xen_suspend(int fast_suspend, void (*resume_notifier)(int))
{
int err, suspend_cancelled, nr_cpus;
struct ap_suspend_info info;
@@ -101,7 +101,7 @@ int __xen_suspend(int fast_suspend, void (*resume_notifier)(void))
local_irq_disable();
suspend_cancelled = bp_suspend();
- resume_notifier();
+ resume_notifier(suspend_cancelled);
local_irq_enable();
smp_mb();
diff --git a/xen/Makefile b/xen/Makefile
index ee2e75ab70..7b0c28b421 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -1,8 +1,8 @@
# This is the correct place to edit the build version.
# All other places this is stored (eg. compile.h) should be autogenerated.
-export XEN_VERSION = 3
-export XEN_SUBVERSION = 3
-export XEN_EXTRAVERSION ?= -unstable$(XEN_VENDORVERSION)
+export XEN_VERSION = 4
+export XEN_SUBVERSION = 0
+export XEN_EXTRAVERSION ?= .0-rc3-pre$(XEN_VENDORVERSION)
export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-include xen-version
diff --git a/xen/arch/ia64/xen/domain.c b/xen/arch/ia64/xen/domain.c
index b45d75afd8..23911dd6ce 100644
--- a/xen/arch/ia64/xen/domain.c
+++ b/xen/arch/ia64/xen/domain.c
@@ -2212,8 +2212,9 @@ int __init construct_dom0(struct domain *d,
return 0;
}
-void machine_restart(void)
+void machine_restart(unsigned int delay_millisecs)
{
+ mdelay(delay_millisecs);
console_start_sync();
if (running_on_sim)
printk ("machine_restart called. spinning...\n");
diff --git a/xen/arch/ia64/xen/irq.c b/xen/arch/ia64/xen/irq.c
index 133afc9aa1..0f951552eb 100644
--- a/xen/arch/ia64/xen/irq.c
+++ b/xen/arch/ia64/xen/irq.c
@@ -459,7 +459,7 @@ int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
return rc;
}
-int pirq_guest_unbind(struct domain *d, int irq)
+void pirq_guest_unbind(struct domain *d, int irq)
{
irq_desc_t *desc = &irq_desc[irq];
irq_guest_action_t *action;
@@ -493,7 +493,6 @@ int pirq_guest_unbind(struct domain *d, int irq)
}
spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
}
void
diff --git a/xen/arch/x86/acpi/cpufreq/utility.c b/xen/arch/x86/acpi/cpufreq/utility.c
index e34586e3b5..787e91ceef 100644
--- a/xen/arch/x86/acpi/cpufreq/utility.c
+++ b/xen/arch/x86/acpi/cpufreq/utility.c
@@ -296,12 +296,11 @@ void cpufreq_suspend(void)
{
int cpu;
- /* to protect the case when Px was controlled by dom0-kernel */
- /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
+ /* to protect the case when Px was not controlled by xen */
for_each_online_cpu(cpu) {
struct processor_performance *perf = &processor_pminfo[cpu].perf;
- if (!perf->init)
+ if (!(perf->init & XEN_PX_INIT))
return;
}
@@ -316,14 +315,13 @@ int cpufreq_resume(void)
{
int cpu, ret = 0;
- /* 1. to protect the case when Px was controlled by dom0-kernel */
- /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
+ /* 1. to protect the case when Px was not controlled by xen */
/* 2. set state and resume flag to sync cpu to right state and freq */
for_each_online_cpu(cpu) {
struct processor_performance *perf = &processor_pminfo[cpu].perf;
struct cpufreq_policy *policy = &xen_px_policy[cpu];
- if (!perf->init)
+ if (!(perf->init & XEN_PX_INIT))
goto err;
perf->state = 0;
policy->resume = 1;
diff --git a/xen/arch/x86/acpi/pmstat.c b/xen/arch/x86/acpi/pmstat.c
index cf14fc9d64..ec408a2333 100644
--- a/xen/arch/x86/acpi/pmstat.c
+++ b/xen/arch/x86/acpi/pmstat.c
@@ -52,9 +52,9 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
struct pm_px *pxpt = &px_statistic_data[op->cpuid];
struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid];
- /* to protect the case when Px was controlled by dom0-kernel */
- /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
- if ( !pmpt->perf.init && (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
+ /* to protect the case when Px was not controlled by xen */
+ if ( (!(pmpt->perf.init & XEN_PX_INIT)) &&
+ (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
return -EINVAL;
if ( !cpu_online(op->cpuid) )
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a206625262..9e1bfda705 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1811,7 +1811,6 @@ int domain_relinquish_resources(struct domain *d)
if ( ret )
return ret;
#endif
- WARN_ON(d->xenheap_pages);
break;
default:
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 73233357f2..8bb24c9dd6 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -757,6 +757,7 @@ int __init construct_dom0(
si->shared_info = virt_to_maddr(d->shared_info);
si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ si->flags |= (xen_processor_pmbits << 8) & SIF_PM_MASK;
si->pt_base = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d);
si->nr_pt_frames = nr_pt_pages;
si->mfn_list = vphysmap_start;
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 52add55afa..8f1df3ed94 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -661,6 +661,7 @@ long arch_do_domctl(
if ( !iommu_pv_enabled && !is_hvm_domain(d) )
{
ret = -ENOSYS;
+ put_domain(d);
break;
}
@@ -669,12 +670,16 @@ long arch_do_domctl(
gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
"%x:%x:%x already assigned, or non-existent\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ put_domain(d);
break;
}
ret = assign_device(d, bus, devfn);
- gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ if ( ret )
+ gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
+ "assign device (%x:%x:%x) failed\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
put_domain(d);
}
break;
@@ -701,11 +706,15 @@ long arch_do_domctl(
if ( !iommu_pv_enabled && !is_hvm_domain(d) )
{
ret = -ENOSYS;
+ put_domain(d);
break;
}
if ( !device_assigned(bus, devfn) )
+ {
+ put_domain(d);
break;
+ }
ret = 0;
deassign_device(d, bus, devfn);
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 6a8e0885c0..0d7f5ffa6b 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -284,7 +284,7 @@ void dpci_ioport_write(uint32_t mport, ioreq_t *p)
data = p->data;
if ( p->data_is_ptr )
(void)hvm_copy_from_guest_phys(
- &data, p->data + (sign * i & p->size), p->size);
+ &data, p->data + (sign * i * p->size), p->size);
switch ( p->size )
{
diff --git a/xen/arch/x86/hvm/stdvga.c b/xen/arch/x86/hvm/stdvga.c
index abc1e1cbce..dd060fbd85 100644
--- a/xen/arch/x86/hvm/stdvga.c
+++ b/xen/arch/x86/hvm/stdvga.c
@@ -33,6 +33,10 @@
#include <xen/domain_page.h>
#include <asm/hvm/support.h>
#include <xen/numa.h>
+#include <xen/paging.h>
+
+#define VGA_MEM_BASE 0xa0000
+#define VGA_MEM_SIZE 0x20000
#define PAT(x) (x)
static const uint32_t mask16[16] = {
@@ -464,6 +468,7 @@ static int mmio_move(struct hvm_hw_stdvga *s, ioreq_t *p)
{
int i;
int sign = p->df ? -1 : 1;
+ p2m_type_t p2mt;
if ( p->data_is_ptr )
{
@@ -473,7 +478,19 @@ static int mmio_move(struct hvm_hw_stdvga *s, ioreq_t *p)
for ( i = 0; i < p->count; i++ )
{
tmp = stdvga_mem_read(addr, p->size);
- hvm_copy_to_guest_phys(data, &tmp, p->size);
+ if ( hvm_copy_to_guest_phys(data, &tmp, p->size) ==
+ HVMCOPY_bad_gfn_to_mfn )
+ {
+ (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt);
+ /*
+ * The only case we handle is vga_mem <-> vga_mem.
+ * Anything else disables caching and leaves it to qemu-dm.
+ */
+ if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) ||
+ ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) )
+ return 0;
+ stdvga_mem_write(data, tmp, p->size);
+ }
data += sign * p->size;
addr += sign * p->size;
}
@@ -483,7 +500,15 @@ static int mmio_move(struct hvm_hw_stdvga *s, ioreq_t *p)
uint32_t addr = p->addr, data = p->data, tmp;
for ( i = 0; i < p->count; i++ )
{
- hvm_copy_from_guest_phys(&tmp, data, p->size);
+ if ( hvm_copy_from_guest_phys(&tmp, data, p->size) ==
+ HVMCOPY_bad_gfn_to_mfn )
+ {
+ (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt);
+ if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) ||
+ ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) )
+ return 0;
+ tmp = stdvga_mem_read(data, p->size);
+ }
stdvga_mem_write(addr, tmp, p->size);
data += sign * p->size;
addr += sign * p->size;
@@ -536,7 +561,8 @@ static int stdvga_intercept_mmio(ioreq_t *p)
{
case IOREQ_TYPE_COPY:
buf = mmio_move(s, p);
- break;
+ if ( buf )
+ break;
default:
gdprintk(XENLOG_WARNING, "unsupported mmio request type:%d "
"addr:0x%04x data:0x%04x size:%d count:%d state:%d "
@@ -588,7 +614,7 @@ void stdvga_init(struct domain *d)
register_portio_handler(d, 0x3ce, 2, stdvga_intercept_pio);
/* MMIO. */
register_buffered_io_handler(
- d, 0xa0000, 0x20000, stdvga_intercept_mmio);
+ d, VGA_MEM_BASE, VGA_MEM_SIZE, stdvga_intercept_mmio);
}
}
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index ea72939413..9b0b86d426 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -874,9 +874,12 @@ static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
mfn_t mfn;
unsigned long gfn = gpa >> PAGE_SHIFT;
- /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+ /*
+ * If this GFN is emulated MMIO or marked as read-only, pass the fault
+ * to the mmio handler.
+ */
mfn = gfn_to_mfn_current(gfn, &p2mt);
- if ( p2mt == p2m_mmio_dm )
+ if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
{
if ( !handle_mmio() )
hvm_inject_exception(TRAP_gp_fault, 0, 0);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index fbefbd7f75..161a36dec4 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1971,7 +1971,7 @@ static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
}
mfn = gfn_to_mfn(d, gfn, &t);
- if ( p2m_is_ram(t) && paging_mode_log_dirty(d) )
+ if ( (t != p2m_ram_ro) && p2m_is_ram(t) && paging_mode_log_dirty(d) )
{
paging_mark_dirty(d, mfn_x(mfn));
p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index c661320663..643f61f669 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -45,12 +45,12 @@
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
-int msi_irq_enable = 0;
-boolean_param("msi_irq_enable", msi_irq_enable);
+int msi_enable = 0;
+boolean_param("msi", msi_enable);
int domain_irq_to_vector(struct domain *d, int irq)
{
- if ( !msi_irq_enable )
+ if ( !msi_enable )
return irq_to_vector(irq);
else
return d->arch.pirq_vector[irq];
@@ -58,7 +58,7 @@ int domain_irq_to_vector(struct domain *d, int irq)
int domain_vector_to_irq(struct domain *d, int vector)
{
- if ( !msi_irq_enable )
+ if ( !msi_enable )
return vector_to_irq(vector);
else
return d->arch.vector_pirq[vector];
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index abd35f11f1..a9fa6ddf76 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -573,7 +573,7 @@ int pirq_guest_bind(struct vcpu *v, int irq, int will_share)
return rc;
}
-int pirq_guest_unbind(struct domain *d, int irq)
+void pirq_guest_unbind(struct domain *d, int irq)
{
unsigned int vector;
irq_desc_t *desc;
@@ -660,7 +660,6 @@ int pirq_guest_unbind(struct domain *d, int irq)
out:
spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
}
extern void dump_ioapic_irq_info(void);
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index afe4d965d6..9aa18a9c3c 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1138,8 +1138,10 @@ static int alloc_l2_table(struct page_info *page, unsigned long type)
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
- if ( is_guest_l2_slot(d, type, i) &&
- unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
+ if ( !is_guest_l2_slot(d, type, i) )
+ continue;
+
+ if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
goto fail;
adjust_guest_l2e(pl2e[i], d);
@@ -1206,8 +1208,9 @@ static int alloc_l3_table(struct page_info *page)
d) )
goto fail;
}
- else if ( is_guest_l3_slot(i) &&
- unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
+ else if ( !is_guest_l3_slot(i) )
+ continue;
+ else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
goto fail;
adjust_guest_l3e(pl3e[i], d);
@@ -1222,8 +1225,12 @@ static int alloc_l3_table(struct page_info *page)
fail:
MEM_LOG("Failure in alloc_l3_table: entry %d", i);
while ( i-- > 0 )
- if ( is_guest_l3_slot(i) )
- put_page_from_l3e(pl3e[i], pfn);
+ {
+ if ( !is_guest_l3_slot(i) )
+ continue;
+ unadjust_guest_l3e(pl3e[i], d);
+ put_page_from_l3e(pl3e[i], pfn);
+ }
unmap_domain_page(pl3e);
return 0;
@@ -1242,8 +1249,10 @@ static int alloc_l4_table(struct page_info *page)
for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
{
- if ( is_guest_l4_slot(d, i) &&
- unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
+ if ( !is_guest_l4_slot(d, i) )
+ continue;
+
+ if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
goto fail;
adjust_guest_l4e(pl4e[i], d);
@@ -1585,7 +1594,7 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
struct vcpu *curr = current;
struct domain *d = curr->domain;
struct page_info *l3pg = mfn_to_page(pfn);
- int okay, rc = 1;
+ int rc = 1;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
{
@@ -1642,10 +1651,13 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
return 0;
}
- okay = create_pae_xen_mappings(d, pl3e);
- BUG_ON(!okay);
+ if ( likely(rc) )
+ {
+ if ( !create_pae_xen_mappings(d, pl3e) )
+ BUG();
- pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+ pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
+ }
page_unlock(l3pg);
put_page_from_l3e(ol3e, pfn);
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 209a14055b..dec5e9a09f 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3359,7 +3359,7 @@ static int sh_page_fault(struct vcpu *v,
gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory"
" page. va page=%#lx, mfn=%#lx\n",
va & PAGE_MASK, mfn_x(gmfn));
- goto emulate; /* skip over the instruction */
+ goto emulate_readonly; /* skip over the instruction */
}
/* In HVM guests, we force CR0.WP always to be set, so that the
@@ -3405,6 +3405,11 @@ static int sh_page_fault(struct vcpu *v,
}
/*
+ * Write from userspace to ro-mem needs to jump here to avoid getting
+ * caught by user-mode page-table check above.
+ */
+ emulate_readonly:
+ /*
* We don't need to hold the lock for the whole emulation; we will
* take it again when we write to the pagetables.
*/
@@ -4640,15 +4645,9 @@ static void *emulate_map_dest(struct vcpu *v,
u32 bytes,
struct sh_emulate_ctxt *sh_ctxt)
{
- struct segment_register *sreg;
unsigned long offset;
void *map = NULL;
- /* We don't emulate user-mode writes to page tables */
- sreg = hvm_get_seg_reg(x86_seg_ss, sh_ctxt);
- if ( sreg->attr.fields.dpl == 3 )
- return MAPPING_UNHANDLEABLE;
-
sh_ctxt->mfn1 = emulate_gva_to_mfn(v, vaddr, sh_ctxt);
if ( !mfn_valid(sh_ctxt->mfn1) )
return ((mfn_x(sh_ctxt->mfn1) == BAD_GVA_TO_GFN) ?
@@ -4656,6 +4655,16 @@ static void *emulate_map_dest(struct vcpu *v,
(mfn_x(sh_ctxt->mfn1) == READONLY_GFN) ?
MAPPING_SILENT_FAIL : MAPPING_UNHANDLEABLE);
+#ifndef NDEBUG
+ /* We don't emulate user-mode writes to page tables */
+ if ( hvm_get_seg_reg(x86_seg_ss, sh_ctxt)->attr.fields.dpl == 3 )
+ {
+ gdprintk(XENLOG_DEBUG, "User-mode write to pagetable reached "
+ "emulate_map_dest(). This should never happen!\n");
+ return MAPPING_UNHANDLEABLE;
+ }
+#endif
+
/* Unaligned writes mean probably this isn't a pagetable */
if ( vaddr & (bytes - 1) )
sh_remove_shadows(v, sh_ctxt->mfn1, 0, 0 /* Slow, can fail */ );
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index c0ccba6d9a..91e725049a 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -27,8 +27,6 @@
#include <public/physdev.h>
#include <xen/iommu.h>
-extern int msi_irq_enable;
-
/* bitmap indicate which fixed map is free */
DEFINE_SPINLOCK(msix_fixmap_lock);
DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
@@ -763,14 +761,13 @@ retry:
{
desc = &irq_desc[entry->vector];
- local_irq_save(flags);
- if ( !spin_trylock(&desc->lock) )
- {
- local_irq_restore(flags);
- goto retry;
- }
+ local_irq_save(flags);
+ if ( !spin_trylock(&desc->lock) )
+ {
+ local_irq_restore(flags);
+ goto retry;
+ }
- spin_lock_irqsave(&desc->lock, flags);
if ( desc->handler == &pci_msi_type )
{
/* MSI is not shared, so should be released already */
diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index ab3050b3e6..0debd987bc 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -14,6 +14,7 @@
#include <xen/time.h>
#include <xen/smp.h>
#include <asm/acpi.h>
+#include <xen/sched.h>
static int numa_setup(char *s);
custom_param("numa", numa_setup);
@@ -281,6 +282,9 @@ static void dump_numa(unsigned char key)
{
s_time_t now = NOW();
int i;
+ struct domain *d;
+ struct page_info *page;
+ unsigned int page_num_node[MAX_NUMNODES];
printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key,
(u32)(now>>32), (u32)now);
@@ -297,6 +301,28 @@ static void dump_numa(unsigned char key)
}
for_each_online_cpu(i)
printk("CPU%d -> NODE%d\n", i, cpu_to_node[i]);
+
+ rcu_read_lock(&domlist_read_lock);
+
+ printk("Memory location of each domain:\n");
+ for_each_domain(d)
+ {
+ printk("Domain %u (total: %u):\n", d->domain_id, d->tot_pages);
+
+ for_each_online_node(i)
+ page_num_node[i] = 0;
+
+ list_for_each_entry(page, &d->page_list, list)
+ {
+ i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
+ page_num_node[i]++;
+ }
+
+ for_each_online_node(i)
+ printk(" Node %u: %u\n", i, page_num_node[i]);
+ }
+
+ rcu_read_unlock(&domlist_read_lock);
}
static __init int register_numa_trigger(void)
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index d1e4f39e75..acb46cbac0 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -184,15 +184,14 @@ static int unmap_domain_pirq(struct domain *d, int pirq)
return ret;
}
-extern int msi_irq_enable;
static int physdev_map_pirq(struct physdev_map_pirq *map)
{
struct domain *d;
int vector, pirq, ret = 0;
unsigned long flags;
- /* if msi_irq_enable is not enabled,map always success */
- if ( !msi_irq_enable )
+ /* if msi_enable is not enabled, map always succeeds */
+ if ( !msi_enable )
return 0;
if ( !IS_PRIV(current->domain) )
@@ -304,7 +303,7 @@ static int physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
unsigned long flags;
int ret;
- if ( !msi_irq_enable )
+ if ( !msi_enable )
return 0;
if ( !IS_PRIV(current->domain) )
@@ -455,7 +454,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
ret = 0;
- if ( msi_irq_enable )
+ if ( msi_enable )
{
spin_lock_irqsave(&dom0->arch.irq_lock, flags);
if ( irq != AUTO_ASSIGN )
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 0e2579d23d..b93dc71a23 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -355,6 +355,11 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
struct processor_pminfo *pmpt;
struct processor_performance *pxpt;
+ if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+ {
+ ret = -ENOSYS;
+ break;
+ }
if ( cpuid < 0 )
{
ret = -EINVAL;
@@ -373,6 +378,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
memcpy ((void *)&pxpt->status_register,
(void *)&xenpxpt->status_register,
sizeof(struct xen_pct_register));
+ pxpt->init |= XEN_PX_PCT;
}
if ( xenpxpt->flags & XEN_PX_PSS )
{
@@ -390,6 +396,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
break;
}
pxpt->state_count = xenpxpt->state_count;
+ pxpt->init |= XEN_PX_PSS;
}
if ( xenpxpt->flags & XEN_PX_PSD )
{
@@ -397,14 +404,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
memcpy ((void *)&pxpt->domain_info,
(void *)&xenpxpt->domain_info,
sizeof(struct xen_psd_package));
+ pxpt->init |= XEN_PX_PSD;
}
if ( xenpxpt->flags & XEN_PX_PPC )
+ {
pxpt->ppc = xenpxpt->ppc;
+ pxpt->init |= XEN_PX_PPC;
+ }
- if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS |
- XEN_PX_PSD | XEN_PX_PPC ) )
+ if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS |
+ XEN_PX_PSD | XEN_PX_PPC ) )
{
- pxpt->init =1;
+ pxpt->init |= XEN_PX_INIT;
cpu_count++;
}
if ( cpu_count == num_online_cpus() )
@@ -418,10 +429,20 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
}
case XEN_PM_CX:
+ if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
+ {
+ ret = -ENOSYS;
+ break;
+ }
ret = set_cx_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.power);
break;
case XEN_PM_TX:
+ if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_TX) )
+ {
+ ret = -ENOSYS;
+ break;
+ }
ret = -EINVAL;
break;
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index cd0c892a86..c1a2642528 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -997,7 +997,6 @@ void __init __start_xen(unsigned long mbi_p)
if ( (cmdline != NULL) || (kextra != NULL) )
{
static char dom0_cmdline[MAX_GUEST_CMDLINE];
- char xen_pm_param[32];
cmdline = cmdline_cook(cmdline);
safe_strcpy(dom0_cmdline, cmdline);
@@ -1022,14 +1021,6 @@ void __init __start_xen(unsigned long mbi_p)
safe_strcat(dom0_cmdline, " acpi=");
safe_strcat(dom0_cmdline, acpi_param);
}
- if ( xen_cpuidle )
- xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
-
- snprintf(xen_pm_param, sizeof(xen_pm_param),
- " xen_processor_pmbits=%d", xen_processor_pmbits);
-
- if ( !strstr(dom0_cmdline, "xen_processor_pmbits=") )
- safe_strcat(dom0_cmdline, xen_pm_param);
cmdline = dom0_cmdline;
}
@@ -1041,6 +1032,9 @@ void __init __start_xen(unsigned long mbi_p)
_initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
}
+ if ( xen_cpuidle )
+ xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
+
/*
* We're going to setup domain0 using the module(s) that we stashed safely
* above our heap. The second module, if present, is an initrd ramdisk.
diff --git a/xen/arch/x86/shutdown.c b/xen/arch/x86/shutdown.c
index a65771575f..2e8c622963 100644
--- a/xen/arch/x86/shutdown.c
+++ b/xen/arch/x86/shutdown.c
@@ -291,7 +291,12 @@ __initcall(reboot_init);
#endif
-void machine_restart(void)
+static void __machine_restart(void *pdelay)
+{
+ machine_restart(*(unsigned int *)pdelay);
+}
+
+void machine_restart(unsigned int delay_millisecs)
{
int i;
@@ -304,14 +309,16 @@ void machine_restart(void)
if ( get_apic_id() != boot_cpu_physical_apicid )
{
/* Send IPI to the boot CPU (logical cpu 0). */
- on_selected_cpus(cpumask_of_cpu(0), (void *)machine_restart,
- NULL, 1, 0);
+ on_selected_cpus(cpumask_of_cpu(0), __machine_restart,
+ &delay_millisecs, 1, 0);
for ( ; ; )
halt();
}
smp_send_stop();
+ mdelay(delay_millisecs);
+
if ( tboot_in_measured_env() )
tboot_shutdown(TB_SHUTDOWN_REBOOT);
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 72f1c4a6c1..94eb65a3da 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -35,8 +35,6 @@
static char opt_clocksource[10];
string_param("clocksource", opt_clocksource);
-#define EPOCH MILLISECS(1000)
-
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
DEFINE_SPINLOCK(rtc_lock);
unsigned long pit0_ticks;
@@ -55,7 +53,6 @@ struct cpu_time {
s_time_t stime_master_stamp;
struct time_scale tsc_scale;
u64 cstate_plt_count_stamp;
- struct timer calibration_timer;
};
struct platform_timesource {
@@ -67,6 +64,10 @@ struct platform_timesource {
static DEFINE_PER_CPU(struct cpu_time, cpu_time);
+/* Calibrate all CPUs to platform timer every EPOCH. */
+#define EPOCH MILLISECS(1000)
+static struct timer calibration_timer;
+
/* TSC is invariant on C state entry? */
static bool_t tsc_invariant;
@@ -481,35 +482,6 @@ static int init_pmtimer(struct platform_timesource *pts)
}
/************************************************************
- * PLATFORM TIMER 5: TSC
- */
-
-static const char plt_tsc_name[] = "TSC";
-#define platform_timer_is_tsc() (plt_src.name == plt_tsc_name)
-
-static int init_tsctimer(struct platform_timesource *pts)
-{
- if ( !tsc_invariant )
- return 0;
-
- pts->name = (char *)plt_tsc_name;
- return 1;
-}
-
-static void make_tsctimer_record(void)
-{
- struct cpu_time *t = &this_cpu(cpu_time);
- s_time_t now;
- u64 tsc;
-
- rdtscll(tsc);
- now = scale_delta(tsc, &t->tsc_scale);
-
- t->local_tsc_stamp = tsc;
- t->stime_local_stamp = t->stime_master_stamp = now;
-}
-
-/************************************************************
* GENERIC PLATFORM TIMER INFRASTRUCTURE
*/
@@ -530,11 +502,11 @@ static void plt_overflow(void *unused)
{
u64 count;
- spin_lock(&platform_timer_lock);
+ spin_lock_irq(&platform_timer_lock);
count = plt_src.read_counter();
plt_stamp64 += (count - plt_stamp) & plt_mask;
plt_stamp = count;
- spin_unlock(&platform_timer_lock);
+ spin_unlock_irq(&platform_timer_lock);
set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
}
@@ -551,6 +523,8 @@ static s_time_t read_platform_stime(void)
u64 count;
s_time_t stime;
+ ASSERT(!local_irq_is_enabled());
+
spin_lock(&platform_timer_lock);
count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stime = __read_platform_stime(count);
@@ -564,22 +538,16 @@ static void platform_time_calibration(void)
u64 count;
s_time_t stamp;
- spin_lock(&platform_timer_lock);
+ spin_lock_irq(&platform_timer_lock);
count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stamp = __read_platform_stime(count);
stime_platform_stamp = stamp;
platform_timer_stamp = count;
- spin_unlock(&platform_timer_lock);
+ spin_unlock_irq(&platform_timer_lock);
}
static void resume_platform_timer(void)
{
- if ( platform_timer_is_tsc() )
- {
- /* TODO: Save/restore TSC values. */
- return;
- }
-
/* No change in platform_stime across suspend/resume. */
platform_timer_stamp = plt_stamp64;
plt_stamp = plt_src.read_counter();
@@ -600,8 +568,6 @@ static void init_platform_timer(void)
rc = init_cyclone(pts);
else if ( !strcmp(opt_clocksource, "acpi") )
rc = init_pmtimer(pts);
- else if ( !strcmp(opt_clocksource, "tsc") )
- rc = init_tsctimer(pts);
if ( rc <= 0 )
printk("WARNING: %s clocksource '%s'.\n",
@@ -615,12 +581,6 @@ static void init_platform_timer(void)
!init_pmtimer(pts) )
init_pit(pts);
- if ( platform_timer_is_tsc() )
- {
- printk("Platform timer is TSC\n");
- return;
- }
-
plt_mask = (u64)~0ull >> (64 - pts->counter_bits);
set_time_scale(&plt_scale, pts->frequency);
@@ -823,10 +783,6 @@ int cpu_frequency_change(u64 freq)
struct cpu_time *t = &this_cpu(cpu_time);
u64 curr_tsc;
- /* Nothing to do if TSC is platform timer. Assume it is constant-rate. */
- if ( platform_timer_is_tsc() )
- return 0;
-
/* Sanity check: CPU frequency allegedly dropping below 1MHz? */
if ( freq < 1000000u )
{
@@ -847,9 +803,11 @@ int cpu_frequency_change(u64 freq)
local_irq_enable();
/* A full epoch should pass before we check for deviation. */
- set_timer(&t->calibration_timer, NOW() + EPOCH);
if ( smp_processor_id() == 0 )
+ {
+ set_timer(&calibration_timer, NOW() + EPOCH);
platform_time_calibration();
+ }
return 0;
}
@@ -875,9 +833,20 @@ void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
rcu_read_unlock(&domlist_read_lock);
}
+/* Per-CPU communication between rendezvous IRQ and softirq handler. */
+struct cpu_calibration {
+ u64 local_tsc_stamp;
+ s_time_t stime_local_stamp;
+ s_time_t stime_master_stamp;
+ struct timer softirq_callback;
+};
+static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration);
+
+/* Softirq handler for per-CPU time calibration. */
static void local_time_calibration(void *unused)
{
struct cpu_time *t = &this_cpu(cpu_time);
+ struct cpu_calibration *c = &this_cpu(cpu_calibration);
/*
* System timestamps, extrapolated from local and master oscillators,
@@ -908,26 +877,15 @@ static void local_time_calibration(void *unused)
/* The overall calibration scale multiplier. */
u32 calibration_mul_frac;
- if ( platform_timer_is_tsc() )
- {
- make_tsctimer_record();
- update_vcpu_system_time(current);
- set_timer(&t->calibration_timer, NOW() + MILLISECS(10*1000));
- return;
- }
-
prev_tsc = t->local_tsc_stamp;
prev_local_stime = t->stime_local_stamp;
prev_master_stime = t->stime_master_stamp;
- /*
- * Disable IRQs to get 'instantaneous' current timestamps. We read platform
- * time first, as we may be delayed when acquiring platform_timer_lock.
- */
+ /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
local_irq_disable();
- curr_master_stime = read_platform_stime();
- curr_local_stime = get_s_time();
- rdtscll(curr_tsc);
+ curr_tsc = c->local_tsc_stamp;
+ curr_local_stime = c->stime_local_stamp;
+ curr_master_stime = c->stime_master_stamp;
local_irq_enable();
#if 0
@@ -1021,10 +979,62 @@ static void local_time_calibration(void *unused)
update_vcpu_system_time(current);
out:
- set_timer(&t->calibration_timer, NOW() + EPOCH);
-
if ( smp_processor_id() == 0 )
+ {
+ set_timer(&calibration_timer, NOW() + EPOCH);
platform_time_calibration();
+ }
+}
+
+/*
+ * Rendezvous for all CPUs in IRQ context.
+ * Master CPU snapshots the platform timer.
+ * All CPUS snapshot their local TSC and extrapolation of system time.
+ */
+struct calibration_rendezvous {
+ atomic_t nr_cpus;
+ s_time_t master_stime;
+};
+
+static void time_calibration_rendezvous(void *_r)
+{
+ unsigned int total_cpus = num_online_cpus();
+ struct cpu_calibration *c = &this_cpu(cpu_calibration);
+ struct calibration_rendezvous *r = _r;
+
+ local_irq_disable();
+
+ if ( smp_processor_id() == 0 )
+ {
+ while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
+ cpu_relax();
+ r->master_stime = read_platform_stime();
+ atomic_inc(&r->nr_cpus);
+ }
+ else
+ {
+ atomic_inc(&r->nr_cpus);
+ while ( atomic_read(&r->nr_cpus) != total_cpus )
+ cpu_relax();
+ }
+
+ rdtscll(c->local_tsc_stamp);
+ c->stime_local_stamp = get_s_time();
+ c->stime_master_stamp = r->master_stime;
+
+ local_irq_enable();
+
+ /* Callback in softirq context as soon as possible. */
+ set_timer(&c->softirq_callback, c->stime_local_stamp);
+}
+
+static void time_calibration(void *unused)
+{
+ struct calibration_rendezvous r = {
+ .nr_cpus = ATOMIC_INIT(0)
+ };
+
+ on_each_cpu(time_calibration_rendezvous, &r, 0, 1);
}
void init_percpu_time(void)
@@ -1033,12 +1043,6 @@ void init_percpu_time(void)
unsigned long flags;
s_time_t now;
- if ( platform_timer_is_tsc() )
- {
- make_tsctimer_record();
- goto out;
- }
-
local_irq_save(flags);
rdtscll(t->local_tsc_stamp);
now = !plt_src.read_counter ? 0 : read_platform_stime();
@@ -1047,10 +1051,14 @@ void init_percpu_time(void)
t->stime_master_stamp = now;
t->stime_local_stamp = now;
- out:
- init_timer(&t->calibration_timer, local_time_calibration,
- NULL, smp_processor_id());
- set_timer(&t->calibration_timer, NOW() + EPOCH);
+ init_timer(&this_cpu(cpu_calibration).softirq_callback,
+ local_time_calibration, NULL, smp_processor_id());
+
+ if ( smp_processor_id() == 0 )
+ {
+ init_timer(&calibration_timer, time_calibration, NULL, 0);
+ set_timer(&calibration_timer, NOW() + EPOCH);
+ }
}
/* Late init function (after all CPUs are booted). */
@@ -1134,7 +1142,10 @@ void pit_broadcast_enter(void)
void pit_broadcast_exit(void)
{
- cpu_clear(smp_processor_id(), pit_broadcast_mask);
+ int cpu = smp_processor_id();
+
+ if ( cpu_test_and_clear(cpu, pit_broadcast_mask) )
+ reprogram_timer(per_cpu(timer_deadline, cpu));
}
int pit_broadcast_is_available(void)
@@ -1163,10 +1174,11 @@ int time_suspend(void)
{
cmos_utc_offset = -get_cmos_time();
cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
+ kill_timer(&calibration_timer);
}
/* Better to cancel calibration timer for accuracy. */
- kill_timer(&this_cpu(cpu_time).calibration_timer);
+ kill_timer(&this_cpu(cpu_calibration).softirq_callback);
return 0;
}
diff --git a/xen/arch/x86/x86_64/physdev.c b/xen/arch/x86/x86_64/physdev.c
index 42b8269e4d..32abe933d3 100644
--- a/xen/arch/x86/x86_64/physdev.c
+++ b/xen/arch/x86/x86_64/physdev.c
@@ -30,6 +30,15 @@
#define physdev_irq_status_query compat_physdev_irq_status_query
#define physdev_irq_status_query_t physdev_irq_status_query_compat_t
+#define physdev_map_pirq compat_physdev_map_pirq
+#define physdev_map_pirq_t physdev_map_pirq_compat_t
+
+#define physdev_unmap_pirq compat_physdev_unmap_pirq
+#define physdev_unmap_pirq_t physdev_unmap_pirq_compat_t
+
+#define physdev_manage_pci compat_physdev_manage_pci
+#define physdev_manage_pci_t physdev_manage_pci_compat_t
+
#define COMPAT
#undef guest_handle_okay
#define guest_handle_okay compat_handle_okay
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 600705ff84..6fc1955783 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1606,6 +1606,7 @@ x86_emulate(
dst.val = _regs.eax;
case 0x38 ... 0x3b: cmp: /* cmp */
emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
+ dst.type = OP_NONE;
break;
case 0x62: /* bound */ {
@@ -1707,6 +1708,7 @@ x86_emulate(
dst.val = _regs.eax;
case 0x84 ... 0x85: test: /* test */
emulate_2op_SrcV("test", src, dst, _regs.eflags);
+ dst.type = OP_NONE;
break;
case 0x86 ... 0x87: xchg: /* xchg */
diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c
index fa4322e77d..f3a6d93776 100644
--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -138,7 +138,6 @@ int compat_grant_table_op(unsigned int cmd,
for ( i = 0; i < (_s_)->nr_frames; ++i ) \
{ \
unsigned int frame = (_s_)->frame_list.p[i]; \
- BUG_ON(frame != (_s_)->frame_list.p[i]); \
(void)__copy_to_compat_offset((_d_)->frame_list, i, &frame, 1); \
} \
} \
diff --git a/xen/common/domain.c b/xen/common/domain.c
index b420b4b219..6bf1f49161 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -50,7 +50,7 @@ static void __init setup_cpufreq_option(char *str)
else if ( !strcmp(str, "xen") )
{
xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
- cpufreq_controller = FREQCTL_none;
+ cpufreq_controller = FREQCTL_xen;
}
}
custom_param("cpufreq", setup_cpufreq_option);
@@ -137,6 +137,8 @@ struct vcpu *alloc_vcpu(
v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
v->runstate.state_entry_time = NOW();
+ spin_lock_init(&v->virq_lock);
+
if ( !is_idle_domain(d) )
{
set_bit(_VPF_down, &v->pause_flags);
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 84cc455fdb..3e8956e0e7 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -386,14 +386,18 @@ static long __evtchn_close(struct domain *d1, int port1)
break;
case ECS_PIRQ:
- if ( (rc = pirq_guest_unbind(d1, chn1->u.pirq)) == 0 )
- d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+ pirq_guest_unbind(d1, chn1->u.pirq);
+ d1->pirq_to_evtchn[chn1->u.pirq] = 0;
break;
case ECS_VIRQ:
for_each_vcpu ( d1, v )
- if ( v->virq_to_evtchn[chn1->u.virq] == port1 )
- v->virq_to_evtchn[chn1->u.virq] = 0;
+ {
+ if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
+ continue;
+ v->virq_to_evtchn[chn1->u.virq] = 0;
+ spin_barrier(&v->virq_lock);
+ }
break;
case ECS_IPI:
@@ -447,6 +451,9 @@ static long __evtchn_close(struct domain *d1, int port1)
BUG();
}
+ /* Clear pending event to avoid unexpected behavior on re-bind. */
+ clear_bit(port1, &shared_info(d1, evtchn_pending));
+
/* Reset binding to vcpu0 when the channel is freed. */
chn1->state = ECS_FREE;
chn1->notify_vcpu_id = 0;
@@ -573,37 +580,33 @@ static int evtchn_set_pending(struct vcpu *v, int port)
return 0;
}
+int guest_enabled_event(struct vcpu *v, int virq)
+{
+ return ((v != NULL) && (v->virq_to_evtchn[virq] != 0));
+}
void send_guest_vcpu_virq(struct vcpu *v, int virq)
{
+ unsigned long flags;
int port;
ASSERT(!virq_is_global(virq));
+ spin_lock_irqsave(&v->virq_lock, flags);
+
port = v->virq_to_evtchn[virq];
if ( unlikely(port == 0) )
- return;
+ goto out;
evtchn_set_pending(v, port);
-}
-
-int guest_enabled_event(struct vcpu *v, int virq)
-{
- int port;
-
- if ( unlikely(v == NULL) )
- return 0;
- port = v->virq_to_evtchn[virq];
- if ( port == 0 )
- return 0;
-
- /* virq is in use */
- return 1;
+ out:
+ spin_unlock_irqrestore(&v->virq_lock, flags);
}
void send_guest_global_virq(struct domain *d, int virq)
{
+ unsigned long flags;
int port;
struct vcpu *v;
struct evtchn *chn;
@@ -617,20 +620,28 @@ void send_guest_global_virq(struct domain *d, int virq)
if ( unlikely(v == NULL) )
return;
+ spin_lock_irqsave(&v->virq_lock, flags);
+
port = v->virq_to_evtchn[virq];
if ( unlikely(port == 0) )
- return;
+ goto out;
chn = evtchn_from_port(d, port);
evtchn_set_pending(d->vcpu[chn->notify_vcpu_id], port);
-}
+ out:
+ spin_unlock_irqrestore(&v->virq_lock, flags);
+}
int send_guest_pirq(struct domain *d, int pirq)
{
int port = d->pirq_to_evtchn[pirq];
struct evtchn *chn;
+ /*
+ * It should not be possible to race with __evtchn_close():
+ * The caller of this function must synchronise with pirq_guest_unbind().
+ */
ASSERT(port != 0);
chn = evtchn_from_port(d, port);
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 56c59905b2..d478db8ad3 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -143,7 +143,7 @@ static void dump_dom0_registers(unsigned char key)
static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
{
printk("'%c' pressed -> rebooting machine\n", key);
- machine_restart();
+ machine_restart(0);
}
static void cpuset_print(char *set, int size, cpumask_t mask)
@@ -236,6 +236,7 @@ static void dump_domains(unsigned char key)
static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
static s_time_t read_clocks_time[NR_CPUS];
+static u64 read_cycles_time[NR_CPUS];
static void read_clocks_slave(void *unused)
{
@@ -244,14 +245,20 @@ static void read_clocks_slave(void *unused)
while ( !cpu_isset(cpu, read_clocks_cpumask) )
cpu_relax();
read_clocks_time[cpu] = NOW();
+ read_cycles_time[cpu] = get_cycles();
cpu_clear(cpu, read_clocks_cpumask);
local_irq_enable();
}
static void read_clocks(unsigned char key)
{
- unsigned int cpu = smp_processor_id(), min_cpu, max_cpu;
- u64 min, max, dif, difus;
+ unsigned int cpu = smp_processor_id(), min_stime_cpu, max_stime_cpu;
+ unsigned int min_cycles_cpu, max_cycles_cpu;
+ u64 min_stime, max_stime, dif_stime;
+ u64 min_cycles, max_cycles, dif_cycles;
+ static u64 sumdif_stime = 0, maxdif_stime = 0;
+ static u64 sumdif_cycles = 0, maxdif_cycles = 0;
+ static u32 count = 0;
static DEFINE_SPINLOCK(lock);
spin_lock(&lock);
@@ -261,31 +268,48 @@ static void read_clocks(unsigned char key)
local_irq_disable();
read_clocks_cpumask = cpu_online_map;
read_clocks_time[cpu] = NOW();
+ read_cycles_time[cpu] = get_cycles();
cpu_clear(cpu, read_clocks_cpumask);
local_irq_enable();
while ( !cpus_empty(read_clocks_cpumask) )
cpu_relax();
- min_cpu = max_cpu = cpu;
+ min_stime_cpu = max_stime_cpu = min_cycles_cpu = max_cycles_cpu = cpu;
for_each_online_cpu ( cpu )
{
- if ( read_clocks_time[cpu] < read_clocks_time[min_cpu] )
- min_cpu = cpu;
- if ( read_clocks_time[cpu] > read_clocks_time[max_cpu] )
- max_cpu = cpu;
+ if ( read_clocks_time[cpu] < read_clocks_time[min_stime_cpu] )
+ min_stime_cpu = cpu;
+ if ( read_clocks_time[cpu] > read_clocks_time[max_stime_cpu] )
+ max_stime_cpu = cpu;
+ if ( read_cycles_time[cpu] < read_cycles_time[min_cycles_cpu] )
+ min_cycles_cpu = cpu;
+ if ( read_cycles_time[cpu] > read_cycles_time[max_cycles_cpu] )
+ max_cycles_cpu = cpu;
}
- min = read_clocks_time[min_cpu];
- max = read_clocks_time[max_cpu];
+ min_stime = read_clocks_time[min_stime_cpu];
+ max_stime = read_clocks_time[max_stime_cpu];
+ min_cycles = read_cycles_time[min_cycles_cpu];
+ max_cycles = read_cycles_time[max_cycles_cpu];
spin_unlock(&lock);
- dif = difus = max - min;
- do_div(difus, 1000);
- printk("Min = %"PRIu64" ; Max = %"PRIu64" ; Diff = %"PRIu64
- " (%"PRIu64" microseconds)\n",
- min, max, dif, difus);
+ dif_stime = max_stime - min_stime;
+ if ( dif_stime > maxdif_stime )
+ maxdif_stime = dif_stime;
+ sumdif_stime += dif_stime;
+ dif_cycles = max_cycles - min_cycles;
+ if ( dif_cycles > maxdif_cycles )
+ maxdif_cycles = dif_cycles;
+ sumdif_cycles += dif_cycles;
+ count++;
+ printk("Synced stime skew: max=%"PRIu64"ns avg=%"PRIu64"ns "
+ "samples=%"PRIu32" current=%"PRIu64"ns\n",
+ maxdif_stime, sumdif_stime/count, count, dif_stime);
+ printk("Synced cycles skew: max=%"PRIu64" avg=%"PRIu64" "
+ "samples=%"PRIu32" current=%"PRIu64"\n",
+ maxdif_cycles, sumdif_cycles/count, count, dif_cycles);
}
extern void dump_runq(unsigned char key);
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index d497fdfd57..997b178d7c 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -53,34 +53,11 @@ static int opt_bootscrub __initdata = 1;
boolean_param("bootscrub", opt_bootscrub);
/*
- * Bit width of the DMA heap.
+ * Bit width of the DMA heap -- used to override NUMA-node-first.
+ * allocation strategy, which can otherwise exhaust low memory.
*/
-static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE;
-static void __init parse_dma_bits(char *s)
-{
- unsigned int v = simple_strtol(s, NULL, 0);
- if ( v >= (BITS_PER_LONG + PAGE_SHIFT) )
- dma_bitsize = BITS_PER_LONG + PAGE_SHIFT;
- else if ( v > PAGE_SHIFT + 1 )
- dma_bitsize = v;
- else
- printk("Invalid dma_bits value of %u ignored.\n", v);
-}
-custom_param("dma_bits", parse_dma_bits);
-
-/*
- * Amount of memory to reserve in a low-memory (<4GB) pool for specific
- * allocation requests. Ordinary requests will not fall back to the
- * lowmem emergency pool.
- */
-static unsigned long dma_emergency_pool_pages;
-static void __init parse_dma_emergency_pool(char *s)
-{
- unsigned long long bytes;
- bytes = parse_size_and_unit(s, NULL);
- dma_emergency_pool_pages = bytes >> PAGE_SHIFT;
-}
-custom_param("dma_emergency_pool", parse_dma_emergency_pool);
+static unsigned int dma_bitsize;
+integer_param("dma_bits", dma_bitsize);
#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
@@ -281,11 +258,7 @@ unsigned long __init alloc_boot_pages(
*/
#define MEMZONE_XEN 0
-#ifdef PADDR_BITS
#define NR_ZONES (PADDR_BITS - PAGE_SHIFT)
-#else
-#define NR_ZONES (BITS_PER_LONG - PAGE_SHIFT)
-#endif
#define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
@@ -583,7 +556,22 @@ void __init end_boot_allocator(void)
init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
}
- printk("Domain heap initialised: DMA width %u bits\n", dma_bitsize);
+ if ( !dma_bitsize && (num_online_nodes() > 1) )
+ {
+#ifdef CONFIG_X86
+ dma_bitsize = min_t(unsigned int,
+ fls(NODE_DATA(0)->node_spanned_pages) - 1
+ + PAGE_SHIFT - 2,
+ 32);
+#else
+ dma_bitsize = 32;
+#endif
+ }
+
+ printk("Domain heap initialised");
+ if ( dma_bitsize )
+ printk(" DMA width %u bits", dma_bitsize);
+ printk("\n");
}
#undef avail_for_domheap
@@ -803,20 +791,10 @@ struct page_info *alloc_domheap_pages(
if ( bits < zone_hi )
zone_hi = bits;
- if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
- {
+ if ( (dma_bitsize > PAGE_SHIFT) &&
+ ((zone_hi + PAGE_SHIFT) >= dma_bitsize) )
pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, node, order);
- /* Failure? Then check if we can fall back to the DMA pool. */
- if ( unlikely(pg == NULL) &&
- ((order > MAX_ORDER) ||
- (avail_heap_pages(MEMZONE_XEN + 1,
- dma_bitsize - PAGE_SHIFT - 1,
- -1) <
- (dma_emergency_pool_pages + (1UL << order)))) )
- return NULL;
- }
-
if ( (pg == NULL) &&
((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
node, order)) == NULL) )
@@ -917,28 +895,15 @@ unsigned long avail_domheap_pages_region(
unsigned long avail_domheap_pages(void)
{
- unsigned long avail_nrm, avail_dma;
-
- avail_nrm = avail_heap_pages(dma_bitsize - PAGE_SHIFT,
- NR_ZONES - 1,
- -1);
-
- avail_dma = avail_heap_pages(MEMZONE_XEN + 1,
- dma_bitsize - PAGE_SHIFT - 1,
- -1);
-
- if ( avail_dma > dma_emergency_pool_pages )
- avail_dma -= dma_emergency_pool_pages;
- else
- avail_dma = 0;
-
- return avail_nrm + avail_dma;
+ return avail_heap_pages(MEMZONE_XEN + 1,
+ NR_ZONES - 1,
+ -1);
}
static void pagealloc_keyhandler(unsigned char key)
{
unsigned int zone = MEMZONE_XEN;
- unsigned long total = 0;
+ unsigned long n, total = 0;
printk("Physical memory information:\n");
printk(" Xen heap: %lukB free\n",
@@ -946,9 +911,7 @@ static void pagealloc_keyhandler(unsigned char key)
while ( ++zone < NR_ZONES )
{
- unsigned long n;
-
- if ( zone == dma_bitsize - PAGE_SHIFT )
+ if ( (zone + PAGE_SHIFT) == dma_bitsize )
{
printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
total = 0;
diff --git a/xen/common/shutdown.c b/xen/common/shutdown.c
index a3d48bf899..a8af94b130 100644
--- a/xen/common/shutdown.c
+++ b/xen/common/shutdown.c
@@ -23,8 +23,7 @@ static void maybe_reboot(void)
{
printk("rebooting machine in 5 seconds.\n");
watchdog_disable();
- mdelay(5000);
- machine_restart();
+ machine_restart(5000);
}
}
@@ -50,7 +49,7 @@ void dom0_shutdown(u8 reason)
case SHUTDOWN_reboot:
{
printk("Domain 0 shutdown: rebooting machine.\n");
- machine_restart();
+ machine_restart(0);
break; /* not reached */
}
diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c
index 31e73c3201..c326a6fd20 100644
--- a/xen/drivers/char/console.c
+++ b/xen/drivers/char/console.c
@@ -939,8 +939,7 @@ void panic(const char *fmt, ...)
else
{
watchdog_disable();
- mdelay(5000);
- machine_restart();
+ machine_restart(5000);
}
}
diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c
index b0b35b11a2..e7d2d6c25c 100644
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -27,6 +27,7 @@
extern unsigned long amd_iommu_page_entries;
extern unsigned short ivrs_bdf_entries;
extern struct ivrs_mappings *ivrs_mappings;
+extern unsigned short last_bdf;
static struct amd_iommu * __init find_iommu_from_bdf_cap(
u16 bdf, u8 cap_offset)
@@ -85,10 +86,8 @@ static void __init reserve_unity_map_for_device(
}
 /* extend r/w permissions and keep aggregate */
- if ( iw )
- ivrs_mappings[bdf].write_permission = IOMMU_CONTROL_ENABLED;
- if ( ir )
- ivrs_mappings[bdf].read_permission = IOMMU_CONTROL_ENABLED;
+ ivrs_mappings[bdf].write_permission = iw;
+ ivrs_mappings[bdf].read_permission = ir;
ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_ENABLED;
ivrs_mappings[bdf].addr_range_start = base;
ivrs_mappings[bdf].addr_range_length = length;
@@ -112,7 +111,7 @@ static int __init register_exclusion_range_for_all_devices(
length = range_top - base;
/* reserve r/w unity-mapped page entries for devices */
/* note: these entries are part of the exclusion range */
- for (bdf = 0; bdf < ivrs_bdf_entries; ++bdf)
+ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
reserve_unity_map_for_device(bdf, base, length, iw, ir);
/* push 'base' just outside of virtual address space */
base = iommu_top;
@@ -190,7 +189,7 @@ static int __init register_exclusion_range_for_iommu_devices(
length = range_top - base;
/* reserve r/w unity-mapped page entries for devices */
/* note: these entries are part of the exclusion range */
- for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
+ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
{
bus = bdf >> 8;
devfn = bdf & 0xFF;
@@ -357,7 +356,7 @@ static u16 __init parse_ivhd_device_padding(
}
static u16 __init parse_ivhd_device_select(
- union acpi_ivhd_device *ivhd_device)
+ union acpi_ivhd_device *ivhd_device, struct amd_iommu *iommu)
{
u16 bdf;
@@ -373,13 +372,14 @@ static u16 __init parse_ivhd_device_select(
get_field_from_byte(ivhd_device->header.flags,
AMD_IOMMU_ACPI_SYS_MGT_MASK,
AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ ivrs_mappings[bdf].iommu = iommu;
return sizeof(struct acpi_ivhd_device_header);
}
static u16 __init parse_ivhd_device_range(
union acpi_ivhd_device *ivhd_device,
- u16 header_length, u16 block_length)
+ u16 header_length, u16 block_length, struct amd_iommu *iommu)
{
u16 dev_length, first_bdf, last_bdf, bdf;
u8 sys_mgt;
@@ -423,14 +423,17 @@ static u16 __init parse_ivhd_device_range(
AMD_IOMMU_ACPI_SYS_MGT_MASK,
AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
+ {
ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+ ivrs_mappings[bdf].iommu = iommu;
+ }
return dev_length;
}
static u16 __init parse_ivhd_device_alias(
union acpi_ivhd_device *ivhd_device,
- u16 header_length, u16 block_length)
+ u16 header_length, u16 block_length, struct amd_iommu *iommu)
{
u16 dev_length, alias_id, bdf;
@@ -463,15 +466,18 @@ static u16 __init parse_ivhd_device_alias(
get_field_from_byte(ivhd_device->header.flags,
AMD_IOMMU_ACPI_SYS_MGT_MASK,
AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ ivrs_mappings[bdf].iommu = iommu;
+
ivrs_mappings[alias_id].dte_sys_mgt_enable =
ivrs_mappings[bdf].dte_sys_mgt_enable;
+ ivrs_mappings[alias_id].iommu = iommu;
return dev_length;
}
static u16 __init parse_ivhd_device_alias_range(
union acpi_ivhd_device *ivhd_device,
- u16 header_length, u16 block_length)
+ u16 header_length, u16 block_length, struct amd_iommu *iommu)
{
u16 dev_length, first_bdf, last_bdf, alias_id, bdf;
@@ -527,15 +533,17 @@ static u16 __init parse_ivhd_device_alias_range(
{
ivrs_mappings[bdf].dte_requestor_id = alias_id;
ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+ ivrs_mappings[bdf].iommu = iommu;
}
ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt;
+ ivrs_mappings[alias_id].iommu = iommu;
return dev_length;
}
static u16 __init parse_ivhd_device_extended(
union acpi_ivhd_device *ivhd_device,
- u16 header_length, u16 block_length)
+ u16 header_length, u16 block_length, struct amd_iommu *iommu)
{
u16 dev_length, bdf;
@@ -558,13 +566,14 @@ static u16 __init parse_ivhd_device_extended(
get_field_from_byte(ivhd_device->header.flags,
AMD_IOMMU_ACPI_SYS_MGT_MASK,
AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
+ ivrs_mappings[bdf].iommu = iommu;
return dev_length;
}
static u16 __init parse_ivhd_device_extended_range(
union acpi_ivhd_device *ivhd_device,
- u16 header_length, u16 block_length)
+ u16 header_length, u16 block_length, struct amd_iommu *iommu)
{
u16 dev_length, first_bdf, last_bdf, bdf;
u8 sys_mgt;
@@ -609,7 +618,10 @@ static u16 __init parse_ivhd_device_extended_range(
AMD_IOMMU_ACPI_SYS_MGT_MASK,
AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
for ( bdf = first_bdf; bdf <= last_bdf; bdf++ )
+ {
ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
+ ivrs_mappings[bdf].iommu = iommu;
+ }
return dev_length;
}
@@ -636,33 +648,6 @@ static int __init parse_ivhd_block(struct acpi_ivhd_block_header *ivhd_block)
return -ENODEV;
}
- amd_iov_info("IVHD Block:\n");
- amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset);
- amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n",ivhd_block->mmio_base);
- amd_iov_info( " PCI_Segment 0x%x\n", ivhd_block->pci_segment);
- amd_iov_info( " IOMMU_Info 0x%x\n", ivhd_block->iommu_info);
-
- /* override IOMMU support flags */
- iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
- AMD_IOMMU_ACPI_COHERENT_MASK,
- AMD_IOMMU_ACPI_COHERENT_SHIFT);
- iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags,
- AMD_IOMMU_ACPI_IOTLB_SUP_MASK,
- AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT);
- iommu->isochronous = get_field_from_byte(ivhd_block->header.flags,
- AMD_IOMMU_ACPI_ISOC_MASK,
- AMD_IOMMU_ACPI_ISOC_SHIFT);
- iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags,
- AMD_IOMMU_ACPI_RES_PASS_PW_MASK,
- AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT);
- iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags,
- AMD_IOMMU_ACPI_PASS_PW_MASK,
- AMD_IOMMU_ACPI_PASS_PW_SHIFT);
- iommu->ht_tunnel_enable = get_field_from_byte(
- ivhd_block->header.flags,
- AMD_IOMMU_ACPI_HT_TUN_ENB_MASK,
- AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT);
-
/* parse Device Entries */
block_length = sizeof(struct acpi_ivhd_block_header);
while ( ivhd_block->header.length >=
@@ -689,32 +674,32 @@ static int __init parse_ivhd_block(struct acpi_ivhd_block_header *ivhd_block)
ivhd_block->header.length, block_length);
break;
case AMD_IOMMU_ACPI_IVHD_DEV_SELECT:
- dev_length = parse_ivhd_device_select(ivhd_device);
+ dev_length = parse_ivhd_device_select(ivhd_device, iommu);
break;
case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START:
dev_length = parse_ivhd_device_range(
ivhd_device,
- ivhd_block->header.length, block_length);
+ ivhd_block->header.length, block_length, iommu);
break;
case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT:
dev_length = parse_ivhd_device_alias(
ivhd_device,
- ivhd_block->header.length, block_length);
+ ivhd_block->header.length, block_length, iommu);
break;
case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE:
dev_length = parse_ivhd_device_alias_range(
ivhd_device,
- ivhd_block->header.length, block_length);
+ ivhd_block->header.length, block_length, iommu);
break;
case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT:
dev_length = parse_ivhd_device_extended(
ivhd_device,
- ivhd_block->header.length, block_length);
+ ivhd_block->header.length, block_length, iommu);
break;
case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
dev_length = parse_ivhd_device_extended_range(
ivhd_device,
- ivhd_block->header.length, block_length);
+ ivhd_block->header.length, block_length, iommu);
break;
default:
amd_iov_error("IVHD Error: Invalid Device Type!\n");
@@ -794,11 +779,10 @@ static void __init dump_acpi_table_header(struct acpi_table_header *table)
}
-int __init parse_ivrs_table(struct acpi_table_header *_table)
+static int __init parse_ivrs_table(struct acpi_table_header *_table)
{
struct acpi_ivrs_block_header *ivrs_block;
- unsigned long length, i;
- u8 checksum, *raw_table;
+ unsigned long length;
int error = 0;
struct acpi_table_header *table = (struct acpi_table_header *)_table;
@@ -806,18 +790,6 @@ int __init parse_ivrs_table(struct acpi_table_header *_table)
dump_acpi_table_header(table);
- /* validate checksum: sum of entire table == 0 */
- checksum = 0;
- raw_table = (u8 *)table;
- for ( i = 0; i < table->length; i++ )
- checksum += raw_table[i];
- if ( checksum )
- {
- amd_iov_error("IVRS Error: "
- "Invalid Checksum 0x%x\n", checksum);
- return -ENODEV;
- }
-
/* parse IVRS blocks */
length = sizeof(struct acpi_ivrs_table_header);
while ( (error == 0) && (table->length > (length + sizeof(*ivrs_block))) )
@@ -846,3 +818,144 @@ int __init parse_ivrs_table(struct acpi_table_header *_table)
return error;
}
+
+/*
+ * ACPI table handler for the IVRS table: verify the table checksum, then
+ * walk the IVRS blocks and pass every IVHD block to
+ * amd_iommu_detect_one_acpi().
+ *
+ * Returns 0 on success, -ENODEV when the checksum does not sum to zero, a
+ * block has an impossible length, or detection of an IVHD block fails.
+ */
+static int __init detect_iommu_acpi(struct acpi_table_header *_table)
+{
+    struct acpi_ivrs_block_header *ivrs_block;
+    struct acpi_table_header *table = (struct acpi_table_header *)_table;
+    unsigned long i;
+    unsigned long length = sizeof(struct acpi_ivrs_table_header);
+    u8 checksum, *raw_table;
+
+    /* validate checksum: sum of entire table == 0 */
+    checksum = 0;
+    raw_table = (u8 *)table;
+    for ( i = 0; i < table->length; i++ )
+        checksum += raw_table[i];
+    if ( checksum )
+    {
+        amd_iov_error("IVRS Error: "
+                      "Invalid Checksum 0x%x\n", checksum);
+        return -ENODEV;
+    }
+
+    while ( table->length > (length + sizeof(*ivrs_block)) )
+    {
+        ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length);
+
+        /*
+         * A block shorter than its own header (in particular a zero
+         * length) would never advance 'length' and this loop would spin
+         * forever; a block running past the table end is equally invalid.
+         */
+        if ( (ivrs_block->length < sizeof(*ivrs_block)) ||
+             (table->length < (length + ivrs_block->length)) )
+            return -ENODEV;
+
+        if ( (ivrs_block->type == AMD_IOMMU_ACPI_IVHD_TYPE) &&
+             (amd_iommu_detect_one_acpi((void*)ivrs_block) != 0) )
+            return -ENODEV;
+
+        length += ivrs_block->length;
+    }
+    return 0;
+}
+
+/*
+ * Raise the file-scope 'last_bdf' to x when x is larger.  Expands to a
+ * single statement; the caller supplies the terminating semicolon.
+ */
+#define UPDATE_LAST_BDF(x) do {\
+    if ((x) > last_bdf) \
+        last_bdf = (x); \
+    } while(0)
+
+/*
+ * Walk the device entries of one IVHD block and record the highest device
+ * id seen in 'last_bdf'.  For the range entry types only the trailer
+ * dev_id is considered; padding entries are skipped.
+ *
+ * Returns 0 on success, -ENODEV if the block is shorter than its header or
+ * contains an unknown device entry type.
+ *
+ * NOTE(review): the loop condition only guarantees that the entry *header*
+ * lies inside the block; the trailer id of range entries is read without
+ * first checking that the whole entry fits -- confirm whether a bounds
+ * check is wanted here.
+ */
+static int __init get_last_bdf_ivhd(void *ivhd)
+{
+    union acpi_ivhd_device *ivhd_device;
+    u16 block_length, dev_length;
+    struct acpi_ivhd_block_header *ivhd_block;
+
+    ivhd_block = (struct acpi_ivhd_block_header *)ivhd;
+
+    if ( ivhd_block->header.length <
+         sizeof(struct acpi_ivhd_block_header) )
+    {
+        amd_iov_error("IVHD Error: Invalid Block Length!\n");
+        return -ENODEV;
+    }
+
+    block_length = sizeof(struct acpi_ivhd_block_header);
+    while ( ivhd_block->header.length >=
+            (block_length + sizeof(struct acpi_ivhd_device_header)) )
+    {
+        ivhd_device = (union acpi_ivhd_device *)
+            ((u8 *)ivhd_block + block_length);
+
+        switch ( ivhd_device->header.type )
+        {
+        case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD:
+            dev_length = sizeof(u32);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD:
+            dev_length = sizeof(u64);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_SELECT:
+            UPDATE_LAST_BDF(ivhd_device->header.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_header);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT:
+            UPDATE_LAST_BDF(ivhd_device->header.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_alias);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT:
+            UPDATE_LAST_BDF(ivhd_device->header.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_extended);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START:
+            UPDATE_LAST_BDF(ivhd_device->range.trailer.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_range);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE:
+            UPDATE_LAST_BDF(ivhd_device->alias_range.trailer.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_alias_range);
+            break;
+        case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
+            UPDATE_LAST_BDF(ivhd_device->extended_range.trailer.dev_id);
+            dev_length = sizeof(struct acpi_ivhd_device_extended_range);
+            break;
+        default:
+            amd_iov_error("IVHD Error: Invalid Device Type!\n");
+            dev_length = 0;
+            break;
+        }
+
+        block_length += dev_length;
+        /* a zero length marks the unknown-type case above */
+        if ( !dev_length )
+            return -ENODEV;
+    }
+
+    return 0;
+}
+
+/*
+ * ACPI table handler for the IVRS table: walk the IVRS blocks and let
+ * get_last_bdf_ivhd() scan every IVHD block, so that 'last_bdf' ends up
+ * holding the highest device id found.
+ *
+ * Returns 0 on success, -ENODEV if a block has an impossible length or an
+ * IVHD block cannot be scanned.
+ */
+static int __init get_last_bdf_acpi(struct acpi_table_header *_table)
+{
+    struct acpi_ivrs_block_header *ivrs_block;
+    struct acpi_table_header *table = (struct acpi_table_header *)_table;
+    unsigned long length = sizeof(struct acpi_ivrs_table_header);
+
+    while ( table->length > (length + sizeof(*ivrs_block)) )
+    {
+        ivrs_block = (struct acpi_ivrs_block_header *) ((u8 *)table + length);
+
+        /*
+         * A block shorter than its own header (in particular a zero
+         * length) would never advance 'length' and this loop would spin
+         * forever; a block running past the table end is equally invalid.
+         */
+        if ( (ivrs_block->length < sizeof(*ivrs_block)) ||
+             (table->length < (length + ivrs_block->length)) )
+            return -ENODEV;
+
+        if ( (ivrs_block->type == AMD_IOMMU_ACPI_IVHD_TYPE) &&
+             (get_last_bdf_ivhd((void*)ivrs_block) != 0) )
+            return -ENODEV;
+
+        length += ivrs_block->length;
+    }
+    return 0;
+}
+
+/*
+ * Run detect_iommu_acpi() over the IVRS ACPI table and hand back whatever
+ * acpi_table_parse() reports.
+ */
+int __init amd_iommu_detect_acpi(void)
+{
+    int rc = acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, detect_iommu_acpi);
+
+    return rc;
+}
+
+/*
+ * Scan the IVRS ACPI table with get_last_bdf_acpi() and return the number
+ * of device entries, i.e. last_bdf + 1.
+ *
+ * The result of acpi_table_parse() is deliberately not examined: if the
+ * scan fails, the count is derived from whatever 'last_bdf' holds at that
+ * point.
+ */
+int __init amd_iommu_get_ivrs_dev_entries(void)
+{
+    int entries;
+
+    acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, get_last_bdf_acpi);
+    entries = last_bdf + 1;
+
+    return entries;
+}
+
+/*
+ * Run parse_ivrs_table() over the IVRS ACPI table and hand back whatever
+ * acpi_table_parse() reports.
+ */
+int __init amd_iommu_update_ivrs_mapping_acpi(void)
+{
+    int rc = acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, parse_ivrs_table);
+
+    return rc;
+}
diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c
index f713a0e15f..9180de9aec 100644
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -25,65 +25,10 @@
#include <xen/pci_regs.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/hvm/svm/amd-iommu-acpi.h>
-static int __init valid_bridge_bus_config(
- int bus, int dev, int func, int *sec_bus, int *sub_bus)
-{
- int pri_bus;
-
- pri_bus = pci_conf_read8(bus, dev, func, PCI_PRIMARY_BUS);
- *sec_bus = pci_conf_read8(bus, dev, func, PCI_SECONDARY_BUS);
- *sub_bus = pci_conf_read8(bus, dev, func, PCI_SUBORDINATE_BUS);
-
- return ((pri_bus == bus) && (*sec_bus > bus) && (*sub_bus >= *sec_bus));
-}
-
-int __init get_iommu_last_downstream_bus(struct amd_iommu *iommu)
-{
- int bus, dev, func;
- int devfn, hdr_type;
- int sec_bus, sub_bus;
- int multi_func;
-
- bus = iommu->last_downstream_bus = iommu->root_bus;
- iommu->downstream_bus_present[bus] = 1;
- dev = PCI_SLOT(iommu->first_devfn);
- multi_func = PCI_FUNC(iommu->first_devfn) > 0;
- for ( devfn = iommu->first_devfn; devfn <= iommu->last_devfn; devfn++ )
- {
- /* skipping to next device#? */
- if ( dev != PCI_SLOT(devfn) )
- {
- dev = PCI_SLOT(devfn);
- multi_func = 0;
- }
- func = PCI_FUNC(devfn);
-
- if ( !VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
- PCI_VENDOR_ID)) )
- continue;
-
- hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
- if ( func == 0 )
- multi_func = IS_PCI_MULTI_FUNCTION(hdr_type);
-
- if ( (func == 0 || multi_func) &&
- IS_PCI_TYPE1_HEADER(hdr_type) )
- {
- if ( !valid_bridge_bus_config(bus, dev, func,
- &sec_bus, &sub_bus) )
- return -ENODEV;
-
- if ( sub_bus > iommu->last_downstream_bus )
- iommu->last_downstream_bus = sub_bus;
- do {
- iommu->downstream_bus_present[sec_bus] = 1;
- } while ( sec_bus++ < sub_bus );
- }
- }
-
- return 0;
-}
+extern struct list_head amd_iommu_head;
+unsigned short last_bdf = 0;
static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func,
struct amd_iommu *iommu)
@@ -128,30 +73,10 @@ int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
struct amd_iommu *iommu)
{
u32 cap_header, cap_range, misc_info;
- u64 mmio_bar;
-
- mmio_bar = (u64)pci_conf_read32(
- bus, dev, func, cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
- mmio_bar |= pci_conf_read32(bus, dev, func,
- cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET);
- iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF;
-
- if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) )
- {
- amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
- return -ENODEV;
- }
-
- iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func);
- iommu->cap_offset = cap_ptr;
cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
iommu->revision = get_field_from_reg_u32(
cap_header, PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT);
- iommu->iotlb_support = get_field_from_reg_u32(
- cap_header, PCI_CAP_IOTLB_MASK, PCI_CAP_IOTLB_SHIFT);
- iommu->ht_tunnel_support = get_field_from_reg_u32(
- cap_header, PCI_CAP_HT_TUNNEL_MASK, PCI_CAP_HT_TUNNEL_SHIFT);
iommu->pte_not_present_cached = get_field_from_reg_u32(
cap_header, PCI_CAP_NP_CACHE_MASK, PCI_CAP_NP_CACHE_SHIFT);
@@ -159,96 +84,76 @@ int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
cap_ptr + PCI_CAP_RANGE_OFFSET);
iommu->unit_id = get_field_from_reg_u32(
cap_range, PCI_CAP_UNIT_ID_MASK, PCI_CAP_UNIT_ID_SHIFT);
- iommu->root_bus = get_field_from_reg_u32(
- cap_range, PCI_CAP_BUS_NUMBER_MASK, PCI_CAP_BUS_NUMBER_SHIFT);
- iommu->first_devfn = get_field_from_reg_u32(
- cap_range, PCI_CAP_FIRST_DEVICE_MASK, PCI_CAP_FIRST_DEVICE_SHIFT);
- iommu->last_devfn = get_field_from_reg_u32(
- cap_range, PCI_CAP_LAST_DEVICE_MASK, PCI_CAP_LAST_DEVICE_SHIFT);
misc_info = pci_conf_read32(bus, dev, func,
cap_ptr + PCI_MISC_INFO_OFFSET);
iommu->msi_number = get_field_from_reg_u32(
misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT);
- get_iommu_msi_capabilities(bus, dev, func, iommu);
-
return 0;
}
-static int __init scan_caps_for_iommu(
- int bus, int dev, int func,
- iommu_detect_callback_ptr_t iommu_detect_callback)
+int __init amd_iommu_detect_one_acpi(void *ivhd)
{
- int cap_ptr, cap_id, cap_type;
- u32 cap_header;
- int count, error = 0;
-
- count = 0;
- cap_ptr = pci_conf_read8(bus, dev, func, PCI_CAPABILITY_LIST);
- while ( (cap_ptr >= PCI_MIN_CAP_OFFSET) &&
- (count < PCI_MAX_CAP_BLOCKS) &&
- !error )
- {
- cap_ptr &= PCI_CAP_PTR_MASK;
- cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
- cap_id = get_field_from_reg_u32(
- cap_header, PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT);
+ struct amd_iommu *iommu;
+ u8 bus, dev, func;
+ struct acpi_ivhd_block_header *ivhd_block;
- if ( cap_id == PCI_CAP_ID_SECURE_DEVICE )
- {
- cap_type = get_field_from_reg_u32(
- cap_header, PCI_CAP_TYPE_MASK, PCI_CAP_TYPE_SHIFT);
- if ( cap_type == PCI_CAP_TYPE_IOMMU )
- error = iommu_detect_callback(
- bus, dev, func, cap_ptr);
- }
+ ivhd_block = (struct acpi_ivhd_block_header *)ivhd;
- cap_ptr = get_field_from_reg_u32(
- cap_header, PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT);
- count++;
+ if ( ivhd_block->header.length < sizeof(struct acpi_ivhd_block_header) )
+ {
+ amd_iov_error("Invalid IVHD Block Length!\n");
+ return -ENODEV;
}
- return error;
-}
-
-static int __init scan_functions_for_iommu(
- int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback)
-{
- int func, hdr_type;
- int count = 1, error = 0;
-
- for ( func = 0;
- (func < count) && !error &&
- VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
- PCI_VENDOR_ID));
- func++ )
-
+ if ( !ivhd_block->header.dev_id ||
+ !ivhd_block->cap_offset || !ivhd_block->mmio_base)
{
- hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
-
- if ( (func == 0) && IS_PCI_MULTI_FUNCTION(hdr_type) )
- count = PCI_MAX_FUNC_COUNT;
-
- if ( IS_PCI_TYPE0_HEADER(hdr_type) ||
- IS_PCI_TYPE1_HEADER(hdr_type) )
- error = scan_caps_for_iommu(bus, dev, func,
- iommu_detect_callback);
+ amd_iov_error("Invalid IVHD Block!\n");
+ return -ENODEV;
}
- return error;
-}
-
-
-int __init scan_for_iommu(iommu_detect_callback_ptr_t iommu_detect_callback)
-{
- int bus, dev, error = 0;
+ iommu = (struct amd_iommu *) xmalloc(struct amd_iommu);
+ if ( !iommu )
+ {
+ amd_iov_error("Error allocating amd_iommu\n");
+ return -ENOMEM;
+ }
+ memset(iommu, 0, sizeof(struct amd_iommu));
+
+ spin_lock_init(&iommu->lock);
+
+ iommu->bdf = ivhd_block->header.dev_id;
+ iommu->cap_offset = ivhd_block->cap_offset;
+ iommu->mmio_base_phys = ivhd_block->mmio_base;
+
+ /* override IOMMU support flags */
+ iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_COHERENT_MASK,
+ AMD_IOMMU_ACPI_COHERENT_SHIFT);
+ iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_IOTLB_SUP_MASK,
+ AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT);
+ iommu->isochronous = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_ISOC_MASK,
+ AMD_IOMMU_ACPI_ISOC_SHIFT);
+ iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_RES_PASS_PW_MASK,
+ AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT);
+ iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_PASS_PW_MASK,
+ AMD_IOMMU_ACPI_PASS_PW_SHIFT);
+ iommu->ht_tunnel_enable = get_field_from_byte(ivhd_block->header.flags,
+ AMD_IOMMU_ACPI_HT_TUN_ENB_MASK,
+ AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT);
+ bus = iommu->bdf >> 8;
+ dev = PCI_SLOT(iommu->bdf & 0xFF);
+ func = PCI_FUNC(iommu->bdf & 0xFF);
+ get_iommu_capabilities(bus, dev, func, iommu->cap_offset, iommu);
+ get_iommu_msi_capabilities(bus, dev, func, iommu);
- for ( bus = 0; bus < PCI_MAX_BUS_COUNT && !error; ++bus )
- for ( dev = 0; dev < PCI_MAX_DEV_COUNT && !error; ++dev )
- error = scan_functions_for_iommu(bus, dev,
- iommu_detect_callback);
+ list_add_tail(&iommu->list, &amd_iommu_head);
- return error;
+ return 0;
}
-
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index e1a2fae283..0a2081fe39 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -27,10 +27,20 @@
#include <asm/hvm/svm/amd-iommu-proto.h>
#include <asm-x86/fixmap.h>
-extern int nr_amd_iommus;
static struct amd_iommu *vector_to_iommu[NR_VECTORS];
+static int nr_amd_iommus;
+static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
+static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES;
-int __init map_iommu_mmio_region(struct amd_iommu *iommu)
+unsigned short ivrs_bdf_entries;
+struct ivrs_mappings *ivrs_mappings;
+struct list_head amd_iommu_head;
+struct table_struct device_table;
+
+extern void *int_remap_table;
+extern spinlock_t int_remap_table_lock;
+
+static int __init map_iommu_mmio_region(struct amd_iommu *iommu)
{
unsigned long mfn;
@@ -51,7 +61,7 @@ int __init map_iommu_mmio_region(struct amd_iommu *iommu)
return 0;
}
-void __init unmap_iommu_mmio_region(struct amd_iommu *iommu)
+static void __init unmap_iommu_mmio_region(struct amd_iommu *iommu)
{
if ( iommu->mmio_base )
{
@@ -60,7 +70,7 @@ void __init unmap_iommu_mmio_region(struct amd_iommu *iommu)
}
}
-void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
+static void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
{
u64 addr_64, addr_lo, addr_hi;
u32 entry;
@@ -83,7 +93,7 @@ void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu)
writel(entry, iommu->mmio_base + IOMMU_DEV_TABLE_BASE_HIGH_OFFSET);
}
-void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu)
+static void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu)
{
u64 addr_64, addr_lo, addr_hi;
u32 power_of2_entries;
@@ -110,7 +120,7 @@ void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu)
writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET);
}
-void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
+static void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
{
u64 addr_64, addr_lo, addr_hi;
u32 power_of2_entries;
@@ -266,12 +276,13 @@ static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[])
return -EFAULT;
}
-static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector)
+static void amd_iommu_msi_data_init(struct amd_iommu *iommu)
{
u32 msi_data;
u8 bus = (iommu->bdf >> 8) & 0xff;
u8 dev = PCI_SLOT(iommu->bdf & 0xff);
u8 func = PCI_FUNC(iommu->bdf & 0xff);
+ int vector = iommu->vector;
msi_data = MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
@@ -434,7 +445,6 @@ static void amd_iommu_page_fault(int vector, void *dev_id,
static int set_iommu_interrupt_handler(struct amd_iommu *iommu)
{
int vector, ret;
- unsigned long flags;
vector = assign_irq_vector(AUTO_ASSIGN);
vector_to_iommu[vector] = iommu;
@@ -450,21 +460,13 @@ static int set_iommu_interrupt_handler(struct amd_iommu *iommu)
}
irq_desc[vector].handler = &iommu_msi_type;
- ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu);
+ ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
if ( ret )
{
amd_iov_error("can't request irq\n");
return 0;
}
- spin_lock_irqsave(&iommu->lock, flags);
-
- amd_iommu_msi_data_init (iommu, vector);
- amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
- amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
-
return vector;
}
@@ -472,16 +474,196 @@ void __init enable_iommu(struct amd_iommu *iommu)
{
unsigned long flags;
- set_iommu_interrupt_handler(iommu);
-
spin_lock_irqsave(&iommu->lock, flags);
+ if ( iommu->enabled )
+ {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return;
+ }
+
+ iommu->dev_table.alloc_size = device_table.alloc_size;
+ iommu->dev_table.entries = device_table.entries;
+ iommu->dev_table.buffer = device_table.buffer;
+
+ register_iommu_dev_table_in_mmio_space(iommu);
+ register_iommu_cmd_buffer_in_mmio_space(iommu);
+ register_iommu_event_log_in_mmio_space(iommu);
register_iommu_exclusion_range(iommu);
+
+ amd_iommu_msi_data_init (iommu);
+ amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
+ amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
+
set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
+ printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus );
+ nr_amd_iommus++;
+
+ iommu->enabled = 1;
spin_unlock_irqrestore(&iommu->lock, flags);
- printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus);
+}
+
+/*
+ * Return a table's buffer to the Xen heap and clear the pointer.  The order
+ * is derived from table->alloc_size.  Does nothing when no buffer is
+ * attached, so it may be called repeatedly on the same table.
+ */
+static void __init deallocate_iommu_table_struct(
+    struct table_struct *table)
+{
+    if ( !table->buffer )
+        return;
+
+    free_xenheap_pages(table->buffer,
+                       get_order_from_bytes(table->alloc_size));
+    table->buffer = NULL;
+}
+
+/*
+ * Free the per-IOMMU tables: first the command buffer, then the event log.
+ */
+static void __init deallocate_iommu_tables(struct amd_iommu *iommu)
+{
+    struct table_struct *tables[] = { &iommu->cmd_buffer, &iommu->event_log };
+    unsigned int i;
+
+    for ( i = 0; i < sizeof(tables) / sizeof(tables[0]); i++ )
+        deallocate_iommu_table_struct(tables[i]);
+}
+
+/*
+ * Allocate and zero the buffer of 'table' from the Xen heap.  The caller
+ * must have filled in table->alloc_size beforehand; 'name' is only used in
+ * the error message.
+ *
+ * Returns 0 on success, -ENOMEM (with table->buffer left NULL) when the
+ * allocation fails.
+ */
+static int __init allocate_iommu_table_struct(struct table_struct *table,
+                                              const char *name)
+{
+    table->buffer = (void *) alloc_xenheap_pages(
+        get_order_from_bytes(table->alloc_size));
+
+    if ( table->buffer )
+    {
+        memset(table->buffer, 0, table->alloc_size);
+        return 0;
+    }
+
+    amd_iov_error("Error allocating %s\n", name);
+    return -ENOMEM;
+}
+
+/*
+ * Size and allocate the per-IOMMU command buffer and event log.  Both
+ * sizes are rounded up to a power-of-two number of pages, and the entry
+ * counts are derived from the rounded size.
+ *
+ * Returns 0 on success.  If either allocation fails, both tables are
+ * released again and -ENOMEM is returned.
+ */
+static int __init allocate_iommu_tables(struct amd_iommu *iommu)
+{
+    int failed;
+
+    /* allocate 'command buffer' in power of 2 increments of 4K */
+    iommu->cmd_buffer_tail = 0;
+    iommu->cmd_buffer.alloc_size = PAGE_SIZE << get_order_from_bytes(
+        PAGE_ALIGN(amd_iommu_cmd_buffer_entries * IOMMU_CMD_BUFFER_ENTRY_SIZE));
+    iommu->cmd_buffer.entries =
+        iommu->cmd_buffer.alloc_size / IOMMU_CMD_BUFFER_ENTRY_SIZE;
+
+    failed = allocate_iommu_table_struct(&iommu->cmd_buffer,
+                                         "Command Buffer") != 0;
+
+    if ( !failed )
+    {
+        /* allocate 'event log' in power of 2 increments of 4K */
+        iommu->event_log_head = 0;
+        iommu->event_log.alloc_size = PAGE_SIZE << get_order_from_bytes(
+            PAGE_ALIGN(amd_iommu_event_log_entries *
+                       IOMMU_EVENT_LOG_ENTRY_SIZE));
+        iommu->event_log.entries =
+            iommu->event_log.alloc_size / IOMMU_EVENT_LOG_ENTRY_SIZE;
+
+        failed = allocate_iommu_table_struct(&iommu->event_log,
+                                             "Event Log") != 0;
+    }
+
+    if ( !failed )
+        return 0;
+
+    /* undo whatever was allocated before the failure */
+    deallocate_iommu_tables(iommu);
+    return -ENOMEM;
+}
+
+/*
+ * Bring up a single IOMMU: allocate its command buffer and event log, map
+ * its MMIO region, install the interrupt handler and finally enable it.
+ *
+ * Returns 0 on success, -ENODEV on any failure.  On failure everything
+ * acquired so far is released here, because amd_iommu_init_cleanup() only
+ * frees the resources of IOMMUs whose 'enabled' flag is set, and that flag
+ * is set only by enable_iommu().
+ *
+ * NOTE(review): the vector returned by set_iommu_interrupt_handler() is
+ * only tested against 0 here, while amd_iommu_msi_data_init() reads
+ * iommu->vector -- confirm the handler setup stores it.
+ */
+int __init amd_iommu_init_one(struct amd_iommu *iommu)
+{
+    /* allocate_iommu_tables() already unwinds itself on failure */
+    if ( allocate_iommu_tables(iommu) != 0 )
+        goto error_out;
+
+    if ( map_iommu_mmio_region(iommu) != 0 )
+        goto error_free_tables;
+
+    if ( set_iommu_interrupt_handler(iommu) == 0 )
+        goto error_unmap;
+
+    enable_iommu(iommu);
+    return 0;
+
+error_unmap:
+    unmap_iommu_mmio_region(iommu);
+error_free_tables:
+    deallocate_iommu_tables(iommu);
+error_out:
+    return -ENODEV;
+}
+
+/*
+ * Tear down the list of detected IOMMUs: unlink every entry from
+ * amd_iommu_head and free it.  The command buffer, event log and MMIO
+ * mapping are released only for entries marked as enabled.
+ */
+void __init amd_iommu_init_cleanup(void)
+{
+    struct amd_iommu *cur, *tmp;
+
+    /* the _safe variant is needed because entries are freed while walking */
+    list_for_each_entry_safe ( cur, tmp, &amd_iommu_head, list )
+    {
+        list_del(&cur->list);
+
+        if ( cur->enabled )
+        {
+            deallocate_iommu_tables(cur);
+            unmap_iommu_mmio_region(cur);
+        }
+
+        xfree(cur);
+    }
+}
+
+/*
+ * Allocate the global ivrs_mappings array (ivrs_bdf_entries elements) and
+ * give every entry its default values: the requestor id equals the entry's
+ * own index, system management messages are forwarded, exclusion and unity
+ * mapping are disabled, and no IOMMU is assigned.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+static int __init init_ivrs_mapping(void)
+{
+    struct ivrs_mappings *entry;
+    int bdf;
+
+    BUG_ON( !ivrs_bdf_entries );
+
+    ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
+    if ( ivrs_mappings == NULL )
+    {
+        amd_iov_error("Error allocating IVRS Mappings table\n");
+        return -ENOMEM;
+    }
+    memset(ivrs_mappings, 0, ivrs_bdf_entries * sizeof(struct ivrs_mappings));
+
+    /* assign default values for device entries */
+    for ( bdf = 0, entry = ivrs_mappings;
+          bdf < ivrs_bdf_entries;
+          bdf++, entry++ )
+    {
+        entry->dte_requestor_id = bdf;
+        entry->dte_sys_mgt_enable = IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED;
+        entry->dte_allow_exclusion = IOMMU_CONTROL_DISABLED;
+        entry->unity_map_enable = IOMMU_CONTROL_DISABLED;
+        entry->iommu = NULL;
+    }
+    return 0;
+}
+
+/*
+ * Size and allocate the global 'device_table': room for ivrs_bdf_entries
+ * device table entries, rounded up to a power-of-two number of pages.
+ *
+ * Returns the result of allocate_iommu_table_struct().
+ */
+static int __init amd_iommu_setup_device_table(void)
+{
+    int rc;
+
+    /* allocate 'device table' on a 4K boundary */
+    device_table.alloc_size = PAGE_SIZE << get_order_from_bytes(
+        PAGE_ALIGN(ivrs_bdf_entries * IOMMU_DEV_TABLE_ENTRY_SIZE));
+    device_table.entries = device_table.alloc_size / IOMMU_DEV_TABLE_ENTRY_SIZE;
+
+    rc = allocate_iommu_table_struct(&device_table, "Device Table");
+
+    return rc;
+}
+
+/*
+ * Set up the tables shared by all IOMMUs, in this order: the IVRS mapping
+ * array, the device table and the interrupt remapping table.
+ *
+ * Returns 0 when all three succeed.  On the first failure the later steps
+ * are skipped, all three tables are released again and -ENOMEM is
+ * returned.
+ */
+int __init amd_iommu_setup_shared_tables(void)
+{
+    BUG_ON( !ivrs_bdf_entries );
+
+    /* && short-circuits, so a failing step stops the sequence */
+    if ( (init_ivrs_mapping() == 0) &&
+         (amd_iommu_setup_device_table() == 0) &&
+         (amd_iommu_setup_intremap_table() == 0) )
+        return 0;
+
+    deallocate_intremap_table();
+    deallocate_iommu_table_struct(&device_table);
+
+    if ( ivrs_mappings )
+    {
+        xfree(ivrs_mappings);
+        ivrs_mappings = NULL;
+    }
+    return -ENOMEM;
+}
diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c
index ef31c5dd6e..e6ade3a10b 100644
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -107,7 +107,7 @@ static void update_intremap_entry_from_ioapic(
return;
}
-int amd_iommu_setup_intremap_table(void)
+int __init amd_iommu_setup_intremap_table(void)
{
unsigned long flags;
@@ -203,3 +203,18 @@ void amd_iommu_msi_msg_update_ire(
update_intremap_entry_from_msi_msg(iommu, pdev, msg);
}
+
+/*
+ * Free the interrupt remapping table, if one is allocated, and clear the
+ * pointer.  The check and the free both happen under int_remap_table_lock
+ * with interrupts disabled.  Always returns 0.
+ */
+int __init deallocate_intremap_table(void)
+{
+    unsigned long irq_flags;
+
+    spin_lock_irqsave(&int_remap_table_lock, irq_flags);
+    if ( int_remap_table != NULL )
+    {
+        /* second argument is the allocation order */
+        free_xenheap_pages(int_remap_table, 1);
+        int_remap_table = NULL;
+    }
+    spin_unlock_irqrestore(&int_remap_table_lock, irq_flags);
+
+    return 0;
+}
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index 75961558a3..a41fe60890 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -23,7 +23,7 @@
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
-extern long amd_iommu_poll_comp_wait;
+long amd_iommu_poll_comp_wait = COMPLETION_WAIT_DEFAULT_POLLING_COUNT;
static int queue_iommu_command(struct amd_iommu *iommu, u32 cmd[])
{
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 4c57af7747..8e982455cb 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -23,17 +23,10 @@
#include <xen/pci_regs.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
-#include <asm/hvm/svm/amd-iommu-acpi.h>
#include <asm/mm.h>
-struct list_head amd_iommu_head;
-long amd_iommu_poll_comp_wait = COMPLETION_WAIT_DEFAULT_POLLING_COUNT;
-static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
-static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES;
-int nr_amd_iommus;
-
-unsigned short ivrs_bdf_entries;
-struct ivrs_mappings *ivrs_mappings;
+extern unsigned short ivrs_bdf_entries;
+extern struct ivrs_mappings *ivrs_mappings;
extern void *int_remap_table;
static void deallocate_domain_page_tables(struct hvm_iommu *hd)
@@ -47,209 +40,39 @@ static void deallocate_domain_resources(struct hvm_iommu *hd)
deallocate_domain_page_tables(hd);
}
-static void __init init_cleanup(void)
+int __init amd_iommu_init(void)
{
struct amd_iommu *iommu;
- for_each_amd_iommu ( iommu )
- unmap_iommu_mmio_region(iommu);
-}
-
-static void __init deallocate_iommu_table_struct(
- struct table_struct *table)
-{
- if ( table->buffer )
- {
- free_xenheap_pages(table->buffer,
- get_order_from_bytes(table->alloc_size));
- table->buffer = NULL;
- }
-}
-
-static void __init deallocate_iommu_resources(struct amd_iommu *iommu)
-{
- deallocate_iommu_table_struct(&iommu->dev_table);
- deallocate_iommu_table_struct(&iommu->cmd_buffer);
- deallocate_iommu_table_struct(&iommu->event_log);
-}
+ BUG_ON( !iommu_found() );
-static int __init allocate_iommu_table_struct(struct table_struct *table,
- const char *name)
-{
- table->buffer = (void *) alloc_xenheap_pages(
- get_order_from_bytes(table->alloc_size));
+ ivrs_bdf_entries = amd_iommu_get_ivrs_dev_entries();
- if ( !table->buffer )
- {
- amd_iov_error("Error allocating %s\n", name);
- return -ENOMEM;
- }
-
- memset(table->buffer, 0, table->alloc_size);
-
- return 0;
-}
-
-static int __init allocate_iommu_resources(struct amd_iommu *iommu)
-{
- /* allocate 'device table' on a 4K boundary */
- iommu->dev_table.alloc_size =
- PAGE_ALIGN(((iommu->last_downstream_bus + 1) *
- IOMMU_DEV_TABLE_ENTRIES_PER_BUS) *
- IOMMU_DEV_TABLE_ENTRY_SIZE);
- iommu->dev_table.entries =
- iommu->dev_table.alloc_size / IOMMU_DEV_TABLE_ENTRY_SIZE;
-
- if ( allocate_iommu_table_struct(&iommu->dev_table,
- "Device Table") != 0 )
+ if ( !ivrs_bdf_entries )
goto error_out;
- /* allocate 'command buffer' in power of 2 increments of 4K */
- iommu->cmd_buffer_tail = 0;
- iommu->cmd_buffer.alloc_size =
- PAGE_SIZE << get_order_from_bytes(
- PAGE_ALIGN(amd_iommu_cmd_buffer_entries *
- IOMMU_CMD_BUFFER_ENTRY_SIZE));
-
- iommu->cmd_buffer.entries =
- iommu->cmd_buffer.alloc_size / IOMMU_CMD_BUFFER_ENTRY_SIZE;
-
- if ( allocate_iommu_table_struct(&iommu->cmd_buffer,
- "Command Buffer") != 0 )
+ if ( amd_iommu_setup_shared_tables() != 0 )
goto error_out;
- /* allocate 'event log' in power of 2 increments of 4K */
- iommu->event_log_head = 0;
- iommu->event_log.alloc_size =
- PAGE_SIZE << get_order_from_bytes(
- PAGE_ALIGN(amd_iommu_event_log_entries *
- IOMMU_EVENT_LOG_ENTRY_SIZE));
-
- iommu->event_log.entries =
- iommu->event_log.alloc_size / IOMMU_EVENT_LOG_ENTRY_SIZE;
-
- if ( allocate_iommu_table_struct(&iommu->event_log,
- "Event Log") != 0 )
+ if ( amd_iommu_update_ivrs_mapping_acpi() != 0 )
goto error_out;
- return 0;
-
- error_out:
- deallocate_iommu_resources(iommu);
- return -ENOMEM;
-}
-
-int iommu_detect_callback(u8 bus, u8 dev, u8 func, u8 cap_ptr)
-{
- struct amd_iommu *iommu;
-
- iommu = (struct amd_iommu *) xmalloc(struct amd_iommu);
- if ( !iommu )
- {
- amd_iov_error("Error allocating amd_iommu\n");
- return -ENOMEM;
- }
- memset(iommu, 0, sizeof(struct amd_iommu));
- spin_lock_init(&iommu->lock);
-
- /* get capability and topology information */
- if ( get_iommu_capabilities(bus, dev, func, cap_ptr, iommu) != 0 )
- goto error_out;
- if ( get_iommu_last_downstream_bus(iommu) != 0 )
- goto error_out;
-
- list_add_tail(&iommu->list, &amd_iommu_head);
-
- /* allocate resources for this IOMMU */
- if ( allocate_iommu_resources(iommu) != 0 )
- goto error_out;
-
- return 0;
-
- error_out:
- xfree(iommu);
- return -ENODEV;
-}
-
-static int __init amd_iommu_init(void)
-{
- struct amd_iommu *iommu;
- unsigned long flags;
- u16 bdf;
-
for_each_amd_iommu ( iommu )
- {
- spin_lock_irqsave(&iommu->lock, flags);
-
- /* assign default IOMMU values */
- iommu->coherent = IOMMU_CONTROL_ENABLED;
- iommu->isochronous = IOMMU_CONTROL_ENABLED;
- iommu->res_pass_pw = IOMMU_CONTROL_ENABLED;
- iommu->pass_pw = IOMMU_CONTROL_ENABLED;
- iommu->ht_tunnel_enable = iommu->ht_tunnel_support ?
- IOMMU_CONTROL_ENABLED : IOMMU_CONTROL_DISABLED;
- iommu->exclusion_enable = IOMMU_CONTROL_DISABLED;
- iommu->exclusion_allow_all = IOMMU_CONTROL_DISABLED;
-
- /* register IOMMU data strucures in MMIO space */
- if ( map_iommu_mmio_region(iommu) != 0 )
+ if ( amd_iommu_init_one(iommu) != 0 )
goto error_out;
- register_iommu_dev_table_in_mmio_space(iommu);
- register_iommu_cmd_buffer_in_mmio_space(iommu);
- register_iommu_event_log_in_mmio_space(iommu);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
- }
-
- /* assign default values for device entries */
- for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
- {
- ivrs_mappings[bdf].dte_requestor_id = bdf;
- ivrs_mappings[bdf].dte_sys_mgt_enable =
- IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED;
- ivrs_mappings[bdf].dte_allow_exclusion =
- IOMMU_CONTROL_DISABLED;
- ivrs_mappings[bdf].unity_map_enable =
- IOMMU_CONTROL_DISABLED;
- }
-
- if ( acpi_table_parse(AMD_IOMMU_ACPI_IVRS_SIG, parse_ivrs_table) != 0 )
- amd_iov_error("Did not find IVRS table!\n");
-
- for_each_amd_iommu ( iommu )
- {
- /* enable IOMMU translation services */
- enable_iommu(iommu);
- nr_amd_iommus++;
- }
return 0;
- error_out:
- init_cleanup();
+error_out:
+ amd_iommu_init_cleanup();
return -ENODEV;
}
struct amd_iommu *find_iommu_for_device(int bus, int devfn)
{
- struct amd_iommu *iommu;
-
- for_each_amd_iommu ( iommu )
- {
- if ( bus == iommu->root_bus )
- {
- if ( (devfn >= iommu->first_devfn) &&
- (devfn <= iommu->last_devfn) )
- return iommu;
- }
- else if ( bus <= iommu->last_downstream_bus )
- {
- if ( iommu->downstream_bus_present[bus] )
- return iommu;
- }
- }
-
- return NULL;
+ u16 bdf = (bus << 8) | devfn;
+ BUG_ON ( bdf >= ivrs_bdf_entries );
+ return ivrs_mappings[bdf].iommu;
}
static void amd_iommu_setup_domain_device(
@@ -335,70 +158,26 @@ static void amd_iommu_setup_dom0_devices(struct domain *d)
int amd_iov_detect(void)
{
- int last_bus;
- struct amd_iommu *iommu, *next;
-
INIT_LIST_HEAD(&amd_iommu_head);
- if ( scan_for_iommu(iommu_detect_callback) != 0 )
+ if ( amd_iommu_detect_acpi() != 0 )
{
amd_iov_error("Error detection\n");
- goto error_out;
+ return -ENODEV;
}
if ( !iommu_found() )
{
printk("AMD_IOV: IOMMU not found!\n");
- goto error_out;
- }
-
- /* allocate 'ivrs mappings' table */
- /* note: the table has entries to accomodate all IOMMUs */
- last_bus = 0;
- for_each_amd_iommu ( iommu )
- if ( iommu->last_downstream_bus > last_bus )
- last_bus = iommu->last_downstream_bus;
-
- ivrs_bdf_entries = (last_bus + 1) *
- IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
- ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
- if ( ivrs_mappings == NULL )
- {
- amd_iov_error("Error allocating IVRS DevMappings table\n");
- goto error_out;
- }
- memset(ivrs_mappings, 0,
- ivrs_bdf_entries * sizeof(struct ivrs_mappings));
-
- if ( amd_iommu_setup_intremap_table() != 0 )
- {
- amd_iov_error("Error allocating interrupt remapping table\n");
- goto error_out;
+ return -ENODEV;
}
if ( amd_iommu_init() != 0 )
{
amd_iov_error("Error initialization\n");
- goto error_out;
+ return -ENODEV;
}
-
return 0;
-
- error_out:
- list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
- {
- list_del(&iommu->list);
- deallocate_iommu_resources(iommu);
- xfree(iommu);
- }
-
- if ( ivrs_mappings )
- {
- xfree(ivrs_mappings);
- ivrs_mappings = NULL;
- }
-
- return -ENODEV;
}
static int allocate_domain_resources(struct hvm_iommu *hd)
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 2e2afaeb13..9cc39919d6 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -20,15 +20,48 @@
extern struct iommu_ops intel_iommu_ops;
extern struct iommu_ops amd_iommu_ops;
+static void parse_iommu_param(char *s);
static int iommu_populate_page_table(struct domain *d);
int intel_vtd_setup(void);
int amd_iov_detect(void);
+/*
+ * The 'iommu' parameter enables the IOMMU. Optional comma separated
+ * value may contain:
+ *
+ * off|no|false|disable Disable IOMMU (default)
+ * pv Enable IOMMU for PV domains
+ * no-pv Disable IOMMU for PV domains (default)
+ * force|required Don't boot unless IOMMU is enabled
+ */
+custom_param("iommu", parse_iommu_param);
int iommu_enabled = 0;
-boolean_param("iommu", iommu_enabled);
-
int iommu_pv_enabled = 0;
-boolean_param("iommu_pv", iommu_pv_enabled);
+int force_iommu = 0;
+
+static void __init parse_iommu_param(char *s)
+{
+ char *ss;
+ iommu_enabled = 1;
+
+ do {
+ ss = strchr(s, ',');
+ if ( ss )
+ *ss = '\0';
+
+ if ( !strcmp(s, "off") || !strcmp(s, "no") || !strcmp(s, "false") ||
+ !strcmp(s, "0") || !strcmp(s, "disable") )
+ iommu_enabled = 0;
+ else if ( !strcmp(s, "pv") )
+ iommu_pv_enabled = 1;
+ else if ( !strcmp(s, "no-pv") )
+ iommu_pv_enabled = 0;
+ else if ( !strcmp(s, "force") || !strcmp(s, "required") )
+ force_iommu = 1;
+
+ s = ss + 1;
+ } while ( ss );
+}
int iommu_domain_init(struct domain *domain)
{
@@ -126,14 +159,12 @@ static int iommu_populate_page_table(struct domain *d)
return 0;
}
+
void iommu_domain_destroy(struct domain *d)
{
- struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
- uint32_t i;
struct hvm_iommu *hd = domain_hvm_iommu(d);
- struct list_head *ioport_list, *digl_list, *tmp;
+ struct list_head *ioport_list, *tmp;
struct g2m_ioport *ioport;
- struct dev_intx_gsi_link *digl;
if ( !iommu_enabled || !hd->platform_ops )
return;
@@ -148,30 +179,6 @@ void iommu_domain_destroy(struct domain *d)
return;
}
- if ( hvm_irq_dpci != NULL )
- {
- for ( i = 0; i < NR_IRQS; i++ )
- {
- if ( !(hvm_irq_dpci->mirq[i].flags & HVM_IRQ_DPCI_VALID) )
- continue;
-
- pirq_guest_unbind(d, i);
- kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]);
-
- list_for_each_safe ( digl_list, tmp,
- &hvm_irq_dpci->mirq[i].digl_list )
- {
- digl = list_entry(digl_list,
- struct dev_intx_gsi_link, list);
- list_del(&digl->list);
- xfree(digl);
- }
- }
-
- d->arch.hvm_domain.irq.dpci = NULL;
- xfree(hvm_irq_dpci);
- }
-
if ( hd )
{
list_for_each_safe ( ioport_list, tmp, &hd->g2m_ioport_list )
@@ -241,6 +248,9 @@ static int iommu_setup(void)
iommu_enabled = (rc == 0);
out:
+ if ( force_iommu && !iommu_enabled )
+ panic("IOMMU setup failed, crash Xen for security purpose!\n");
+
if ( !iommu_enabled )
iommu_pv_enabled = 0;
printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 6309495f47..df7161d539 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -34,11 +34,11 @@ struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
if ( pdev->bus == bus && pdev->devfn == devfn )
- return pdev;
+ return pdev;
pdev = xmalloc(struct pci_dev);
if ( !pdev )
- return NULL;
+ return NULL;
*((u8*) &pdev->bus) = bus;
*((u8*) &pdev->devfn) = devfn;
@@ -63,12 +63,12 @@ struct pci_dev *pci_lock_pdev(int bus, int devfn)
read_lock(&pcidevs_lock);
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
if ( (pdev->bus == bus || bus == -1) &&
- (pdev->devfn == devfn || devfn == -1) )
- {
- spin_lock(&pdev->lock);
- read_unlock(&pcidevs_lock);
- return pdev;
- }
+ (pdev->devfn == devfn || devfn == -1) )
+ {
+ spin_lock(&pdev->lock);
+ read_unlock(&pcidevs_lock);
+ return pdev;
+ }
read_unlock(&pcidevs_lock);
return NULL;
@@ -81,15 +81,15 @@ struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
read_lock(&pcidevs_lock);
list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
{
- spin_lock(&pdev->lock);
+ spin_lock(&pdev->lock);
if ( (pdev->bus == bus || bus == -1) &&
- (pdev->devfn == devfn || devfn == -1) &&
- (pdev->domain == d) )
- {
- read_unlock(&pcidevs_lock);
- return pdev;
- }
- spin_unlock(&pdev->lock);
+ (pdev->devfn == devfn || devfn == -1) &&
+ (pdev->domain == d) )
+ {
+ read_unlock(&pcidevs_lock);
+ return pdev;
+ }
+ spin_unlock(&pdev->lock);
}
read_unlock(&pcidevs_lock);
@@ -104,19 +104,24 @@ int pci_add_device(u8 bus, u8 devfn)
write_lock(&pcidevs_lock);
pdev = alloc_pdev(bus, devfn);
if ( !pdev )
- goto out;
+ goto out;
ret = 0;
spin_lock(&pdev->lock);
if ( !pdev->domain )
{
- pdev->domain = dom0;
- list_add(&pdev->domain_list, &dom0->arch.pdev_list);
- ret = iommu_add_device(pdev);
+ pdev->domain = dom0;
+ ret = iommu_add_device(pdev);
+ if ( ret )
+ {
+ spin_unlock(&pdev->lock);
+ goto out;
+ }
+ list_add(&pdev->domain_list, &dom0->arch.pdev_list);
}
spin_unlock(&pdev->lock);
printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
out:
write_unlock(&pcidevs_lock);
@@ -131,27 +136,66 @@ int pci_remove_device(u8 bus, u8 devfn)
write_lock(&pcidevs_lock);
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
if ( pdev->bus == bus && pdev->devfn == devfn )
- {
- spin_lock(&pdev->lock);
- ret = iommu_remove_device(pdev);
- if ( pdev->domain )
- list_del(&pdev->domain_list);
- pci_cleanup_msi(pdev);
- free_pdev(pdev);
- printk(XENLOG_DEBUG "PCI remove device %02x:%02x.%x\n", bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
- break;
- }
+ {
+ spin_lock(&pdev->lock);
+ ret = iommu_remove_device(pdev);
+ if ( pdev->domain )
+ list_del(&pdev->domain_list);
+ pci_cleanup_msi(pdev);
+ free_pdev(pdev);
+ printk(XENLOG_DEBUG "PCI remove device %02x:%02x.%x\n", bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+ break;
+ }
write_unlock(&pcidevs_lock);
return ret;
}
+static void pci_clean_dpci_irqs(struct domain *d)
+{
+ struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+ uint32_t i;
+ struct list_head *digl_list, *tmp;
+ struct dev_intx_gsi_link *digl;
+
+ if ( !iommu_enabled )
+ return;
+
+ if ( !is_hvm_domain(d) && !need_iommu(d) )
+ return;
+
+ if ( hvm_irq_dpci != NULL )
+ {
+ for ( i = 0; i < NR_IRQS; i++ )
+ {
+ if ( !(hvm_irq_dpci->mirq[i].flags & HVM_IRQ_DPCI_VALID) )
+ continue;
+
+ pirq_guest_unbind(d, i);
+ kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]);
+
+ list_for_each_safe ( digl_list, tmp,
+ &hvm_irq_dpci->mirq[i].digl_list )
+ {
+ digl = list_entry(digl_list,
+ struct dev_intx_gsi_link, list);
+ list_del(&digl->list);
+ xfree(digl);
+ }
+ }
+
+ d->arch.hvm_domain.irq.dpci = NULL;
+ xfree(hvm_irq_dpci);
+ }
+}
+
void pci_release_devices(struct domain *d)
{
struct pci_dev *pdev;
u8 bus, devfn;
+ pci_clean_dpci_irqs(d);
while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
{
pci_cleanup_msi(pdev);
@@ -171,14 +215,14 @@ static void dump_pci_devices(unsigned char ch)
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
{
- spin_lock(&pdev->lock);
+ spin_lock(&pdev->lock);
printk("%02x:%02x.%x - dom %-3d - MSIs < ",
pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
pdev->domain ? pdev->domain->domain_id : -1);
- list_for_each_entry ( msi, &pdev->msi_list, list )
- printk("%d ", msi->vector);
- printk(">\n");
- spin_unlock(&pdev->lock);
+ list_for_each_entry ( msi, &pdev->msi_list, list )
+ printk("%d ", msi->vector);
+ printk(">\n");
+ spin_unlock(&pdev->lock);
}
read_unlock(&pcidevs_lock);
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 501622edd1..43107b3ae3 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -238,15 +238,15 @@ static int __init acpi_parse_dev_scope(void *start, void *end,
bus = pci_conf_read8(bus, path->dev, path->fn, PCI_SECONDARY_BUS);
path++;
}
-
+
switch ( acpi_scope->dev_type )
{
case ACPI_DEV_P2PBRIDGE:
{
sec_bus = pci_conf_read8(
- bus, path->dev, path->fn, PCI_SECONDARY_BUS);
+ bus, path->dev, path->fn, PCI_SECONDARY_BUS);
sub_bus = pci_conf_read8(
- bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
+ bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
dprintk(XENLOG_INFO VTDPREFIX,
"found bridge: bdf = %x:%x.%x sec = %x sub = %x\n",
bus, path->dev, path->fn, sec_bus, sub_bus);
@@ -255,7 +255,7 @@ static int __init acpi_parse_dev_scope(void *start, void *end,
break;
}
- case ACPI_DEV_MSI_HPET:
+ case ACPI_DEV_MSI_HPET:
dprintk(XENLOG_INFO VTDPREFIX, "found MSI HPET: bdf = %x:%x.%x\n",
bus, path->dev, path->fn);
scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn);
@@ -305,13 +305,6 @@ acpi_parse_one_drhd(struct acpi_dmar_entry_header *header)
int ret = 0;
static int include_all = 0;
- if ( include_all )
- {
- dprintk(XENLOG_WARNING VTDPREFIX,
- "DMAR unit with INCLUDE_ALL is not not the last unit.\n");
- return -EINVAL;
- }
-
dmaru = xmalloc(struct acpi_drhd_unit);
if ( !dmaru )
return -ENOMEM;
@@ -331,6 +324,13 @@ acpi_parse_one_drhd(struct acpi_dmar_entry_header *header)
if ( dmaru->include_all )
{
dprintk(XENLOG_INFO VTDPREFIX, "found INCLUDE_ALL\n");
+ /* Only allow one INCLUDE_ALL */
+ if ( include_all )
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "Only one INCLUDE_ALL device scope is allowed\n");
+ ret = -EINVAL;
+ }
include_all = 1;
}
@@ -349,6 +349,12 @@ acpi_parse_one_rmrr(struct acpi_dmar_entry_header *header)
void *dev_scope_start, *dev_scope_end;
int ret = 0;
+ if ( rmrr->base_address >= rmrr->end_address )
+ {
+ dprintk(XENLOG_ERR VTDPREFIX, "RMRR is incorrect.\n");
+ return -EFAULT;
+ }
+
rmrru = xmalloc(struct acpi_rmrr_unit);
if ( !rmrru )
return -ENOMEM;
@@ -390,7 +396,8 @@ acpi_parse_one_atsr(struct acpi_dmar_entry_header *header)
ret = acpi_parse_dev_scope(dev_scope_start, dev_scope_end,
atsru, ATSR_TYPE);
}
- else {
+ else
+ {
dprintk(XENLOG_INFO VTDPREFIX, "found ALL_PORTS\n");
/* Only allow one ALL_PORTS */
if ( all_ports )
@@ -420,6 +427,9 @@ static int __init acpi_parse_dmar(struct acpi_table_header *table)
if ( !dmar->width )
{
dprintk(XENLOG_WARNING VTDPREFIX, "Zero: Invalid DMAR width\n");
+ if ( force_iommu )
+ panic("acpi_parse_dmar: Invalid DMAR width,"
+ " crash Xen for security purpose!\n");
return -EINVAL;
}
@@ -461,8 +471,15 @@ static int __init acpi_parse_dmar(struct acpi_table_header *table)
if ( ret )
{
- printk(XENLOG_WARNING "Failed to parse ACPI DMAR. Disabling VT-d.\n");
- disable_all_dmar_units();
+ if ( force_iommu )
+ panic("acpi_parse_dmar: Failed to parse ACPI DMAR,"
+ " crash Xen for security purpose!\n");
+ else
+ {
+ printk(XENLOG_WARNING
+ "Failed to parse ACPI DMAR. Disabling VT-d.\n");
+ disable_all_dmar_units();
+ }
}
return ret;
@@ -473,14 +490,16 @@ int acpi_dmar_init(void)
int rc;
rc = -ENODEV;
+ if ( force_iommu )
+ iommu_enabled = 1;
+
if ( !iommu_enabled )
goto fail;
- if ( (rc = vtd_hw_check()) != 0 )
+ rc = acpi_table_parse(ACPI_SIG_DMAR, acpi_parse_dmar);
+ if ( rc )
goto fail;
- acpi_table_parse(ACPI_SIG_DMAR, acpi_parse_dmar);
-
rc = -ENODEV;
if ( list_empty(&acpi_drhd_units) )
goto fail;
@@ -490,6 +509,10 @@ int acpi_dmar_init(void)
return 0;
fail:
+ if ( force_iommu )
+ panic("acpi_dmar_init: acpi_dmar_init failed,"
+ " crash Xen for security purpose!\n");
+
vtd_enabled = 0;
return -ENODEV;
}
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index 0220ae97b3..19091216e9 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -28,6 +28,7 @@ extern struct ir_ctrl *ir_ctrl;
void print_iommu_regs(struct acpi_drhd_unit *drhd);
void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
+void dump_iommu_info(unsigned char key);
int qinval_setup(struct iommu *iommu);
int intremap_setup(struct iommu *iommu);
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 3a11f5f007..d53140b7b0 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -28,6 +28,7 @@
#include <xen/time.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
+#include <xen/keyhandler.h>
#include <asm/paging.h>
#include <asm/msi.h>
#include "iommu.h"
@@ -278,8 +279,8 @@ static void iommu_flush_write_buffer(struct iommu *iommu)
if ( !(val & DMA_GSTS_WBFS) )
break;
if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
- panic("DMAR hardware is malfunctional,"
- " please disable IOMMU\n");
+ panic("%s: DMAR hardware is malfunctional,"
+ " please disable IOMMU\n", __func__);
cpu_relax();
}
spin_unlock_irqrestore(&iommu->register_lock, flag);
@@ -339,7 +340,8 @@ static int flush_context_reg(
if ( !(val & DMA_CCMD_ICC) )
break;
if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
- panic("DMAR hardware is malfunctional, please disable IOMMU\n");
+ panic("%s: DMAR hardware is malfunctional,"
+ " please disable IOMMU\n", __func__);
cpu_relax();
}
spin_unlock_irqrestore(&iommu->register_lock, flag);
@@ -436,20 +438,20 @@ static int flush_iotlb_reg(void *_iommu, u16 did,
if ( !(val & DMA_TLB_IVT) )
break;
if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
- panic("DMAR hardware is malfunctional, please disable IOMMU\n");
+ panic("%s: DMAR hardware is malfunctional,"
+ " please disable IOMMU\n", __func__);
cpu_relax();
}
spin_unlock_irqrestore(&iommu->register_lock, flag);
/* check IOTLB invalidation granularity */
if ( DMA_TLB_IAIG(val) == 0 )
- printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
+ dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
-#ifdef VTD_DEBUG
if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
- printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "IOMMU: tlb flush request %x, actual %x\n",
(u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
-#endif
/* flush context entry will implictly flush write buffer */
return 0;
}
@@ -492,8 +494,8 @@ static int inline iommu_flush_iotlb_psi(
unsigned int align;
struct iommu_flush *flush = iommu_get_flush(iommu);
- BUG_ON(addr & (~PAGE_MASK_4K));
- BUG_ON(pages == 0);
+ ASSERT(!(addr & (~PAGE_MASK_4K)));
+ ASSERT(pages > 0);
/* Fallback to domain selective flush if no PSI support */
if ( !cap_pgsel_inv(iommu->cap) )
@@ -560,8 +562,9 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr)
{
iommu = drhd->iommu;
if ( test_bit(iommu->index, &hd->iommu_bitmap) )
- iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
- addr, 1, 0);
+ if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+ addr, 1, 0))
+ iommu_flush_write_buffer(iommu);
}
unmap_vtd_domain_page(page);
@@ -631,7 +634,10 @@ static int iommu_set_root_entry(struct iommu *iommu)
iommu->root_maddr = alloc_pgtable_maddr();
if ( iommu->root_maddr == 0 )
+ {
+ spin_unlock_irqrestore(&iommu->register_lock, flags);
return -ENOMEM;
+ }
dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
cmd = iommu->gcmd | DMA_GCMD_SRTP;
@@ -645,7 +651,8 @@ static int iommu_set_root_entry(struct iommu *iommu)
if ( sts & DMA_GSTS_RTPS )
break;
if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
- panic("DMAR hardware is malfunctional, please disable IOMMU\n");
+ panic("%s: DMAR hardware is malfunctional,"
+ " please disable IOMMU\n", __func__);
cpu_relax();
}
@@ -673,7 +680,8 @@ static int iommu_enable_translation(struct iommu *iommu)
if ( sts & DMA_GSTS_TES )
break;
if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
- panic("DMAR hardware is malfunctional, please disable IOMMU\n");
+ panic("%s: DMAR hardware is malfunctional,"
+ " please disable IOMMU\n", __func__);
cpu_relax();
}
@@ -701,7 +709,8 @@ int iommu_disable_translation(struct iommu *iommu)
if ( !(sts & DMA_GSTS_TES) )
break;
if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
- panic("DMAR hardware is malfunctional, please disable IOMMU\n");
+ panic("%s: DMAR hardware is malfunctional,"
+ " please disable IOMMU\n", __func__);
cpu_relax();
}
spin_unlock_irqrestore(&iommu->register_lock, flags);
@@ -1261,13 +1270,15 @@ static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
ob = bus; odf = devfn;
if ( !find_pcie_endpoint(&bus, &devfn, &secbus) )
{
- gdprintk(XENLOG_WARNING VTDPREFIX, "domain_context_mapping:invalid");
+ gdprintk(XENLOG_WARNING VTDPREFIX,
+ "domain_context_mapping:invalid\n");
break;
}
if ( ob != bus || odf != devfn )
gdprintk(XENLOG_INFO VTDPREFIX,
- "domain_context_mapping:map: bdf = %x:%x.%x -> %x:%x.%x\n",
+ "domain_context_mapping:map: "
+ "bdf = %x:%x.%x -> %x:%x.%x\n",
ob, PCI_SLOT(odf), PCI_FUNC(odf),
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1302,10 +1313,6 @@ static int domain_context_unmap_one(
struct context_entry *context, *context_entries;
unsigned long flags;
u64 maddr;
- struct acpi_rmrr_unit *rmrr;
- u16 bdf;
- int i;
- unsigned int is_rmrr_device = 0;
maddr = bus_to_context_maddr(iommu, bus);
context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
@@ -1318,25 +1325,11 @@ static int domain_context_unmap_one(
}
spin_lock_irqsave(&iommu->lock, flags);
- if ( domain->domain_id == 0 )
- {
- for_each_rmrr_device ( rmrr, bdf, i )
- {
- if ( PCI_BUS(bdf) == bus && PCI_DEVFN2(bdf) == devfn )
- {
- is_rmrr_device = 1;
- break;
- }
- }
- }
- if ( !is_rmrr_device )
- {
- context_clear_present(*context);
- context_clear_entry(*context);
- iommu_flush_cache_entry(context);
- iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0);
- iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
- }
+ context_clear_present(*context);
+ context_clear_entry(*context);
+ iommu_flush_cache_entry(context);
+ iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0);
+ iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
unmap_vtd_domain_page(context_entries);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1409,12 +1402,15 @@ static int reassign_device_ownership(
pdev_iommu = drhd->iommu;
domain_context_unmap(source, bus, devfn);
+ ret = domain_context_mapping(target, bus, devfn);
+ if ( ret )
+ return ret;
+
write_lock(&pcidevs_lock);
list_move(&pdev->domain_list, &target->arch.pdev_list);
write_unlock(&pcidevs_lock);
pdev->domain = target;
- ret = domain_context_mapping(target, bus, devfn);
spin_unlock(&pdev->lock);
read_lock(&pcidevs_lock);
@@ -1553,16 +1549,16 @@ int iommu_page_mapping(struct domain *domain, paddr_t iova,
index++;
}
- for_each_drhd_unit ( drhd )
+ if ( index > 0 )
{
- iommu = drhd->iommu;
-
- if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
- continue;
-
- if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
- iova, index, 1) )
- iommu_flush_write_buffer(iommu);
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( test_bit(iommu->index, &hd->iommu_bitmap) )
+ if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+ iova, index, 1))
+ iommu_flush_write_buffer(iommu);
+ }
}
return 0;
@@ -1598,15 +1594,59 @@ static int iommu_prepare_rmrr_dev(struct domain *d,
static int intel_iommu_add_device(struct pci_dev *pdev)
{
+ struct acpi_rmrr_unit *rmrr;
+ u16 bdf;
+ int ret, i;
+
if ( !pdev->domain )
return -EINVAL;
- return domain_context_mapping(pdev->domain, pdev->bus, pdev->devfn);
+
+ ret = domain_context_mapping(pdev->domain, pdev->bus, pdev->devfn);
+ if ( ret )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "intel_iommu_add_device: context mapping failed\n");
+ return ret;
+ }
+
+ for_each_rmrr_device ( rmrr, bdf, i )
+ {
+ if ( PCI_BUS(bdf) == pdev->bus && PCI_DEVFN2(bdf) == pdev->devfn )
+ {
+ ret = iommu_prepare_rmrr_dev(pdev->domain, rmrr,
+ pdev->bus, pdev->devfn);
+ if ( ret )
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "intel_iommu_add_device: RMRR mapping failed\n");
+ break;
+ }
+ }
+
+ return ret;
}
static int intel_iommu_remove_device(struct pci_dev *pdev)
{
+ struct acpi_rmrr_unit *rmrr;
+ u16 bdf;
+ int i;
+
if ( !pdev->domain )
return -EINVAL;
+
+ /* If the device belongs to dom0, and it has RMRR, don't remove it
+ * from dom0, because BIOS may use RMRR at booting time.
+ */
+ if ( pdev->domain->domain_id == 0 )
+ {
+ for_each_rmrr_device ( rmrr, bdf, i )
+ {
+ if ( PCI_BUS(bdf) == pdev->bus &&
+ PCI_DEVFN2(bdf) == pdev->devfn )
+ return 0;
+ }
+ }
+
return domain_context_unmap(pdev->domain, pdev->bus, pdev->devfn);
}
@@ -1751,6 +1791,8 @@ int intel_vtd_setup(void)
init_vtd_hw();
+ register_keyhandler('V', dump_iommu_info, "dump iommu info");
+
return 0;
error:
@@ -1803,11 +1845,9 @@ int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
if ( ret )
- {
gdprintk(XENLOG_ERR VTDPREFIX,
"IOMMU: mapping reserved region failed\n");
- return ret;
- }
+ return ret;
}
}
diff --git a/xen/drivers/passthrough/vtd/utils.c b/xen/drivers/passthrough/vtd/utils.c
index 9f26c1a088..ba9420fb3c 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -27,10 +27,7 @@
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
-
-#define INTEL 0x8086
-#define SEABURG 0x4000
-#define C_STEP 2
+#include "extern.h"
int is_usb_device(u8 bus, u8 devfn)
{
@@ -39,32 +36,6 @@ int is_usb_device(u8 bus, u8 devfn)
return (class == 0xc03);
}
-int vtd_hw_check(void)
-{
- u16 vendor, device;
- u8 revision, stepping;
-
- vendor = pci_conf_read16(0, 0, 0, PCI_VENDOR_ID);
- device = pci_conf_read16(0, 0, 0, PCI_DEVICE_ID);
- revision = pci_conf_read8(0, 0, 0, PCI_REVISION_ID);
- stepping = revision & 0xf;
-
- if ( (vendor == INTEL) && (device == SEABURG) )
- {
- if ( stepping < C_STEP )
- {
- dprintk(XENLOG_WARNING VTDPREFIX,
- "*** VT-d disabled - pre C0-step Seaburg found\n");
- dprintk(XENLOG_WARNING VTDPREFIX,
- "*** vendor = %x device = %x revision = %x\n",
- vendor, device, revision);
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
/* Disable vt-d protected memory registers. */
void disable_pmr(struct iommu *iommu)
{
@@ -232,6 +203,111 @@ void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn)
} while ( --level );
}
+void dump_iommu_info(unsigned char key)
+{
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+ int i;
+
+ for_each_drhd_unit ( drhd )
+ {
+ u32 status = 0;
+
+ iommu = drhd->iommu;
+ printk("\niommu %x: nr_pt_levels = %x.\n", iommu->index,
+ iommu->nr_pt_levels);
+
+ if ( ecap_queued_inval(iommu->ecap) || ecap_intr_remap(iommu->ecap) )
+ status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
+
+ printk(" Queued Invalidation: %ssupported%s.\n",
+ ecap_queued_inval(iommu->ecap) ? "" : "not ",
+ (status & DMA_GSTS_QIES) ? " and enabled" : "" );
+
+
+ printk(" Interrupt Remapping: %ssupported%s.\n",
+ ecap_intr_remap(iommu->ecap) ? "" : "not ",
+ (status & DMA_GSTS_IRES) ? " and enabled" : "" );
+
+ if ( status & DMA_GSTS_IRES )
+ {
+ /* Dump interrupt remapping table. */
+ u64 iremap_maddr = dmar_readq(iommu->reg, DMAR_IRTA_REG);
+ int nr_entry = 1 << ((iremap_maddr & 0xF) + 1);
+ struct iremap_entry *iremap_entries =
+ (struct iremap_entry *)map_vtd_domain_page(iremap_maddr);
+
+ printk(" Interrupt remapping table (nr_entry=0x%x. "
+ "Only dump P=1 entries here):\n", nr_entry);
+ printk(" SVT SQ SID DST V AVL DLM TM RH DM "
+ "FPD P\n");
+ for ( i = 0; i < nr_entry; i++ )
+ {
+ struct iremap_entry *p = iremap_entries + i;
+
+ if ( !p->lo.p )
+ continue;
+ printk(" %04x: %x %x %04x %08x %02x %x %x %x %x %x"
+ " %x %x\n", i,
+ (u32)p->hi.svt, (u32)p->hi.sq, (u32)p->hi.sid,
+ (u32)p->lo.dst, (u32)p->lo.vector, (u32)p->lo.avail,
+ (u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
+ (u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
+ }
+
+ unmap_vtd_domain_page(iremap_entries);
+ }
+ }
+
+ /* Dump the I/O xAPIC redirection table(s). */
+ if ( vtd_enabled )
+ {
+ int apic, reg;
+ union IO_APIC_reg_01 reg_01;
+ struct IO_APIC_route_entry rte = { 0 };
+ struct IO_APIC_route_remap_entry *remap;
+ struct ir_ctrl *ir_ctrl;
+
+ for ( apic = 0; apic < nr_ioapics; apic++ )
+ {
+ iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+ ir_ctrl = iommu_ir_ctrl(iommu);
+ if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ||
+ ir_ctrl->iremap_index == -1 )
+ continue;
+
+ printk( "\nRedirection table of IOAPIC %x:\n", apic);
+
+ reg = 1; /* IO xAPIC Version Register. */
+ *IO_APIC_BASE(apic) = reg;
+ reg_01.raw = *(IO_APIC_BASE(apic)+4);
+
+ printk(" #entry IDX FMT MASK TRIG IRR POL STAT DELI VECTOR\n");
+ for ( i = 0; i <= reg_01.bits.entries; i++ )
+ {
+ reg = 0x10 + i*2;
+ *IO_APIC_BASE(apic) = reg;
+ *(((u32 *)&rte) + 0) = *(IO_APIC_BASE(apic)+4);
+
+ *IO_APIC_BASE(apic) = reg + 1;
+ *(((u32 *)&rte) + 1) = *(IO_APIC_BASE(apic)+4);
+
+ remap = (struct IO_APIC_route_remap_entry *) &rte;
+ if ( !remap->format )
+ continue;
+
+ printk(" %02x: %04x %x %x %x %x %x %x"
+ " %x %02x\n", i,
+ (u32)remap->index_0_14 | ((u32)remap->index_15 << 15),
+ (u32)remap->format, (u32)remap->mask, (u32)remap->trigger,
+ (u32)remap->irr, (u32)remap->polarity,
+ (u32)remap->delivery_status, (u32)remap->delivery_mode,
+ (u32)remap->vector);
+ }
+ }
+ }
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/include/acpi/cpufreq/processor_perf.h b/xen/include/acpi/cpufreq/processor_perf.h
index 944aaf63b0..f5251a553c 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -4,6 +4,8 @@
#include <public/platform.h>
#include <public/sysctl.h>
+#define XEN_PX_INIT 0x80000000
+
int get_cpu_id(u8);
int acpi_cpufreq_init(void);
int powernow_cpufreq_init(void);
diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h
index f22f8cea67..aaf0613677 100644
--- a/xen/include/asm-ia64/config.h
+++ b/xen/include/asm-ia64/config.h
@@ -44,8 +44,6 @@
#define CONFIG_IOSAPIC
#define supervisor_mode_kernel (0)
-#define CONFIG_DMA_BITSIZE 32
-
#define PADDR_BITS 48
/* If PERFC is used, include privop maps. */
diff --git a/xen/include/asm-x86/amd-iommu.h b/xen/include/asm-x86/amd-iommu.h
index 8acfb55189..cdc99dd6e8 100644
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -48,10 +48,6 @@ struct amd_iommu {
u8 unit_id;
u8 msi_number;
- u8 root_bus;
- u8 first_devfn;
- u8 last_devfn;
-
u8 pte_not_present_cached;
u8 ht_tunnel_support;
u8 iotlb_support;
@@ -81,6 +77,9 @@ struct amd_iommu {
int msi_cap;
int maskbit;
+
+ int enabled;
+ int vector;
};
struct ivrs_mappings {
@@ -92,5 +91,6 @@ struct ivrs_mappings {
u8 read_permission;
unsigned long addr_range_start;
unsigned long addr_range_length;
+ struct amd_iommu *iommu;
};
#endif /* _ASM_X86_64_AMD_IOMMU_H */
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index 19bb4209f5..68d82791f1 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -97,8 +97,6 @@
/* Primary stack is restricted to 8kB by guard pages. */
#define PRIMARY_STACK_SIZE 8192
-#define CONFIG_DMA_BITSIZE 32
-
#define BOOT_TRAMPOLINE 0x8c000
#define bootsym_phys(sym) \
(((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 81264b0d61..e9e47990c0 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -44,22 +44,17 @@
#define amd_iov_error(fmt, args...)
#endif
-typedef int (*iommu_detect_callback_ptr_t)(
- u8 bus, u8 dev, u8 func, u8 cap_ptr);
-
/* amd-iommu-detect functions */
-int __init scan_for_iommu(iommu_detect_callback_ptr_t iommu_detect_callback);
-int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
- struct amd_iommu *iommu);
-int __init get_iommu_last_downstream_bus(struct amd_iommu *iommu);
+int __init amd_iommu_get_ivrs_dev_entries(void);
+int __init amd_iommu_detect_one_acpi(void *ivhd);
+int __init amd_iommu_detect_acpi(void);
/* amd-iommu-init functions */
-int __init map_iommu_mmio_region(struct amd_iommu *iommu);
-void __init unmap_iommu_mmio_region(struct amd_iommu *iommu);
-void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu);
-void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu);
-void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu);
-void __init enable_iommu(struct amd_iommu *iommu);
+int __init amd_iommu_init(void);
+int __init amd_iommu_init_one(struct amd_iommu *iommu);
+int __init amd_iommu_update_ivrs_mapping_acpi(void);
+void __init amd_iommu_init_cleanup(void);
+int __init amd_iommu_setup_shared_tables(void);
/* mapping functions */
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
@@ -83,11 +78,9 @@ void flush_command_buffer(struct amd_iommu *iommu);
/* find iommu for bdf */
struct amd_iommu *find_iommu_for_device(int bus, int devfn);
-/* amd-iommu-acpi functions */
-int __init parse_ivrs_table(struct acpi_table_header *table);
-
/*interrupt remapping */
-int amd_iommu_setup_intremap_table(void);
+int __init amd_iommu_setup_intremap_table(void);
+int __init deallocate_intremap_table(void);
void invalidate_interrupt_table(struct amd_iommu *iommu, u16 device_id);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h
index 72b6defb97..b364e38d7f 100644
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -160,6 +160,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
/* 1 if "noapic" boot option passed */
extern int skip_ioapic_setup;
+extern int msi_enable;
+
/*
* If we use the IO-APIC for IRQ routing, disable automatic
* assignment of PCI IRQ's.
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 4e478502c3..4909c06591 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -61,7 +61,7 @@ typedef enum {
p2m_invalid = 0, /* Nothing mapped here */
p2m_ram_rw = 1, /* Normal read/write guest RAM */
p2m_ram_logdirty = 2, /* Temporarily read-only for log-dirty */
- p2m_ram_ro = 3, /* Read-only; writes go to the device model */
+ p2m_ram_ro = 3, /* Read-only; writes are silently dropped */
p2m_mmio_dm = 4, /* Reads and write go to the device model */
p2m_mmio_direct = 5, /* Read/write mapping of genuine MMIO area */
} p2m_type_t;
diff --git a/xen/include/public/io/fsif.h b/xen/include/public/io/fsif.h
index 0e1bc822b5..04ef928464 100644
--- a/xen/include/public/io/fsif.h
+++ b/xen/include/public/io/fsif.h
@@ -54,30 +54,31 @@ struct fsif_close_request {
struct fsif_read_request {
uint32_t fd;
- grant_ref_t gref;
+ int32_t pad;
uint64_t len;
uint64_t offset;
+ grant_ref_t grefs[1]; /* Variable length */
};
struct fsif_write_request {
uint32_t fd;
- grant_ref_t gref;
+ int32_t pad;
uint64_t len;
uint64_t offset;
+ grant_ref_t grefs[1]; /* Variable length */
};
struct fsif_stat_request {
uint32_t fd;
- grant_ref_t gref;
};
-/* This structure is a copy of some fields from stat structure, writen to the
- * granted page. */
+/* This structure is a copy of some fields from stat structure, returned
+ * via the ring. */
struct fsif_stat_response {
int32_t stat_mode;
uint32_t stat_uid;
uint32_t stat_gid;
- int32_t pad;
+ int32_t stat_ret;
int64_t stat_size;
int64_t stat_atime;
int64_t stat_mtime;
@@ -165,11 +166,20 @@ struct fsif_response {
uint16_t id;
uint16_t pad1;
uint32_t pad2;
- uint64_t ret_val;
+ union {
+ uint64_t ret_val;
+ struct fsif_stat_response fstat;
+ };
};
typedef struct fsif_response fsif_response_t;
+#define FSIF_RING_ENTRY_SIZE 64
+
+#define FSIF_NR_READ_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_read_request)) / \
+ sizeof(grant_ref_t) + 1)
+#define FSIF_NR_WRITE_GNTS ((FSIF_RING_ENTRY_SIZE - sizeof(struct fsif_write_request)) / \
+ sizeof(grant_ref_t) + 1)
DEFINE_RING_TYPES(fsif, struct fsif_request, struct fsif_response);
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index f310fe8d12..4b444b4c71 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -554,6 +554,7 @@ typedef struct start_info start_info_t;
/* These flags are passed in the 'flags' field of start_info_t. */
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
+#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */
typedef struct dom0_vga_console_info {
uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 20ba06062c..d5c39b99ed 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -30,9 +30,9 @@
extern int vtd_enabled;
extern int iommu_enabled;
extern int iommu_pv_enabled;
+extern int force_iommu;
#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)
-#define domain_vmx_iommu(d) (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
#define MAX_IOMMUS 32
diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
index 5b198d8509..bfdbe5c71a 100644
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -78,7 +78,7 @@ struct vcpu;
extern int pirq_guest_eoi(struct domain *d, int irq);
extern int pirq_guest_unmask(struct domain *d);
extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
-extern int pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_guest_unbind(struct domain *d, int irq);
static inline void set_native_irq_info(int irq, cpumask_t mask)
{
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 9d38805bf3..b6a3faabfa 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -137,7 +137,9 @@ struct vcpu
unsigned long pause_flags;
atomic_t pause_count;
+ /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
u16 virq_to_evtchn[NR_VIRQS];
+ spinlock_t virq_lock;
/* Bitmask of CPUs on which this VCPU may run. */
cpumask_t cpu_affinity;
@@ -541,7 +543,7 @@ static inline void vcpu_unblock(struct vcpu *v)
#define need_iommu(d) ((d)->need_iommu && !(d)->is_hvm)
extern enum cpufreq_controller {
- FREQCTL_none, FREQCTL_dom0_kernel
+ FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
#endif /* __SCHED_H__ */
diff --git a/xen/include/xen/shutdown.h b/xen/include/xen/shutdown.h
index 3f6d0af8db..f29cb7d247 100644
--- a/xen/include/xen/shutdown.h
+++ b/xen/include/xen/shutdown.h
@@ -6,7 +6,7 @@ extern int opt_noreboot;
void dom0_shutdown(u8 reason);
-void machine_restart(void);
+void machine_restart(unsigned int delay_millisecs);
void machine_halt(void);
void machine_power_off(void);
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index fa8c82f8ea..298211cb2a 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -85,8 +85,8 @@ typedef struct { int gcc_is_buggy; } rwlock_t;
/* Ensure a lock is quiescent between two critical operations. */
static inline void spin_barrier(spinlock_t *lock)
{
- spin_lock(lock);
- spin_unlock(lock);
+ do { mb(); } while ( spin_is_locked(lock) );
+ mb();
}
#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED