about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tim Deegan <Tim.Deegan@citrix.com> 2011-03-07 11:34:09 +0000
committer: Tim Deegan <Tim.Deegan@citrix.com> 2011-03-07 11:34:09 +0000
commit: e4cdd74f02eaeea96348dc51534eff8e03d87697 (patch)
tree: 7e6947edf542bd1c6e87205fe7e7cec3d0388fa9
parent: 436323c065f0ace4f469f5854f43423702a5082a (diff)
download: xen-e4cdd74f02eaeea96348dc51534eff8e03d87697.tar.gz
xen-e4cdd74f02eaeea96348dc51534eff8e03d87697.tar.bz2
xen-e4cdd74f02eaeea96348dc51534eff8e03d87697.zip
xen: add "lto=y" option to build Xen with link-time optimizations.
This involves gathering object files from .asm (which will be binary) and object files from .c (which will be in LTO format) separately until the final link. Only tested for x86_64 Xen builds using Clang/LLVM bitcode; it should be possible to do the same with newer GCCs and GIMPLE.

Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
-rw-r--r-- config/StdGNU.mk | 12
-rw-r--r-- xen/Makefile | 2
-rw-r--r-- xen/Rules.mk | 39
-rw-r--r-- xen/arch/x86/Makefile | 28
-rw-r--r-- xen/arch/x86/acpi/Makefile | 4
-rw-r--r-- xen/arch/x86/boot/Makefile | 2
-rw-r--r-- xen/arch/x86/boot/build32.mk | 1
-rw-r--r-- xen/arch/x86/hvm/svm/Makefile | 2
-rw-r--r-- xen/arch/x86/hvm/vmx/Makefile | 2
-rw-r--r-- xen/arch/x86/x86_32/Makefile | 6
-rw-r--r-- xen/arch/x86/x86_64/Makefile | 6
-rw-r--r-- xen/arch/x86/x86_64/compat/Makefile | 2
-rw-r--r-- xen/common/libelf/Makefile | 2
13 files changed, 82 insertions, 26 deletions
diff --git a/config/StdGNU.mk b/config/StdGNU.mk
index d73e644bbf..786efd7e20 100644
--- a/config/StdGNU.mk
+++ b/config/StdGNU.mk
@@ -1,10 +1,11 @@
AS = $(CROSS_COMPILE)as
+LD = $(CROSS_COMPILE)ld
ifeq ($(clang),y)
-LD = $(CROSS_COMPILE)gold
CC = $(CROSS_COMPILE)clang
+LD_LTO = $(CROSS_COMPILE)llvm-ld
else
-LD = $(CROSS_COMPILE)ld
CC = $(CROSS_COMPILE)gcc
+LD_LTO = $(CROSS_COMPILE)ld
endif
CPP = $(CC) -E
AR = $(CROSS_COMPILE)ar
@@ -79,3 +80,10 @@ ifneq ($(clang),y)
CFLAGS += -fno-optimize-sibling-calls
endif
endif
+
+ifeq ($(lto),y)
+CFLAGS += -flto
+ifeq ($(clang),y)
+LDFLAGS += -plugin LLVMgold.so
+endif
+endif
diff --git a/xen/Makefile b/xen/Makefile
index 64f73cb64c..d38caf102c 100644
--- a/xen/Makefile
+++ b/xen/Makefile
@@ -159,4 +159,4 @@ FORCE:
$(MAKE) -f $(BASEDIR)/Rules.mk -C $(*D) $(@F)
%/: FORCE
- $(MAKE) -f $(BASEDIR)/Rules.mk -C $* built_in.o
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C $* built_in.o built_in_bin.o
diff --git a/xen/Rules.mk b/xen/Rules.mk
index 59d0dc7c31..9a9829bcb3 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -9,6 +9,8 @@ perfc_arrays ?= n
lock_profile ?= n
crash_debug ?= n
frame_pointer ?= n
+clang ?= n
+lto ?= n
XEN_ROOT=$(BASEDIR)/..
include $(XEN_ROOT)/Config.mk
@@ -75,7 +77,8 @@ CFLAGS += $(CFLAGS-y)
# Most CFLAGS are safe for assembly files:
# -std=gnu{89,99} gets confused by #-prefixed end-of-line comments
-AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(CFLAGS))
+# -flto makes no sense and annoys clang
+AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(filter-out -flto,$(CFLAGS)))
# LDFLAGS are only passed directly to $(LD)
LDFLAGS += $(LDFLAGS_DIRECT)
@@ -86,18 +89,45 @@ include Makefile
subdir-n := $(patsubst %,%/,$(patsubst %/,%,$(subdir-n) $(subdir-)))
subdir-y := $(patsubst %,%/,$(patsubst %/,%,$(subdir-y)))
-# Add explicitly declared subdirectories to the object list.
+# Add explicitly declared subdirectories to the object lists.
obj-y += $(patsubst %/,%/built_in.o,$(subdir-y))
-# Add implicitly declared subdirectories (in the object list) to the
+# Add implicitly declared subdirectories (in the object lists) to the
# subdirectory list, and rewrite the object-list entry.
subdir-y += $(filter %/,$(obj-y))
obj-y := $(patsubst %/,%/built-in.o,$(obj-y))
subdir-all := $(subdir-y) $(subdir-n)
+ifeq ($(lto),y)
+# Would like to handle all object files as bitcode, but objects made from
+# pure asm are in a different format and have to be collected separately.
+# Mirror the directory tree, collecting them as built_in_bin.o.
+# If there are no binary objects in a given directory, make a dummy .o
+obj-bin-y += $(patsubst %/built_in.o,%/built_in_bin.o,$(filter %/built_in.o,$(obj-y)))
+else
+# For a non-LTO build, bundle obj-bin targets in with the normal objs.
+obj-y += $(obj-bin-y)
+obj-bin-y :=
+endif
+
built_in.o: $(obj-y)
+ifeq ($(obj-y),)
+ $(CC) $(CFLAGS) -c -x c /dev/null -o $@
+else
+ifeq ($(lto),y)
+ $(LD_LTO) -r -o $@ $^
+else
+ $(LD) $(LDFLAGS) -r -o $@ $^
+endif
+endif
+
+built_in_bin.o: $(obj-bin-y)
+ifeq ($(obj-bin-y),)
+ $(CC) $(AFLAGS) -c -x assembler /dev/null -o $@
+else
$(LD) $(LDFLAGS) -r -o $@ $^
+endif
# Force execution of pattern rules (for which PHONY cannot be directly used).
.PHONY: FORCE
@@ -106,6 +136,9 @@ FORCE:
%/built_in.o: FORCE
$(MAKE) -f $(BASEDIR)/Rules.mk -C $* built_in.o
+%/built_in_bin.o: FORCE
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C $* built_in_bin.o
+
.PHONY: clean
clean:: $(addprefix _clean_, $(subdir-all))
rm -f *.o *~ core $(DEPS)
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 808d1f5d7d..b7d1ff608f 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -10,8 +10,8 @@ subdir-$(x86_64) += x86_64
obj-y += apic.o
obj-y += bitops.o
-obj-y += clear_page.o
-obj-y += copy_page.o
+obj-bin-y += clear_page.o
+obj-bin-y += copy_page.o
obj-y += compat.o
obj-y += debug.o
obj-y += delay.o
@@ -65,24 +65,38 @@ $(TARGET): $(TARGET)-syms boot/mkelf32
./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
`$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
+
ALL_OBJS := $(BASEDIR)/arch/x86/boot/built_in.o $(ALL_OBJS)
-$(TARGET)-syms: $(ALL_OBJS) xen.lds
+ifeq ($(lto),y)
+# Gather all LTO objects together
+prelink_lto.o: $(ALL_OBJS)
+ $(LD_LTO) -r -o $@ $^
+
+# Link it with all the binary objects
+prelink.o: $(patsubst %/built_in.o,%/built_in_bin.o,$(ALL_OBJS)) prelink_lto.o
+ $(LD) $(LDFLAGS) -r -o $@ $^
+else
+prelink.o: $(ALL_OBJS)
+ $(LD) $(LDFLAGS) -r -o $@ $^
+endif
+
+$(TARGET)-syms: prelink.o xen.lds
$(MAKE) -f $(BASEDIR)/Rules.mk $(BASEDIR)/common/symbols-dummy.o
- $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
+ $(LD) $(LDFLAGS) -T xen.lds -N prelink.o \
$(BASEDIR)/common/symbols-dummy.o -o $(@D)/.$(@F).0
$(NM) -n $(@D)/.$(@F).0 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).0.S
$(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).0.o
- $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
+ $(LD) $(LDFLAGS) -T xen.lds -N prelink.o \
$(@D)/.$(@F).0.o -o $(@D)/.$(@F).1
$(NM) -n $(@D)/.$(@F).1 | $(BASEDIR)/tools/symbols >$(@D)/.$(@F).1.S
$(MAKE) -f $(BASEDIR)/Rules.mk $(@D)/.$(@F).1.o
- $(LD) $(LDFLAGS) -T xen.lds -N $(ALL_OBJS) \
+ $(LD) $(LDFLAGS) -T xen.lds -N prelink.o \
$(@D)/.$(@F).1.o -o $@
rm -f $(@D)/.$(@F).[0-9]*
asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
- $(CC) $(CFLAGS) -S -o $@ $<
+ $(CC) $(filter-out -flto,$(CFLAGS)) -S -o $@ $<
xen.lds: xen.lds.S
$(CC) -P -E -Ui386 $(AFLAGS) -o $@ $<
diff --git a/xen/arch/x86/acpi/Makefile b/xen/arch/x86/acpi/Makefile
index f31302c368..76b143d1bc 100644
--- a/xen/arch/x86/acpi/Makefile
+++ b/xen/arch/x86/acpi/Makefile
@@ -1,4 +1,4 @@
subdir-y += cpufreq
-obj-y += boot.o
-obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
+obj-y += boot.o power.o suspend.o cpu_idle.o cpuidle_menu.o
+obj-bin-y += wakeup_prot.o
diff --git a/xen/arch/x86/boot/Makefile b/xen/arch/x86/boot/Makefile
index 3a1f91240a..fb921173c2 100644
--- a/xen/arch/x86/boot/Makefile
+++ b/xen/arch/x86/boot/Makefile
@@ -1,4 +1,4 @@
-obj-y += head.o
+obj-bin-y += head.o
head.o: reloc.S
diff --git a/xen/arch/x86/boot/build32.mk b/xen/arch/x86/boot/build32.mk
index 1364d3b509..4dfb12f0a6 100644
--- a/xen/arch/x86/boot/build32.mk
+++ b/xen/arch/x86/boot/build32.mk
@@ -6,6 +6,7 @@ include $(XEN_ROOT)/Config.mk
$(call cc-options-add,CFLAGS,CC,$(EMBEDDED_EXTRA_CFLAGS))
CFLAGS += -Werror -fno-builtin -msoft-float
+CFLAGS := $(filter-out -flto,$(CFLAGS))
# NB. awk invocation is a portable alternative to 'head -n -1'
%.S: %.bin
diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
index 8ef14fd867..dc587ea920 100644
--- a/xen/arch/x86/hvm/svm/Makefile
+++ b/xen/arch/x86/hvm/svm/Makefile
@@ -1,6 +1,6 @@
obj-y += asid.o
obj-y += emulate.o
-obj-y += entry.o
+obj-bin-y += entry.o
obj-y += intr.o
obj-y += svm.o
obj-y += vmcb.o
diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
index f63aeacc22..ce06568304 100644
--- a/xen/arch/x86/hvm/vmx/Makefile
+++ b/xen/arch/x86/hvm/vmx/Makefile
@@ -1,4 +1,4 @@
-obj-y += entry.o
+obj-bin-y += entry.o
obj-y += intr.o
obj-y += realmode.o
obj-y += vmcs.o
diff --git a/xen/arch/x86/x86_32/Makefile b/xen/arch/x86/x86_32/Makefile
index 4e6889601c..4dee4b4cea 100644
--- a/xen/arch/x86/x86_32/Makefile
+++ b/xen/arch/x86/x86_32/Makefile
@@ -1,6 +1,6 @@
obj-y += domain_page.o
-obj-y += entry.o
-obj-y += gpr_switch.o
+obj-bin-y += entry.o
+obj-bin-y += gpr_switch.o
obj-y += mm.o
obj-y += seg_fixup.o
obj-y += traps.o
@@ -9,4 +9,4 @@ obj-y += pci.o
obj-$(crash_debug) += gdbstub.o
-obj-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
+obj-bin-$(supervisor_mode_kernel) += supervisor_mode_kernel.o
diff --git a/xen/arch/x86/x86_64/Makefile b/xen/arch/x86/x86_64/Makefile
index 1309ff33ba..d56e12dd1d 100644
--- a/xen/arch/x86/x86_64/Makefile
+++ b/xen/arch/x86/x86_64/Makefile
@@ -1,7 +1,7 @@
subdir-y += compat
-obj-y += entry.o
-obj-y += gpr_switch.o
+obj-bin-y += entry.o
+obj-bin-y += gpr_switch.o
obj-y += mm.o
obj-y += traps.o
obj-y += machine_kexec.o
@@ -11,7 +11,7 @@ obj-y += mmconf-fam10h.o
obj-y += mmconfig_64.o
obj-y += mmconfig-shared.o
obj-y += compat.o
-obj-y += compat_kexec.o
+obj-bin-y += compat_kexec.o
obj-y += domain.o
obj-y += physdev.o
obj-y += platform_hypercall.o
diff --git a/xen/arch/x86/x86_64/compat/Makefile b/xen/arch/x86/x86_64/compat/Makefile
index 6547e0c737..9401d494cf 100644
--- a/xen/arch/x86/x86_64/compat/Makefile
+++ b/xen/arch/x86/x86_64/compat/Makefile
@@ -1,2 +1,2 @@
-obj-y += entry.o
+obj-bin-y += entry.o
diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile
index 854e738089..470595af6f 100644
--- a/xen/common/libelf/Makefile
+++ b/xen/common/libelf/Makefile
@@ -1,4 +1,4 @@
-obj-y := libelf.o
+obj-bin-y := libelf.o
SECTIONS := text data rodata $(foreach n,1 2 4 8,rodata.str1.$(n)) $(foreach r,rel rel.ro,data.$(r) data.$(r).local)