-rw-r--r--  xen/TODO                                                 92
-rw-r--r--  xen/arch/i386/boot/boot.S                                41
-rw-r--r--  xen/arch/i386/entry.S                                   174
-rw-r--r--  xen/arch/i386/mm.c                                        3
-rw-r--r--  xen/arch/i386/process.c                                  23
-rw-r--r--  xen/arch/i386/traps.c                                    98
-rw-r--r--  xen/common/domain.c                                      56
-rw-r--r--  xen/common/kernel.c                                      18
-rw-r--r--  xen/common/memory.c                                       5
-rw-r--r--  xen/common/schedule.c                                     2
-rw-r--r--  xen/include/asm-i386/irq.h                                2
-rw-r--r--  xen/include/asm-i386/ptrace.h                            37
-rw-r--r--  xen/include/asm-i386/system.h                            32
-rw-r--r--  xen/include/xeno/mm.h                                    28
-rw-r--r--  xen/include/xeno/sched.h                                  4
-rw-r--r--  xen/net/dev.c                                             3
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S    22
17 files changed, 280 insertions(+), 360 deletions(-)
diff --git a/xen/TODO b/xen/TODO
index a74b578db1..9f834dd958 100644
--- a/xen/TODO
+++ b/xen/TODO
@@ -7,7 +7,28 @@ longer-term goals.
-- Keir (16/3/03)
-1. ASSIGNING DOMAINS TO PROCESSORS
+1. FIX HANDLING OF NETWORK RINGS
+--------------------------------
+Handling of the transmit rings is currently very broken (for example,
+sending an inter-domain packet will wedge the hypervisor). This is
+because we may handle packets out of order (eg. inter-domain packets
+are handled eagerly, while packets for real interfaces are queued),
+but our current ring design really assumes in-order handling.
+
+A neat fix will be to allow responses to be queued in a different
+order to requests, just as we already do with block-device
+rings. We'll need to add an opaque identifier to ring entries,
+allowing matching of requests and responses, but that's about it.
+
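+A sketch of what such a ring entry might look like (the names here are
+illustrative only, not the eventual interface):
+
+    /* Requests and responses are queued separately, with independent
+     * producer/consumer indices, so the response to request N may be
+     * queued before the response to request N-1. */
+    typedef struct {
+        unsigned long  id;    /* opaque: guest's handle for this pkt */
+        unsigned long  addr;  /* machine address of packet buffer */
+        unsigned short size;
+    } tx_req_entry_t;
+
+    typedef struct {
+        unsigned long  id;    /* echoed from the matching request */
+        short          status;
+    } tx_resp_entry_t;
+
+The guest then matches each completion back to its own per-packet
+state via 'id', just as it already does for block-device rings.
+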
+2. ACCURATE TIMERS AND WALL-CLOCK TIME
+--------------------------------------
+Currently our long-term timebase free runs on CPU0, with no external
+calibration. We should run ntpd on domain 0 and allow this to warp
+Xen's timebase. Once this is done, we can have a timebase per CPU and
+not worry about relative drift (since they'll all get sync'ed
+periodically by ntp).
+
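+One possible shape for the ntpd-to-Xen interface, sketched with
+invented names (nothing here is a real hypercall yet):
+
+    /* Hypothetical correction pushed down from dom0's ntpd: a one-shot
+     * phase step plus an ongoing frequency trim. */
+    typedef struct {
+        long long offset_ns;  /* added to the master timebase once */
+        long      ppm;        /* parts-per-million rate adjustment */
+    } timebase_adj_t;
+
+Each CPU's timebase then only needs resynchronising to the corrected
+master value periodically, bounding relative drift by the sync period.
+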
+3. ASSIGNING DOMAINS TO PROCESSORS
----------------------------------
More intelligent assignment of domains to processors. In
particular, we don't play well with hyperthreading: we will assign
@@ -19,7 +40,7 @@ relationships between processors in the system (eg. which ones are
siblings in the same package). We then use this to balance domains
across packages, and across virtual processors within a package.
-2. PROPER DESTRUCTION OF DOMAINS
+4. PROPER DESTRUCTION OF DOMAINS
--------------------------------
Currently we do not free resources when destroying a domain. This is
because they may be tied up in subsystems, and there is no way of
@@ -31,20 +52,7 @@ per domain (for all its resources). When this reaches zero we know it
is safe to free everything: block-device rings, network rings, and all
the rest.
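
The counting scheme might look like the following; get_domain/put_domain,
the refcnt field and free_all_resources are hypothetical names used only
to illustrate the idea:

    /* Every subsystem holding a reference to the domain takes a count;
     * real teardown happens only when the last reference is dropped. */
    static inline void get_domain(struct task_struct *p)
    {
        atomic_inc(&p->refcnt);
    }

    static inline void put_domain(struct task_struct *p)
    {
        if ( atomic_dec_and_test(&p->refcnt) )
            free_all_resources(p);  /* block rings, net rings, ... */
    }
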
-3. FIX HANDLING OF NETWORK RINGS
---------------------------------
-Handling of the transmit rings is currently very broken (for example,
-sending an inter-domain packet will wedge the hypervisor). This is
-because we may handle packets out of order (eg. inter-domain packets
-are handled eagerly, while packets for real interfaces are queued),
-but our current ring design really assumes in-order handling.
-
-A neat fix will be to allow responses to be queued in a different
-order to requests, just as we already do with block-device
-rings. We'll need to add an opaque identifier to ring entries,
-allowing matching of requests and responses, but that's about it.
-
-4. NETWORK CHECKSUM OFFLOAD
+5. NETWORK CHECKSUM OFFLOAD
---------------------------
All the NICs that we support can checksum packets on behalf of guest
OSes. We need to add appropriate flags to and from each domain to
@@ -52,17 +60,6 @@ indicate, on transmit, which packets need the checksum added and, on
receive, which packets have been checked out as okay. We can steal
Linux's interface, which is entirely sane given NIC limitations.
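
A sketch of the per-packet flags, borrowing Linux's convention as the
item suggests (the TXF_/RXF_ names are invented here):

    /* Hypothetical checksum-offload flags carried in ring entries. */
    #define TXF_CSUM_BLANK  0x1  /* guest->Xen: NIC should fill in the
                                  * checksum on transmit */
    #define RXF_CSUM_VALID  0x1  /* Xen->guest: NIC already verified
                                  * this packet's checksum */

A transmitting guest that skipped checksumming sets TXF_CSUM_BLANK; a
receiving guest skips verification when RXF_CSUM_VALID is set, much as
Linux trusts CHECKSUM_UNNECESSARY.
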
-5. GDT AND LDT VIRTUALISATION
------------------------------
-We do not allow modification of the GDT, or any use of the LDT. This
-is necessary for support of unmodified applications (eg. Linux uses
-LDT in threaded applications, while Windows needs to update GDT
-entries).
-
-I have some text on how to do this:
-/usr/groups/xeno/discussion-docs/memory_management/segment_tables.txt
-It's already half implemented, but the rest is still to do.
-
6. DOMAIN 0 MANAGEMENT DAEMON
-----------------------------
A better control daemon is required for domain 0, which keeps proper
@@ -70,15 +67,7 @@ track of machine resources and can make sensible policy choices. This
may require support in Xen; for example, notifications (eg. DOMn is
killed), and requests (eg. can DOMn allocate x frames of memory?).
-7. ACCURATE TIMERS AND WALL-CLOCK TIME
---------------------------------------
-Currently our long-term timebase free runs on CPU0, with no external
-calibration. We should run ntpd on domain 0 and allow this to warp
-Xen's timebase. Once this is done, we can have a timebase per CPU and
-not worry about relative drift (since they'll all get sync'ed
-periodically by ntp).
-
-8. MODULE SUPPORT FOR XEN
+7. MODULE SUPPORT FOR XEN
-------------------------
Network and blkdev drivers are bloating Xen. At some point we want to
build drivers as modules, stick them in a cheesy ramfs, then relocate
@@ -90,7 +79,7 @@ which drivers to load.
Most of the hard stuff (relocating and the like) is done for us by
Linux's module system.
-9. NEW DESIGN FEATURES
+8. NEW DESIGN FEATURES
----------------------
This includes the last-chance page cache, and the unified buffer cache.
@@ -99,35 +88,6 @@ This includes the last-chance page cache, and the unified buffer cache.
Graveyard
*********
-Following is some description how some of the above might be
-implemented. Some of it is superceded and/or out of date, so follow
-with caution.
-
-Segment descriptor tables
--------------------------
-We want to allow guest OSes to specify GDT and LDT tables using their
-own pages of memory (just like with page tables). So allow the following:
- * new_table_entry(ptr, val)
- [Allows insertion of a code, data, or LDT descriptor into given
- location. Can simply be checked then poked, with no need to look at
- page type.]
- * new_GDT() -- relevent virtual pages are resolved to frames. Either
- (i) page not present; or (ii) page is only mapped read-only and checks
- out okay (then marked as special page). Old table is resolved first,
- and the pages are unmarked (no longer special type).
- * new_LDT() -- same as for new_GDT(), with same special page type.
-
-Page table updates must be hooked, so we look for updates to virtual page
-addresses in the GDT/LDT range. If map to not present, then old physpage
-has type_count decremented. If map to present, ensure read-only, check the
-page, and set special type.
-
-Merge set_{LDT,GDT} into update_baseptr, by passing four args:
- update_baseptrs(mask, ptab, gdttab, ldttab);
-Update of ptab requires update of gtab (or set to internal default).
-Update of gtab requires update of ltab (or set to internal default).
-
-
The hypervisor page cache
-------------------------
This will allow guest OSes to make use of spare pages in the system, but
diff --git a/xen/arch/i386/boot/boot.S b/xen/arch/i386/boot/boot.S
index 70e2d2be42..36078037d4 100644
--- a/xen/arch/i386/boot/boot.S
+++ b/xen/arch/i386/boot/boot.S
@@ -87,43 +87,30 @@ continue_boot_cpu:
xor %eax,%eax
rep stosb
- /* Copy all modules (dom0 + initrd if presetn) to safety, above 48MB */
+ /* Copy all modules (dom0 + initrd if present) out of the Xen heap */
mov (%esp),%eax
cmp $0x2BADB002,%eax
- jne 2f /* skip if magic no good */
-
- sub $__PAGE_OFFSET,%ebx /* turn back into a physaddr */
-
- mov 0x14(%ebx),%edi /* mbi->mods_count */
- dec %edi /* count-- */
-
- jb 2f /* if num modules was zero !!! */
-
+ jne skip_dom0_copy
+ sub $__PAGE_OFFSET,%ebx /* turn back into a phys addr */
+ mov 0x14(%ebx),%edi /* mbi->mods_count */
+ dec %edi /* mbi->mods_count-- */
+ jb skip_dom0_copy /* skip if no modules */
mov 0x18(%ebx),%eax /* mbi->mods_addr */
-
- mov (%eax),%ebx /* mod[0]->mod_start */
-
- shl $4,%edi /* count*16 */
+ mov (%eax),%ebx /* %ebx = mod[0]->mod_start */
+ shl $4,%edi
add %edi,%eax
-
- mov 0x4(%eax),%eax /* mod[mod_count-1]->end */
-
+ mov 0x4(%eax),%eax /* %eax = mod[mod_count-1]->end */
mov %eax,%ecx
- sub %ebx,%ecx /* length in byte */
-
+ sub %ebx,%ecx /* %ecx = byte len of all mods */
mov $(MAX_DIRECTMAP_ADDRESS), %edi
- add %ecx, %edi /* src + length */
-
- shr $2,%ecx /* ecx is length/4 */
-
-1:
- sub $4,%eax /* eax = src, edi = dst */
+ add %ecx, %edi /* %edi = src + length */
+ shr $2,%ecx /* %ecx = length/4 */
+1: sub $4,%eax /* %eax = src, %edi = dst */
sub $4,%edi
mov (%eax),%ebx
mov %ebx,(%edi)
loop 1b
-
-2:
+skip_dom0_copy:
/* Initialize low and high mappings of all memory with 4MB pages */
mov $idle0_pg_table-__PAGE_OFFSET,%edi
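
In C, the rewritten copy loop is a descending word-at-a-time copy;
assuming mod_start/mod_end stand for the physical module bounds read out
of the multiboot info (descriptive names, not symbols in boot.S):

    /* Equivalent of the loop at 1: above; copy the highest word first.
     * Top-down is the overlap-safe direction when the destination
     * lies above the source. */
    unsigned long  len   = mod_end - mod_start;              /* %ecx */
    unsigned long *src   = (unsigned long *)mod_end;         /* %eax */
    unsigned long *dst   = (unsigned long *)
                           (MAX_DIRECTMAP_ADDRESS + len);    /* %edi */
    unsigned long  words = len / 4;
    while ( words-- != 0 )
        *--dst = *--src;
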
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index 166ceeb862..ede1fe45e5 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -19,12 +19,14 @@
* 18(%esp) - %eax
* 1C(%esp) - %ds
* 20(%esp) - %es
- * 24(%esp) - orig_eax
- * 28(%esp) - %eip
- * 2C(%esp) - %cs
- * 30(%esp) - %eflags
- * 34(%esp) - %oldesp
- * 38(%esp) - %oldss
+ * 24(%esp) - %fs
+ * 28(%esp) - %gs
+ * 2C(%esp) - orig_eax
+ * 30(%esp) - %eip
+ * 34(%esp) - %cs
+ * 38(%esp) - %eflags
+ * 3C(%esp) - %oldesp
+ * 40(%esp) - %oldss
*
* "current" is in register %ebx during any slow entries.
*/
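
These offsets track the reworked struct pt_regs one-for-one (see the
asm-i386/ptrace.h hunk later in this diff); written out with the stack
offsets as comments, the two new slots are:

    struct pt_regs {
        long ebx;       /* 0x00 */
        long ecx;       /* 0x04 */
        long edx;       /* 0x08 */
        long esi;       /* 0x0C */
        long edi;       /* 0x10 */
        long ebp;       /* 0x14 */
        long eax;       /* 0x18 */
        int  xds;       /* 0x1C */
        int  xes;       /* 0x20 */
        int  xfs;       /* 0x24 (new) */
        int  xgs;       /* 0x28 (new) */
        long orig_eax;  /* 0x2C */
        long eip;       /* 0x30 */
        int  xcs;       /* 0x34 */
        long eflags;    /* 0x38 */
        long esp;       /* 0x3C */
        int  xss;       /* 0x40 */
    };
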
@@ -56,13 +58,14 @@
* of the frame does an inter-privilege interrupt-return.
*
* Note that the "failsafe callback" uses a special stackframe:
- * { return_DS, return_ES, return_EIP, return_CS, return_EFLAGS, ... }
- * That is, original values for DS/ES are placed on stack rather than
- * in DS/ES themselves. Why? It saves us loading them, only to have them
+ * { return_DS, return_ES, return_FS, return_GS, return_EIP,
+ * return_CS, return_EFLAGS[, return_ESP, return_SS] }
+ * That is, original values for DS/ES/FS/GS are placed on stack rather than
+ * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them
* saved/restored in guest OS. Furthermore, if we load them we may cause
* a fault if they are invalid, which is a hassle to deal with. We avoid
* that problem if we don't load them :-) This property allows us to use
- * the failsafe callback as a fallback: if we ever fault on loading DS/ES
+ * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS
* on return to ring != 0, we can simply package it up as a return via
* the failsafe callback, and let the guest OS sort it out (perhaps by
* killing an application process). Note that we also do this for any
@@ -90,12 +93,14 @@ EBP = 0x14
EAX = 0x18
DS = 0x1C
ES = 0x20
-ORIG_EAX = 0x24
-EIP = 0x28
-CS = 0x2C
-EFLAGS = 0x30
-OLDESP = 0x34
-OLDSS = 0x38
+FS = 0x24
+GS = 0x28
+ORIG_EAX = 0x2C
+EIP = 0x30
+CS = 0x34
+EFLAGS = 0x38
+OLDESP = 0x3C
+OLDSS = 0x40
/* Offsets in task_struct */
PROCESSOR = 0
@@ -113,14 +118,14 @@ EVENTS = 0
EVENTS_ENABLE = 4
/* Offsets in guest_trap_bounce */
-GTB_ERROR_CODE = 0
-GTB_CR2 = 4
-GTB_FLAGS = 8
-GTB_CS = 10
-GTB_EIP = 12
-GTBF_TRAP = 1
-GTBF_TRAP_NOCODE = 2
-GTBF_TRAP_CR2 = 4
+GTB_ERROR_CODE = 0
+GTB_CR2 = 4
+GTB_FLAGS = 8
+GTB_CS = 10
+GTB_EIP = 12
+GTBF_TRAP = 1
+GTBF_TRAP_NOCODE = 2
+GTBF_TRAP_CR2 = 4
CF_MASK = 0x00000001
IF_MASK = 0x00000200
@@ -128,6 +133,8 @@ NT_MASK = 0x00004000
#define SAVE_ALL \
cld; \
+ pushl %gs; \
+ pushl %fs; \
pushl %es; \
pushl %ds; \
pushl %eax; \
@@ -139,7 +146,8 @@ NT_MASK = 0x00004000
pushl %ebx; \
movl $(__HYPERVISOR_DS),%edx; \
movl %edx,%ds; \
- movl %edx,%es;
+ movl %edx,%es; \
+ sti;
#define RESTORE_ALL \
popl %ebx; \
@@ -151,13 +159,17 @@ NT_MASK = 0x00004000
popl %eax; \
1: popl %ds; \
2: popl %es; \
+3: popl %fs; \
+4: popl %gs; \
addl $4,%esp; \
-3: iret; \
+5: iret; \
.section .fixup,"ax"; \
-6: subl $4,%esp; \
- pushl %es; \
-5: pushl %ds; \
-4: pushl %eax; \
+10: subl $4,%esp; \
+ pushl %gs; \
+9: pushl %fs; \
+8: pushl %es; \
+7: pushl %ds; \
+6: pushl %eax; \
pushl %ebp; \
pushl %edi; \
pushl %esi; \
@@ -172,9 +184,11 @@ NT_MASK = 0x00004000
.previous; \
.section __ex_table,"a"; \
.align 4; \
- .long 1b,4b; \
- .long 2b,5b; \
- .long 3b,6b; \
+ .long 1b,6b; \
+ .long 2b,7b; \
+ .long 3b,8b; \
+ .long 4b,9b; \
+ .long 5b,10b; \
.previous
#define GET_CURRENT(reg) \
@@ -315,7 +329,22 @@ process_hyp_events:
/* No special register assumptions */
failsafe_callback:
- GET_CURRENT(%ebx)
+ # Check that we are actually returning to ring != 0 because
+ # we may fault when returning to another ring 0 activation.
+ # This can only occur when restoring FS and GS, which can be avoided
+ # by zeroing those registers and trying again. The outermost ring 0
+ # activation will do a full failsafe callback to the guest OS.
+ # Note that the outermost activation certainly has the "bad" selector
+ # value saved away, since interrupts are always disabled in ring 0
+ # until all segment registers have been saved.
+ movb CS(%esp),%al
+ test $3,%al
+ jnz 1f
+ xorl %eax,%eax
+ movl %eax,FS(%esp)
+ movl %eax,GS(%esp)
+ jmp restore_all
+1: GET_CURRENT(%ebx)
mov PROCESSOR(%ebx),%eax
shl $4,%eax
lea guest_trap_bounce(%eax),%edx
@@ -324,11 +353,15 @@ failsafe_callback:
movl FAILSAFE_SEL(%ebx),%eax
movw %ax,GTB_CS(%edx)
call create_bounce_frame
- subl $8,%esi # add DS/ES to failsafe stack frame
+ subl $16,%esi # add DS/ES/FS/GS to failsafe stack frame
movl DS(%esp),%eax
FAULT1: movl %eax,(%esi)
movl ES(%esp),%eax
FAULT2: movl %eax,4(%esi)
+ movl FS(%esp),%eax
+FAULT3: movl %eax,8(%esi)
+ movl GS(%esp),%eax
+FAULT4: movl %eax,12(%esi)
movl %esi,OLDESP(%esp)
popl %ebx
popl %ecx
@@ -337,8 +370,8 @@ FAULT2: movl %eax,4(%esi)
popl %edi
popl %ebp
popl %eax
- addl $12,%esp
-FAULT3: iret
+ addl $20,%esp # skip DS/ES/FS/GS/ORIG_EAX
+FAULT5: iret
/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK: */
@@ -354,25 +387,25 @@ create_bounce_frame:
shll $8,%eax /* multiply by 256 */
addl $init_tss + 12,%eax
movl (%eax),%esi /* tss->esp1 */
-FAULT4: movl 4(%eax),%ds /* tss->ss1 */
+FAULT6: movl 4(%eax),%ds /* tss->ss1 */
/* base of stack frame must contain ss/esp (inter-priv iret) */
subl $8,%esi
movl OLDESP+4(%esp),%eax
-FAULT5: movl %eax,(%esi)
+FAULT7: movl %eax,(%esi)
movl OLDSS+4(%esp),%eax
-FAULT6: movl %eax,4(%esi)
+FAULT8: movl %eax,4(%esi)
jmp 2f
1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
movl OLDESP+4(%esp),%esi
-FAULT7: movl OLDSS+4(%esp),%ds
+FAULT9: movl OLDSS+4(%esp),%ds
2: /* Construct a stack frame: EFLAGS, CS/EIP */
subl $12,%esi
movl EIP+4(%esp),%eax
-FAULT8: movl %eax,(%esi)
+FAULT10:movl %eax,(%esi)
movl CS+4(%esp),%eax
-FAULT9: movl %eax,4(%esi)
+FAULT11:movl %eax,4(%esi)
movl EFLAGS+4(%esp),%eax
-FAULT10:movl %eax,8(%esi)
+FAULT12:movl %eax,8(%esi)
/* Rewrite our stack frame and return to ring 1. */
/* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
andl $0xfffcbeff,%eax
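
Pictured as a struct, the frame that create_bounce_frame (plus the
optional pushes in process_guest_exception_and_events below) leaves on
the ring-1 stack is, lowest address first (illustrative only):

    struct bounce_frame {
        unsigned long cr2;        /* only if GTBF_TRAP_CR2 is set */
        unsigned long error_code; /* omitted if GTBF_TRAP_NOCODE */
        unsigned long eip;
        unsigned long cs;
        unsigned long eflags;
        unsigned long esp;        /* present only when no ring-1 */
        unsigned long ss;         /*   activation already exists */
    };
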
@@ -390,16 +423,18 @@ FAULT10:movl %eax,8(%esi)
.align 4
.long FAULT1, kill_domain_fixup3 # Fault writing to ring-1 stack
.long FAULT2, kill_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT3, kill_domain_fixup1 # Fault executing failsafe iret
- .long FAULT4, kill_domain_fixup2 # Fault loading ring-1 stack selector
- .long FAULT5, kill_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT6, kill_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT7, kill_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT3, kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT4, kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT5, kill_domain_fixup1 # Fault executing failsafe iret
+ .long FAULT6, kill_domain_fixup2 # Fault loading ring-1 stack selector
+ .long FAULT7, kill_domain_fixup2 # Fault writing to ring-1 stack
.long FAULT8, kill_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT9, kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT9, kill_domain_fixup2 # Fault loading ring-1 stack selector
.long FAULT10,kill_domain_fixup2 # Fault writing to ring-1 stack
- .long FAULT11,kill_domain_fixup3 # Fault writing to ring-1 stack
- .long FAULT12,kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT11,kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT12,kill_domain_fixup2 # Fault writing to ring-1 stack
+ .long FAULT13,kill_domain_fixup3 # Fault writing to ring-1 stack
+ .long FAULT14,kill_domain_fixup3 # Fault writing to ring-1 stack
.previous
# This handler kills domains which experience unrecoverable faults.
@@ -429,12 +464,12 @@ process_guest_exception_and_events:
jnz 2f
subl $4,%esi # push error_code onto guest frame
movl %es:GTB_ERROR_CODE(%edx),%eax
-FAULT11:movl %eax,(%esi)
+FAULT13:movl %eax,(%esi)
test $GTBF_TRAP_CR2,%cl
jz 1f
subl $4,%esi # push %cr2 onto guest frame
movl %es:GTB_CR2(%edx),%eax
-FAULT12:movl %eax,(%esi)
+FAULT14:movl %eax,(%esi)
1: movl %esi,OLDESP(%esp)
2: push %es # unclobber %ds
pop %ds
@@ -463,31 +498,36 @@ ENTRY(divide_error)
pushl $ SYMBOL_NAME(do_divide_error)
ALIGN
error_code:
+ pushl %fs
+ pushl %es
pushl %ds
pushl %eax
- xorl %eax,%eax
+ xorl %eax,%eax
pushl %ebp
pushl %edi
pushl %esi
pushl %edx
- decl %eax # eax = -1
+ decl %eax # eax = -1
pushl %ecx
pushl %ebx
cld
- movl %es,%ecx
- movl ORIG_EAX(%esp), %esi # get the error code
- movl ES(%esp), %edi # get the function address
- movl %eax, ORIG_EAX(%esp)
- movl %ecx, ES(%esp)
- movl %esp,%edx
+ movl %gs,%ecx
+ movl ORIG_EAX(%esp), %esi # get the error code
+ movl GS(%esp), %edi # get the function address
+ movl %eax, ORIG_EAX(%esp)
+ movl %ecx, GS(%esp)
+ movl %esp,%edx
pushl %esi # push the error code
pushl %edx # push the pt_regs pointer
- movl $(__HYPERVISOR_DS),%edx
- movl %edx,%ds
- movl %edx,%es
+ movl $(__HYPERVISOR_DS),%edx
+ movl %edx,%ds
+ movl %edx,%es
GET_CURRENT(%ebx)
- call *%edi
- addl $8,%esp
+ call *%edi
+ # NB. We reenable interrupts AFTER exception processing, as that is
+ # required by the page fault handler (needs to save %cr2)
+ sti
+ addl $8,%esp
jmp ret_from_exception
ENTRY(coprocessor_error)
diff --git a/xen/arch/i386/mm.c b/xen/arch/i386/mm.c
index 2eeaf928d6..239aad1bbe 100644
--- a/xen/arch/i386/mm.c
+++ b/xen/arch/i386/mm.c
@@ -227,6 +227,9 @@ long do_set_gdt(unsigned long *frame_list, unsigned int entries)
current->mm.perdomain_pt[i] = mk_l1_pgentry(0);
if ( pfn == 0 ) continue;
page = frame_table + pfn;
+ ASSERT((page->flags & PG_type_mask) == PGT_gdt_page);
+ ASSERT((page->flags & PG_domain_mask) == current->domain);
+ ASSERT((page->type_count != 0) && (page->tot_count != 0));
put_page_type(page);
put_page_tot(page);
}
diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c
index 3affffcdc8..85f438f4fa 100644
--- a/xen/arch/i386/process.c
+++ b/xen/arch/i386/process.c
@@ -199,8 +199,9 @@ void show_regs(struct pt_regs * regs)
regs->eax,regs->ebx,regs->ecx,regs->edx);
printk("ESI: %08lx EDI: %08lx EBP: %08lx",
regs->esi, regs->edi, regs->ebp);
- printk(" DS: %04x ES: %04x\n",
- 0xffff & regs->xds,0xffff & regs->xes);
+ printk(" DS: %04x ES: %04x FS: %04x GS: %04x\n",
+ 0xffff & regs->xds, 0xffff & regs->xes,
+ 0xffff & regs->xfs, 0xffff & regs->xgs);
__asm__("movl %%cr0, %0": "=r" (cr0));
__asm__("movl %%cr2, %0": "=r" (cr2));
@@ -260,7 +261,7 @@ void new_thread(struct task_struct *p,
* [EAX,EBX,ECX,EDX,EDI,EBP are zero]
*/
p->thread.fs = p->thread.gs = FLAT_RING1_DS;
- regs->xds = regs->xes = regs->xss = FLAT_RING1_DS;
+ regs->xds = regs->xes = regs->xfs = regs->xgs = regs->xss = FLAT_RING1_DS;
regs->xcs = FLAT_RING1_CS;
regs->eip = start_pc;
regs->esp = start_stack;
@@ -313,8 +314,7 @@ void new_thread(struct task_struct *p,
/* NB. prev_p passed in %eax, next_p passed in %edx */
void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
- struct thread_struct *prev = &prev_p->thread,
- *next = &next_p->thread;
+ struct thread_struct *next = &next_p->thread;
struct tss_struct *tss = init_tss + smp_processor_id();
unlazy_fpu(prev_p);
@@ -327,24 +327,11 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
tss->esp1 = next->esp1;
tss->ss1 = next->ss1;
- /*
- * Save away %fs and %gs. No need to save %es and %ds, as
- * those are always kernel segments while inside the kernel.
- */
- asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs));
- asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs));
-
/* Switch GDT and LDT. */
__asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
load_LDT();
/*
- * Restore %fs and %gs.
- */
- loadsegment(fs, next->fs);
- loadsegment(gs, next->gs);
-
- /*
* Now maybe reload the debug registers
*/
if (next->debugreg[7]){
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index a58bfc1d73..5d49f588f8 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -159,8 +159,9 @@ void show_registers(struct pt_regs *regs)
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
regs->esi, regs->edi, regs->ebp, esp);
- printk("ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
+ printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff,
+ regs->xfs & 0xffff, regs->xgs & 0xffff, ss);
show_stack(&regs->esp);
}
@@ -170,10 +171,11 @@ spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
void die(const char * str, struct pt_regs * regs, long err)
{
- spin_lock_irq(&die_lock);
+ unsigned long flags;
+ spin_lock_irqsave(&die_lock, flags);
printk("%s: %04lx,%04lx\n", str, err >> 16, err & 0xffff);
show_registers(regs);
- spin_unlock_irq(&die_lock);
+ spin_unlock_irqrestore(&die_lock, flags);
panic("HYPERVISOR DEATH!!\n");
}
@@ -205,6 +207,7 @@ static void inline do_trap(int trapnr, char *str,
if ( (fixup = search_exception_table(regs->eip)) != 0 )
{
regs->eip = fixup;
+ regs->xfs = regs->xgs = 0;
return;
}
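
All three C-level handlers touched by this patch (do_trap, do_page_fault,
do_general_protection) gain the same two-line recovery; as a helper it
would read (try_fixup is a hypothetical name, the body matches the hunks):

    static int try_fixup(struct pt_regs *regs)
    {
        unsigned long fixup = search_exception_table(regs->eip);
        if ( fixup == 0 )
            return 0;
        regs->eip = fixup;
        /* The NULL selector always loads, so the retried RESTORE_ALL
         * cannot fault again on a bad %fs or %gs. */
        regs->xfs = regs->xgs = 0;
        return 1;
    }
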
@@ -264,9 +267,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
bounce_fault:
- if ( (regs->xcs &3) == 1 )
- printk("Fault at %08x (%08x)\n", addr, regs->eip); /* XXX */
-
ti = p->thread.traps + 14;
gtb->flags = GTBF_TRAP_CR2; /* page fault pushes %cr2 */
gtb->cr2 = addr;
@@ -285,7 +285,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
off = addr - LDT_VIRT_START;
addr = p->mm.ldt_base + off;
- spin_lock_irq(&p->page_lock);
+ spin_lock(&p->page_lock);
pl2e = map_domain_mem(pagetable_val(p->mm.pagetable));
l2e = l2_pgentry_val(pl2e[l2_table_offset(addr)]);
@@ -303,34 +303,30 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
if ( (page->flags & PG_type_mask) != PGT_ldt_page )
{
if ( page->type_count != 0 )
- { /* XXX */
- printk("BOGO TYPE %08lx %ld\n", page->flags, page->type_count);
goto unlock_and_bounce_fault;
- }
+
/* Check all potential LDT entries in the page. */
ldt_page = map_domain_mem(l1e & PAGE_MASK);
for ( i = 0; i < 512; i++ )
if ( !check_descriptor(ldt_page[i*2], ldt_page[i*2+1]) )
- { /* XXX */
- printk("Bad desc!!!!!\n");
goto unlock_and_bounce_fault;
- }
unmap_domain_mem(ldt_page);
+
page->flags &= ~PG_type_mask;
page->flags |= PGT_ldt_page;
- get_page_type(page);
- get_page_tot(page);
}
- p->mm.perdomain_pt[l1_table_offset(off)+16] = mk_l1_pgentry(l1e);
+ get_page_type(page);
+ get_page_tot(page);
+ p->mm.perdomain_pt[l1_table_offset(off)+16] = mk_l1_pgentry(l1e|_PAGE_RW);
- spin_unlock_irq(&p->page_lock);
+ spin_unlock(&p->page_lock);
return;
unlock_and_bounce_fault:
- spin_unlock_irq(&p->page_lock);
+ spin_unlock(&p->page_lock);
goto bounce_fault;
@@ -339,6 +335,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
if ( (fixup = search_exception_table(regs->eip)) != 0 )
{
regs->eip = fixup;
+ regs->xfs = regs->xgs = 0;
return;
}
@@ -420,8 +417,8 @@ asmlinkage void do_general_protection(struct pt_regs *regs, long error_code)
if ( (fixup = search_exception_table(regs->eip)) != 0 )
{
- printk("Hmmmm %08lx -> %08lx (%04lx)\n", regs->eip, fixup, error_code);
regs->eip = fixup;
+ regs->xfs = regs->xgs = 0;
return;
}
@@ -565,31 +562,14 @@ do { \
"3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)
-
-/*
- * This needs to use 'idt_table' rather than 'idt', and
- * thus use the _nonmapped_ version of the IDT, as the
- * Pentium F0 0F bugfix can have resulted in the mapped
- * IDT being write-protected.
- */
void set_intr_gate(unsigned int n, void *addr)
{
_set_gate(idt_table+n,14,0,addr);
}
-static void __init set_trap_gate(unsigned int n, void *addr)
-{
- _set_gate(idt_table+n,15,0,addr);
-}
-
static void __init set_system_gate(unsigned int n, void *addr)
{
- _set_gate(idt_table+n,15,3,addr);
-}
-
-static void __init set_call_gate(void *a, void *addr)
-{
- _set_gate(a,12,3,addr);
+ _set_gate(idt_table+n,14,3,addr);
}
#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
@@ -620,29 +600,37 @@ void set_tss_desc(unsigned int n, void *addr)
void __init trap_init(void)
{
- set_trap_gate(0,&divide_error);
- set_trap_gate(1,&debug);
+ /*
+ * Note that interrupt gates are always used, rather than trap gates. We
+ * must have interrupts disabled until DS/ES/FS/GS are saved because the
+ * first activation must have the "bad" value(s) for these registers and
+ * we may lose them if another activation is installed before they are
+ * saved. The page-fault handler also needs interrupts disabled until %cr2
+ * has been read and saved on the stack.
+ */
+ set_intr_gate(0,&divide_error);
+ set_intr_gate(1,&debug);
set_intr_gate(2,&nmi);
set_system_gate(3,&int3); /* usable from all privilege levels */
set_system_gate(4,&overflow); /* usable from all privilege levels */
- set_trap_gate(5,&bounds);
- set_trap_gate(6,&invalid_op);
- set_trap_gate(7,&device_not_available);
- set_trap_gate(8,&double_fault);
- set_trap_gate(9,&coprocessor_segment_overrun);
- set_trap_gate(10,&invalid_TSS);
- set_trap_gate(11,&segment_not_present);
- set_trap_gate(12,&stack_segment);
- set_trap_gate(13,&general_protection);
+ set_intr_gate(5,&bounds);
+ set_intr_gate(6,&invalid_op);
+ set_intr_gate(7,&device_not_available);
+ set_intr_gate(8,&double_fault);
+ set_intr_gate(9,&coprocessor_segment_overrun);
+ set_intr_gate(10,&invalid_TSS);
+ set_intr_gate(11,&segment_not_present);
+ set_intr_gate(12,&stack_segment);
+ set_intr_gate(13,&general_protection);
set_intr_gate(14,&page_fault);
- set_trap_gate(15,&spurious_interrupt_bug);
- set_trap_gate(16,&coprocessor_error);
- set_trap_gate(17,&alignment_check);
- set_trap_gate(18,&machine_check);
- set_trap_gate(19,&simd_coprocessor_error);
+ set_intr_gate(15,&spurious_interrupt_bug);
+ set_intr_gate(16,&coprocessor_error);
+ set_intr_gate(17,&alignment_check);
+ set_intr_gate(18,&machine_check);
+ set_intr_gate(19,&simd_coprocessor_error);
/* Only ring 1 can access monitor services. */
- _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,15,1,&hypervisor_call);
+ _set_gate(idt_table+HYPERVISOR_CALL_VECTOR,14,1,&hypervisor_call);
/* CPU0 uses the master IDT. */
idt_tables[0] = idt_table;
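
The second argument to _set_gate is the IA-32 gate type: 14 is a 32-bit
interrupt gate, 15 a 32-bit trap gate, and the only architectural
difference is that an interrupt gate clears EFLAGS.IF on entry. With
invented names for the two constants:

    #define GATE_INTR 14   /* 32-bit interrupt gate: CPU clears IF */
    #define GATE_TRAP 15   /* 32-bit trap gate: IF left unchanged  */

    /* e.g. the DPL-1 hypercall gate, now entered with interrupts off: */
    _set_gate(idt_table + HYPERVISOR_CALL_VECTOR, GATE_INTR, 1,
              &hypervisor_call);
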
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 4b0d69cb4b..10b96042a7 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -392,20 +392,19 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
/* Sanity! */
if ( p->domain != 0 ) BUG();
- /* This is all a bit grim. We've moved the modules to the "safe"
- physical memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later
- in this routeine, we're going to copy it down into the region
- that's actually been allocated to domain 0. This is highly likely
- to be overlapping, so we use a forward copy.
-
- MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine
- with 4GB and lots of network/disk cards that allocate loads of
- buffers. We'll have to revist this if we ever support PAE (64GB).
-
- */
-
+ /*
+ * This is all a bit grim. We've moved the modules to the "safe" physical
+ * memory region above MAX_DIRECTMAP_ADDRESS (48MB). Later in this
+ * routine, we're going to copy it down into the region that's actually
+ * been allocated to domain 0. This is highly likely to be overlapping, so
+ * we use a forward copy.
+ *
+ * MAX_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
+ * 4GB and lots of network/disk cards that allocate loads of buffers.
+ * We'll have to revisit this if we ever support PAE (64GB).
+ */
- data_start = map_domain_mem( (unsigned long) phy_data_start );
+ data_start = map_domain_mem((unsigned long)phy_data_start);
if ( strncmp(data_start, "XenoGues", 8) )
{
@@ -480,7 +479,7 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
if ( count < p->tot_pages )
{
page = frame_table + (cur_address >> PAGE_SHIFT);
- page->flags = dom | PGT_writeable_page;
+ page->flags = dom | PGT_writeable_page | PG_need_flush;
page->type_count = page->tot_count = 1;
/* Set up the MPT entry. */
machine_to_phys_mapping[cur_address >> PAGE_SHIFT] = count;
@@ -558,24 +557,21 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
__write_cr3_counted(pagetable_val(p->mm.pagetable));
/* Copy the guest OS image. */
- src = (char *)(phy_data_start + 12);
- vsrc= (char *)(data_start + 12); /* data_start invalid after first page*/
- dst = (char *)virt_load_address;
+ src = (char *)(phy_data_start + 12);
+ vsrc = (char *)(data_start + 12); /* data_start invalid after first page */
+ dst = (char *)virt_load_address;
while ( src < (phy_data_start+data_len) )
- {
+ {
*dst++ = *vsrc++;
src++;
-
if ( (((unsigned long)src) & (PAGE_SIZE-1)) == 0 )
- {
+ {
unmap_domain_mem( vsrc-1 );
vsrc = map_domain_mem( (unsigned long)src );
- }
- }
+ }
+ }
unmap_domain_mem( vsrc );
-
- printk("copy done\n");
-
+
/* Set up start info area. */
memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
virt_startinfo_address->nr_pages = p->tot_pages;
@@ -585,13 +581,13 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
((p->tot_pages - 1) << PAGE_SHIFT);
if ( initrd_len )
- {
+ {
virt_startinfo_address->mod_start = (unsigned long)dst-initrd_len;
virt_startinfo_address->mod_len = initrd_len;
-
- printk("Initrd len 0x%x, start at 0x%08x\n",
- virt_startinfo_address->mod_len, virt_startinfo_address->mod_start);
- }
+ printk("Initrd len 0x%lx, start at 0x%08lx\n",
+ virt_startinfo_address->mod_len,
+ virt_startinfo_address->mod_start);
+ }
/* Add virtual network interfaces and point to them in startinfo. */
while (params->num_vifs-- > 0) {
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index b2316f550d..ad321af3fb 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -192,18 +192,18 @@ void cmain (unsigned long magic, multiboot_info_t *mbi)
new_dom = do_newdomain(0, 0);
if ( new_dom == NULL ) panic("Error creating domain 0\n");
- /* We're going to setup domain0 using the module(s) that we
- stashed safely above our MAX_DIRECTMAP_ADDRESS in boot/Boot.S
-
- The second module, if present, is an initrd ramdisk
+ /*
+ * We're going to setup domain0 using the module(s) that we stashed safely
+ * above our MAX_DIRECTMAP_ADDRESS in boot/boot.S. The second module, if
+ * present, is an initrd ramdisk.
*/
-
if ( setup_guestos(new_dom,
&dom0_params,
- MAX_DIRECTMAP_ADDRESS,
- mod[mbi->mods_count-1].mod_end - mod[0].mod_start, __va(mod[0].string),
- (mbi->mods_count==2)?
- (mod[1].mod_end - mod[1].mod_start):0)
+ (char *)MAX_DIRECTMAP_ADDRESS,
+ mod[mbi->mods_count-1].mod_end - mod[0].mod_start,
+ __va(mod[0].string),
+ (mbi->mods_count == 2) ?
+ (mod[1].mod_end - mod[1].mod_start):0)
!= 0 ) panic("Could not set up DOM0 guest OS\n");
update_dom_time(new_dom->shared_info);
diff --git a/xen/common/memory.c b/xen/common/memory.c
index b7daecbf93..5684aada28 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -176,7 +176,7 @@
#include <asm/uaccess.h>
#include <asm/domain_page.h>
-#if 1
+#if 0
#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
#else
#define MEM_LOG(_f, _a...) ((void)0)
@@ -724,6 +724,9 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
if ( pfn == 0 ) continue;
current->mm.perdomain_pt[i] = mk_l1_pgentry(0);
page = frame_table + pfn;
+ ASSERT((page->flags & PG_type_mask) == PGT_ldt_page);
+ ASSERT((page->flags & PG_domain_mask) == current->domain);
+ ASSERT((page->type_count != 0) && (page->tot_count != 0));
put_page_type(page);
put_page_tot(page);
}
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 59639e4e8c..cabb71a9c5 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -394,7 +394,7 @@ asmlinkage void schedule(void)
#ifndef NDEBUG
if (r_time < ctx_allow) {
- printk("[%02d]: %lx\n", this_cpu, r_time);
+ printk("[%02d]: %lx\n", this_cpu, (unsigned long)r_time);
dump_rqueue(&schedule_data[this_cpu].runqueue, "foo");
}
#endif
diff --git a/xen/include/asm-i386/irq.h b/xen/include/asm-i386/irq.h
index 21c24f4483..5f0e75ea4d 100644
--- a/xen/include/asm-i386/irq.h
+++ b/xen/include/asm-i386/irq.h
@@ -102,6 +102,8 @@ extern char _stext, _etext;
#define SAVE_ALL \
"cld\n\t" \
+ "pushl %gs\n\t" \
+ "pushl %fs\n\t" \
"pushl %es\n\t" \
"pushl %ds\n\t" \
"pushl %eax\n\t" \
diff --git a/xen/include/asm-i386/ptrace.h b/xen/include/asm-i386/ptrace.h
index 540a3b372a..6570cc4e79 100644
--- a/xen/include/asm-i386/ptrace.h
+++ b/xen/include/asm-i386/ptrace.h
@@ -1,28 +1,6 @@
#ifndef _I386_PTRACE_H
#define _I386_PTRACE_H
-#define EBX 0
-#define ECX 1
-#define EDX 2
-#define ESI 3
-#define EDI 4
-#define EBP 5
-#define EAX 6
-#define DS 7
-#define ES 8
-#define FS 9
-#define GS 10
-#define ORIG_EAX 11
-#define EIP 12
-#define CS 13
-#define EFL 14
-#define UESP 15
-#define SS 16
-#define FRAME_SIZE 17
-
-/* this struct defines the way the registers are stored on the
- stack during a system call. */
-
struct pt_regs {
long ebx;
long ecx;
@@ -33,6 +11,8 @@ struct pt_regs {
long eax;
int xds;
int xes;
+ int xfs;
+ int xgs;
long orig_eax;
long eip;
int xcs;
@@ -41,19 +21,6 @@ struct pt_regs {
int xss;
};
-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
-#define PTRACE_GETFPXREGS 18
-#define PTRACE_SETFPXREGS 19
-
-#define PTRACE_SETOPTIONS 21
-
-/* options set using PTRACE_SETOPTIONS */
-#define PTRACE_O_TRACESYSGOOD 0x00000001
-
enum EFLAGS {
EF_CF = 0x00000001,
EF_PF = 0x00000004,
diff --git a/xen/include/asm-i386/system.h b/xen/include/asm-i386/system.h
index a24c5894ef..4200a1051d 100644
--- a/xen/include/asm-i386/system.h
+++ b/xen/include/asm-i386/system.h
@@ -4,8 +4,9 @@
#include <xeno/config.h>
#include <asm/bitops.h>
-struct task_struct; /* one of the stranger aspects of C forward declarations.. */
-extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
+struct task_struct;
+extern void FASTCALL(__switch_to(struct task_struct *prev,
+ struct task_struct *next));
#define prepare_to_switch() do { } while(0)
#define switch_to(prev,next) do { \
@@ -33,30 +34,7 @@ extern void FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *n
:"memory"); \
} while (0)
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg,value) \
- asm volatile("\n" \
- "1:\t" \
- "movl %0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "pushl $0\n\t" \
- "popl %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n\t" \
- ".align 4\n\t" \
- ".long 1b,3b\n" \
- ".previous" \
- : :"m" (*(unsigned int *)&(value)))
-
-/*
- * Clear and set 'TS' bit respectively
- */
+/* Clear and set 'TS' bit respectively */
#define clts() __asm__ __volatile__ ("clts")
#define read_cr0() ({ \
unsigned int __dummy; \
@@ -152,7 +130,7 @@ static inline void __set_64bit_var (unsigned long long *ptr,
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
- * but generally the primitive is invalid, *ptr is output argument. --ANK
+ * but generally the primitive is invalid, *ptr is output argument. --ANK
*/
static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
{
diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h
index 8f75b61f5f..f257caffc7 100644
--- a/xen/include/xeno/mm.h
+++ b/xen/include/xeno/mm.h
@@ -67,12 +67,14 @@ typedef struct pfn_info {
#define REFCNT_PIN_BIT 0x40000000UL
#define get_page_tot(p) ((p)->tot_count++)
-#define put_page_tot(p) (--(p)->tot_count)
+#define put_page_tot(p) \
+ ({ ASSERT((p)->tot_count != 0); --(p)->tot_count; })
#define page_tot_count(p) ((p)->tot_count)
#define set_page_tot_count(p,v) ((p)->tot_count = v)
#define get_page_type(p) ((p)->type_count++)
-#define put_page_type(p) (--(p)->type_count)
+#define put_page_type(p) \
+ ({ ASSERT((p)->type_count != 0); --(p)->type_count; })
#define page_type_count(p) ((p)->type_count)
#define set_page_type_count(p,v) ((p)->type_count = v)
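
A hypothetical caller illustrating the pairing discipline the new
ASSERTs enforce (pin_page/unpin_page are invented for illustration):

    static void pin_page(struct pfn_info *page)
    {
        get_page_type(page);  /* type_count++ */
        get_page_tot(page);   /* tot_count++  */
    }

    static void unpin_page(struct pfn_info *page)
    {
        put_page_type(page);  /* ASSERTs type_count != 0, then decrements */
        put_page_tot(page);   /* ASSERTs tot_count != 0, then decrements  */
    }
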
@@ -95,18 +97,18 @@ typedef struct pfn_info {
#define PGT_gdt_page (5<<24) /* using this page in a GDT? */
#define PGT_ldt_page (6<<24) /* using this page in an LDT? */
#define PGT_writeable_page (7<<24) /* has writable mappings of this page? */
-#define PGT_net_rx_buf (8<<24) /* this page has been pirated by the net code. */
+#define PGT_net_rx_buf (8<<24) /* this page is taken by the net code. */
/*
* This bit indicates that the TLB must be flushed when the type count of this
* frame drops to zero. This is needed on current x86 processors only for
- * frames which have guestos-accessible writeable mappings. In this case we must
- * prevent stale TLB entries allowing the frame to be written if it used for a
- * page table, for example.
+ * frames which have guestos-accessible writeable mappings. In this case we
+ * must prevent stale TLB entries allowing the frame to be written if it is
+ * used for a page table, for example.
*
- * We have this bit because the writeable type is actually also used to pin a page
- * when it is used as a disk read buffer. This doesn't require a TLB flush because
- * the frame never has a mapping in the TLB.
+ * We have this bit because the writeable type is actually also used to pin a
+ * page when it is used as a disk read buffer. This doesn't require a TLB flush
+ * because the frame never has a mapping in the TLB.
*/
#define PG_need_flush (1<<28)
@@ -114,10 +116,10 @@ typedef struct pfn_info {
#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags)
#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
-#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
- do { \
- (_pfn)->flags = (_dom) | PGT_writeable_page; \
- (_pfn)->tot_count = (_pfn)->type_count = 1; \
+#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
+ do { \
+ (_pfn)->flags = (_dom) | PGT_writeable_page | PG_need_flush; \
+ (_pfn)->tot_count = (_pfn)->type_count = 2; \
} while ( 0 )
#define UNSHARE_PFN(_pfn) \
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 2dd43ccb17..8db8cdc1fb 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -80,8 +80,8 @@ struct task_struct {
* Return vectors pushed to us by guest OS.
* The stack frame for events is exactly that of an x86 hardware interrupt.
* The stack frame for a failsafe callback is augmented with saved values
- * for segment registers %ds and %es:
- * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss]
+ * for segment registers %ds, %es, %fs and %gs:
+ * %ds, %es, %fs, %gs, %eip, %cs, %eflags [, %oldesp, %oldss]
*/
unsigned long event_selector; /* 20: entry CS */
unsigned long event_address; /* 24: entry EIP */
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 34caf14aed..d29eb446d3 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -521,7 +521,7 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
g_pfn->tot_count = g_pfn->type_count = 0;
h_pfn->flags = g_pfn->flags & ~PG_type_mask;
- if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page;
+ if (*g_pte & _PAGE_RW) h_pfn->flags |= PGT_writeable_page | PG_need_flush;
g_pfn->flags = 0;
/* Point the guest at the new machine frame. */
@@ -567,7 +567,6 @@ int netif_rx(struct sk_buff *skb)
local_irq_save(flags);
ASSERT(skb->skb_type == SKB_ZERO_COPY);
- ASSERT((skb->data - skb->head) == (18 + ETH_HLEN));
/*
* Offset will include 16 bytes padding from dev_alloc_skb, 14 bytes for
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S
index 0525e2976e..20ae79e50e 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S
@@ -373,13 +373,19 @@ critical_fixup_table:
ENTRY(failsafe_callback)
1: pop %ds
2: pop %es
-3: iret
+3: pop %fs
+4: pop %gs
+5: iret
.section .fixup,"ax"; \
-4: movl $0,(%esp); \
+6: movl $0,(%esp); \
jmp 1b; \
-5: movl $0,(%esp); \
+7: movl $0,(%esp); \
jmp 2b; \
-6: pushl %ss; \
+8: movl $0,(%esp); \
+ jmp 3b; \
+9: movl $0,(%esp); \
+ jmp 4b; \
+10: pushl %ss; \
popl %ds; \
pushl %ss; \
popl %es; \
@@ -388,9 +394,11 @@ ENTRY(failsafe_callback)
.previous; \
.section __ex_table,"a";\
.align 4; \
- .long 1b,4b; \
- .long 2b,5b; \
- .long 3b,6b; \
+ .long 1b,6b; \
+ .long 2b,7b; \
+ .long 3b,8b; \
+ .long 4b,9b; \
+ .long 5b,10b; \
.previous
ENTRY(coprocessor_error)
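
For reference, the failsafe frame this guest-side handler unwinds matches
the augmented layout noted in xen/include/xeno/sched.h above; as a
struct, lowest address first, i.e. in the order popped (illustrative only):

    struct failsafe_frame {
        unsigned long ds, es, fs, gs;   /* popped at labels 1-4 */
        unsigned long eip, cs, eflags;  /* consumed by the iret at 5 */
        /* esp, ss follow if the iret returns to an outer ring */
    };

Each selector fixup (labels 6-9) stores the NULL selector over the
faulting slot and retries the pop; loading selector zero into
%ds/%es/%fs/%gs can never fault, so the retry always succeeds.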