author     sos22@douglas.cl.cam.ac.uk <sos22@douglas.cl.cam.ac.uk>  2005-08-18 15:27:55 +0000
committer  sos22@douglas.cl.cam.ac.uk <sos22@douglas.cl.cam.ac.uk>  2005-08-18 15:27:55 +0000
commit     fd148643e4c2fe687334539077df3b526aae7a0b (patch)
tree       ded98b1c3307eb3b7ed1b360e1b0cb416feef77a
parent     0b86412f9dad3c5cd990e51b0501f8551353e111 (diff)
Actually make suspending SMP domUs work: the previous commit didn't
bring the other vcpus up correctly.

Signed-off-by: Steven Smith <sos22@cam.ac.uk>
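In outline, the patch pickles every secondary vcpu's register state before the suspend hypercall and reboots each one afterwards at a trampoline that rebinds its event channels. A condensed sketch of the ordering, using only names that appear in the diff below (locking, device quiescing, and error handling omitted):

    /* Condensed from __do_suspend(); a sketch, not a drop-in function. */
    for (i = 0; i < NR_CPUS; i++)
        if (cpu_isset(i, feasible_cpus))
            save_vcpu_context(i, &suspended_cpu_records[i]);

    /* The domain stops inside this hypercall and resumes here after
       the restore. */
    HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);

    for (i = 0; i < NR_CPUS; i++)
        if (cpu_isset(i, feasible_cpus))
            restore_vcpu_context(i, &suspended_cpu_records[i]);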
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c        21
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c            11
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c        16
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c            2
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c               1
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c              77
-rw-r--r--  linux-2.6-xen-sparse/arch/xen/kernel/reboot.c              70
-rw-r--r--  linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h  32
-rw-r--r--  xen/arch/x86/domain.c                                      25
-rw-r--r--  xen/arch/x86/mm.c                                          17
-rw-r--r--  xen/arch/x86/time.c                                         2
-rw-r--r--  xen/common/domain.c                                        12
-rw-r--r--  xen/common/event_channel.c                                 17
-rw-r--r--  xen/common/schedule.c                                      40
-rw-r--r--  xen/include/public/xen.h                                    1
15 files changed, 310 insertions(+), 34 deletions(-)
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
index 76e5d04801..39cf60d6e4 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
@@ -115,20 +115,12 @@ void xen_idle(void)
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /* We shouldn't have to disable interrupts while dead, but
- * some interrupts just don't seem to go away, and this makes
- * it "work" for testing purposes. */
/* Death loop */
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
HYPERVISOR_yield();
- local_irq_disable();
__flush_tlb_all();
cpu_set(smp_processor_id(), cpu_online_map);
- local_irq_enable();
}
#else
static inline void play_dead(void)
@@ -156,12 +148,23 @@ void cpu_idle (void)
rmb();
if (cpu_is_offline(cpu)) {
+ printk("<0>Cpu %d going offline.\n",
+ cpu);
+ local_irq_disable();
+ /* Ack it. From this point on until
+ we get woken up, we're not allowed
+ to take any locks. In particular,
+ don't printk. */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
/* Tell hypervisor to take vcpu down. */
HYPERVISOR_vcpu_down(cpu);
#endif
play_dead();
- }
+ local_irq_enable();
+ printk("<0>Cpu %d back online.\n",
+ cpu);
+ }
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
xen_idle();
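The ordering that matters in the hunks above: the CPU_DEAD ack now happens with interrupts already off, and nothing between the ack and coming back online may take a lock (hence the ban on printk). A sketch of the resulting offline path, assuming the same primitives the diff uses:

    local_irq_disable();
    __get_cpu_var(cpu_state) = CPU_DEAD;  /* ack; no locks from here on  */
    HYPERVISOR_vcpu_down(cpu);            /* hypervisor parks this vcpu  */
    play_dead();                          /* yield until CPU_UP_PREPARE  */
    local_irq_enable();                   /* back online; locks are safe */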
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
index 56729ce885..38c2dfd883 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
@@ -129,9 +129,12 @@ static inline int __prepare_ICR2 (unsigned int mask)
DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+unsigned uber_debug;
+
static inline void __send_IPI_one(unsigned int cpu, int vector)
{
unsigned int evtchn;
+ int r;
evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
// printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
@@ -142,7 +145,11 @@ static inline void __send_IPI_one(unsigned int cpu, int vector)
synch_test_bit(evtchn, &s->evtchn_mask[0]))
;
#endif
- notify_via_evtchn(evtchn);
+ if (uber_debug)
+ printk("<0>Notifying on evtchn %d.\n", evtchn);
+ if ((r = notify_via_evtchn(evtchn)) != 0)
+ printk("<0>Hypervisor stopped us sending an IPI: %d.\n",
+ r);
} else
printk("send_IPI to unbound port %d/%d",
cpu, vector);
@@ -161,6 +168,8 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
if (cpu == smp_processor_id())
continue;
if (cpu_isset(cpu, cpu_online_map)) {
+ if (uber_debug)
+ printk("<0>Sending ipi to %d.\n", cpu);
__send_IPI_one(cpu, vector);
}
}
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
index ea82103e06..1fa524b0e8 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
@@ -1616,3 +1616,19 @@ void smp_resume(void)
smp_intr_init();
local_setup_timer_irq();
}
+
+DECLARE_PER_CPU(int, timer_irq);
+
+void _restore_vcpu(void)
+{
+ int cpu = smp_processor_id();
+ /* We are the first thing the vcpu runs when it comes back,
+ and we are supposed to restore the IPIs and timer
+ interrupts etc. When we return, the vcpu's idle loop will
+ start up again. */
+ printk("<0>_restore_vcpu %d.\n", cpu);
+ _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
+ _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
+ _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
+ _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
+}
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
index 9a9e160550..3b85e34798 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
@@ -745,7 +745,7 @@ static void __init hpet_time_init(void)
#endif
/* Dynamically-mapped IRQ. */
-static DEFINE_PER_CPU(int, timer_irq);
+DEFINE_PER_CPU(int, timer_irq);
static struct irqaction irq_timer = {
timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
diff --git a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
index a826b7e38f..e66a186487 100644
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
@@ -512,6 +512,7 @@ no_context:
printk("%08lx\n", regs->eip);
dump_fault_path(address);
die("Oops", regs, error_code);
+ while(1);
bust_spinlocks(0);
do_exit(SIGKILL);
diff --git a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
index 2abfd67c4b..99d1c76ef0 100644
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
@@ -134,6 +134,8 @@ void force_evtchn_callback(void)
(void)HYPERVISOR_xen_version(0);
}
+extern unsigned uber_debug;
+
/* NB. Interrupts are disabled on entry. */
asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
{
@@ -145,6 +147,8 @@ asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
vcpu_info->evtchn_upcall_pending = 0;
+ if (uber_debug && cpu != 0)
+ printk("<0>evtchn_do_upcall on %d.\n", cpu);
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( l1 != 0 )
@@ -158,9 +162,13 @@ asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
l2 &= ~(1 << l2i);
port = (l1i << 5) + l2i;
- if ( (irq = evtchn_to_irq[port]) != -1 )
+ if (uber_debug && cpu != 0)
+ printk("<0>Port %d.\n", port);
+ if ( (irq = evtchn_to_irq[port]) != -1 ) {
+ if (uber_debug && cpu != 0)
+ printk("<0>irq %d.\n", irq);
do_IRQ(irq, regs);
- else
+ } else
evtchn_device_upcall(port);
}
}
@@ -245,6 +253,71 @@ void unbind_virq_from_irq(int virq)
spin_unlock(&irq_mapping_update_lock);
}
+/* This is only used when restoring a vcpu from an xm save. The ipi is expected
+ to have been bound before we suspended, and so all of the xenolinux
+ state is set up; we only need to restore the Xen side of things.
+ The irq number has to be the same, but the evtchn number can
+ change. */
+void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ op.cmd = EVTCHNOP_bind_ipi;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
+ evtchn = op.u.bind_ipi.port;
+
+ printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
+ ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
+ evtchn);
+
+ evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+ irq_to_evtchn[irq] = -1;
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
+
+ bind_evtchn_to_cpu(evtchn, vcpu);
+
+ spin_unlock(&irq_mapping_update_lock);
+
+ clear_bit(evtchn, HYPERVISOR_shared_info->evtchn_mask);
+}
+
+void _bind_virq_to_irq(int virq, int cpu, int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ op.cmd = EVTCHNOP_bind_virq;
+ op.u.bind_virq.virq = virq;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IRQ %d\n", virq);
+ evtchn = op.u.bind_virq.port;
+
+ evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+ irq_to_evtchn[irq] = -1;
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ spin_unlock(&irq_mapping_update_lock);
+}
+
int bind_ipi_to_irq(int ipi)
{
evtchn_op_t op;
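Both rebind helpers preserve the same invariant: the Linux-side irq number survives the suspend (drivers have cached it), while the Xen-side event-channel port is freshly allocated. A hypothetical post-condition check, not part of the patch:

    /* Hypothetical check of the rebind invariant; not in the patch. */
    int old_port = irq_to_evtchn[irq];                 /* port before rebind */
    _bind_virq_to_irq(VIRQ_TIMER, cpu, irq);
    BUG_ON(evtchn_to_irq[irq_to_evtchn[irq]] != irq);  /* irq unchanged */
    if (irq_to_evtchn[irq] != old_port)
        printk("virq %d moved to port %d\n", VIRQ_TIMER, irq_to_evtchn[irq]);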
diff --git a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
index eb4d712ec7..b1e4e697d6 100644
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
@@ -65,10 +65,56 @@ static int shutting_down = SHUTDOWN_INVALID;
#define cpu_up(x) (-EOPNOTSUPP)
#endif
+static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int r;
+ int gdt_pages;
+ r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+ BUG_ON(r != 0);
+ gdt_pages = (ctxt->gdt_ents + 511) / 512;
+ ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+ for (r = 0; r < gdt_pages; r++) {
+ ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+ }
+}
+
+void _restore_vcpu(void);
+
+static void restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int r;
+ int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+ /* This is kind of a hack, and implicitly relies on the fact that
+ the vcpu stops in a place where all of the call-clobbered
+ registers are already dead. */
+ printk("<0>regs.esp %x.\n", ctxt->user_regs.esp);
+ ctxt->user_regs.esp -= 4;
+ ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+ ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
+
+ ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+ for (r = 0; r < gdt_pages; r++) {
+ ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+ }
+ r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+ if (r != 0) {
+ printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+ return;
+ }
+}
+
+/* Whoever decided that printk should call into the scheduler needs to
+ be taken out and shot */
+#define msg(x) HYPERVISOR_console_io(CONSOLEIO_write, sizeof(x)-1, x)
+
+extern unsigned uber_debug;
+
static int __do_suspend(void *ignore)
{
int i, j;
suspend_record_t *suspend_record;
+ static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
/* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
/* XXX SMH: yes it would :-( */
@@ -158,6 +204,10 @@ static int __do_suspend(void *ignore)
__cli();
+ for (i = 0; i < NR_CPUS; i++)
+ if (cpu_isset(i, feasible_cpus))
+ save_vcpu_context(i, &suspended_cpu_records[i]);
+
#ifdef __i386__
mm_pin_all();
kmem_cache_shrink(pgd_cache);
@@ -173,7 +223,9 @@ static int __do_suspend(void *ignore)
smp_suspend();
#endif
+ msg("xenbus going down.\n");
xenbus_suspend();
+ msg("xenbus gone down.\n");
ctrl_if_suspend();
@@ -187,10 +239,11 @@ static int __do_suspend(void *ignore)
memcpy(&suspend_record->resume_info, &xen_start_info,
sizeof(xen_start_info));
+ msg("Suspending...\n");
/* We'll stop somewhere inside this hypercall. When it returns,
we'll start resuming after the restore. */
HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
-
+ msg("Back from suspension\n");
shutting_down = SHUTDOWN_INVALID;
@@ -217,7 +270,9 @@ static int __do_suspend(void *ignore)
ctrl_if_resume();
+ msg("Here comes the xenbus...\n");
xenbus_resume();
+ msg("xenbus resumed.\n");
#ifdef CONFIG_SMP
smp_resume();
@@ -231,21 +286,32 @@ static int __do_suspend(void *ignore)
usbif_resume();
- preempt_enable();
+ msg("Restoring cpu contexts...\n");
+ for (i = 0; i < NR_CPUS; i++)
+ if (cpu_isset(i, feasible_cpus))
+ restore_vcpu_context(i, &suspended_cpu_records[i]);
+ msg("All vcpus rebooted.\n");
__sti();
out_reenable_cpus:
+ msg("Reenabling cpus.\n");
while (!cpus_empty(feasible_cpus)) {
i = first_cpu(feasible_cpus);
+ printk("<0>Bring up %d/%d.\n", i, num_online_cpus());
+ printk("<0>17 preempt_count %x.\n", preempt_count());
j = cpu_up(i);
+ printk("<0>18 preempt_count %x.\n", preempt_count());
if (j != 0) {
printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
i, j);
err = j;
}
+ printk("<0>%d up.\n", i);
cpu_clear(i, feasible_cpus);
}
+ msg("Reenabled cpus.\n");
+ uber_debug = 0;
out:
if ( suspend_record != NULL )
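restore_vcpu_context() restarts a vcpu by faking a call frame: the saved eip is pushed as a return address and eip is pointed at _restore_vcpu, so the trampoline runs first and then returns into the interrupted code. The same three lines, annotated (x86-32 layout as in the diff; this is why the call-clobbered-registers assumption matters):

    ctxt->user_regs.esp -= 4;                 /* room for a fake return  */
    *(unsigned long *)ctxt->user_regs.esp =   /* ...address: the old eip */
        ctxt->user_regs.eip;
    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
    /* _restore_vcpu() rebinds the timer VIRQ and IPIs, then its ret
       pops the saved eip and the idle loop continues where it stopped. */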
diff --git a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
index e4b7f781e8..3491eef3c1 100644
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
@@ -163,7 +163,7 @@ HYPERVISOR_yield(
TRAP_INSTR
: "=a" (ret), "=b" (ign)
: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -178,7 +178,7 @@ HYPERVISOR_block(
TRAP_INSTR
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -194,7 +194,7 @@ HYPERVISOR_shutdown(
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -210,7 +210,7 @@ HYPERVISOR_reboot(
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -228,7 +228,7 @@ HYPERVISOR_suspend(
: "=a" (ret), "=b" (ign1), "=S" (ign2)
: "0" (__HYPERVISOR_sched_op),
"b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)),
- "S" (srec) : "memory");
+ "S" (srec) : "memory", "ecx");
return ret;
}
@@ -244,7 +244,7 @@ HYPERVISOR_crash(
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
- : "memory" );
+ : "memory", "ecx" );
return ret;
}
@@ -534,7 +534,7 @@ HYPERVISOR_vcpu_down(
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
- : "memory" );
+ : "memory", "ecx", "edx" );
return ret;
}
@@ -550,8 +550,26 @@ HYPERVISOR_vcpu_up(
: "=a" (ret), "=b" (ign1)
: "0" (__HYPERVISOR_sched_op),
"1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
+ : "memory", "ecx" );
+
+ return ret;
+}
+
+static inline int
+HYPERVISOR_vcpu_pickle(
+ int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int ret;
+ unsigned long ign1, ign2;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret), "=b" (ign1), "=c" (ign2)
+ : "0" (__HYPERVISOR_sched_op),
+ "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
+ "2" (ctxt)
: "memory" );
return ret;
}
+
#endif /* __HYPERCALL_H__ */
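The widened clobber lists are the point of this hunk: gcc may otherwise keep live values in ecx (or edx, for vcpu_down) across TRAP_INSTR, and a vcpu that is torn down and restarted inside the hypercall gets those registers back as garbage. A hypothetical wrapper showing the shape every real wrapper above now follows:

    /* Hypothetical example, not a real hypercall wrapper. */
    static inline int
    sched_op_nullary(int subcmd)
    {
        int ret;
        unsigned long ign1;
        __asm__ __volatile__ (
            TRAP_INSTR
            : "=a" (ret), "=b" (ign1)
            : "0" (__HYPERVISOR_sched_op), "1" (subcmd)
            : "memory", "ecx" );    /* declare what is not preserved */
        return ret;
    }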
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index ad8716aac6..f52c9c5e5f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -217,8 +217,16 @@ struct vcpu *arch_alloc_vcpu_struct(void)
return xmalloc(struct vcpu);
}
+/* We assume that vcpu 0 is always the last one to be freed in a
+ domain, i.e. if v->vcpu_id == 0, the domain should be down to a
+ single vcpu. */
void arch_free_vcpu_struct(struct vcpu *v)
{
+ struct vcpu *p;
+ for_each_vcpu(v->domain, p) {
+ if (p->next_in_list == v)
+ p->next_in_list = v->next_in_list;
+ }
xfree(v);
}
@@ -402,8 +410,10 @@ int arch_set_info_guest(
if ( !(c->flags & VGCF_VMX_GUEST) )
{
if ( ((c->user_regs.cs & 3) == 0) ||
- ((c->user_regs.ss & 3) == 0) )
- return -EINVAL;
+ ((c->user_regs.ss & 3) == 0) ) {
+ printf("User regs.cs %x, ss %x.\n", c->user_regs.cs, c->user_regs.ss);
+ return -EINVAL;
+ }
}
clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
@@ -448,8 +458,10 @@ int arch_set_info_guest(
if ( shadow_mode_refcounts(d) )
{
- if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
+ if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) ) {
+ printf("Bad phys_basetab %lx.\n", phys_basetab);
return -EINVAL;
+ }
}
else
{
@@ -457,13 +469,16 @@ int arch_set_info_guest(
if ( !(c->flags & VGCF_VMX_GUEST) )
#endif
if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d,
- PGT_base_page_table) )
+ PGT_base_page_table) ) {
+ printf("Bad phys_basetab2 %lx.\n", phys_basetab);
return -EINVAL;
+ }
}
if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
{
put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
+ printf("Failed to set gdt, %d.\n", rc);
return rc;
}
@@ -485,6 +500,8 @@ int arch_set_info_guest(
/* Don't redo final setup */
set_bit(_VCPUF_initialised, &v->vcpu_flags);
+ printf("Arch set_info_guest succeeded.\n");
+
return 0;
}
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 78270ee57c..a920fadce9 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2631,16 +2631,25 @@ long set_gdt(struct vcpu *v,
int i, nr_pages = (entries + 511) / 512;
unsigned long pfn;
- if ( entries > FIRST_RESERVED_GDT_ENTRY )
+ if ( entries > FIRST_RESERVED_GDT_ENTRY ) {
+ printf("Too many entries in gdt (%d).\n", entries);
return -EINVAL;
+ }
shadow_sync_all(d);
/* Check the pages in the new GDT. */
- for ( i = 0; i < nr_pages; i++ )
- if ( ((pfn = frames[i]) >= max_page) ||
- !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
+ for ( i = 0; i < nr_pages; i++ ) {
+ pfn = frames[i];
+ if (pfn >= max_page) {
+ printf("GDT bad as %ld >= %ld.\n", pfn, max_page);
goto fail;
+ }
+ if (!get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) ) {
+ printf("Frame %ld looks bad.\n", pfn);
+ goto fail;
+ }
+ }
/* Tear down the old GDT. */
destroy_gdt(v);
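The nr_pages arithmetic above (and gdt_pages in reboot.c) follows from each 4 KiB frame holding 512 eight-byte descriptors:

    nr_pages = (entries + 511) / 512;   /* round up to whole frames */
    /* entries =   1..512  -> 1 frame
       entries = 513..1024 -> 2 frames
       entries > FIRST_RESERVED_GDT_ENTRY is rejected above, which
       bounds nr_pages. */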
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 81100e7a1c..657231a86b 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -831,7 +831,7 @@ static void local_time_calibration(void *unused)
tsc_elapsed32 = (u32)tsc_elapsed64;
/* tsc_elapsed > stime_elapsed */
- ASSERT(tsc_elapsed32 != 0);
+ // ASSERT(tsc_elapsed32 != 0);
while ( tsc_elapsed32 <= stime_elapsed32 )
{
tsc_elapsed32 <<= 1;
diff --git a/xen/common/domain.c b/xen/common/domain.c
index b939d90f77..aee0b90140 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -178,6 +178,7 @@ void domain_shutdown(u8 reason)
{
struct domain *d = current->domain;
struct vcpu *v;
+ int cntr;
if ( d->domain_id == 0 )
{
@@ -208,8 +209,17 @@ void domain_shutdown(u8 reason)
}
/* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
- for_each_vcpu ( d, v )
+ cntr = 0;
+ printf("Putting %d to sleep.\n", d->domain_id);
+ for_each_vcpu ( d, v ) {
+ if (test_bit(_VCPUF_down, &v->vcpu_flags)) {
+ printf("vcpu %d is down.\n", v->vcpu_id);
+ } else {
+ printf("vcpu %d is up.\n", v->vcpu_id);
+ }
domain_sleep_nosync(v);
+ cntr++;
+ }
+ printf("Put %d vcpus to sleep for domain shutdown.\n", cntr);
}
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 21e11a69e4..8abb035479 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -292,6 +292,8 @@ static long evtchn_bind_ipi(evtchn_bind_ipi_t *bind)
chn = evtchn_from_port(d, port);
chn->state = ECS_IPI;
chn->notify_vcpu_id = current->vcpu_id;
+ printf("Bound ipi on vcpu %d to port %d.\n", current->vcpu_id,
+ port);
}
spin_unlock(&d->evtchn_lock);
@@ -497,9 +499,24 @@ long evtchn_send(int lport)
evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
break;
case ECS_IPI:
+ if (current->domain->domain_id != 0) {
+ printf("Set %d pending on %d.\n", lport,
+ lchn->notify_vcpu_id);
+ if (lport == 7) {
+ struct vcpu *v = ld->vcpu[lchn->notify_vcpu_id];
+ struct domain *d = v->domain;
+ shared_info_t *s = d->shared_info;
+ printf("pending %x, mask %x, pending_sel %x, upcall_pending %x.\n",
+ s->evtchn_pending[0],
+ s->evtchn_mask[0],
+ v->vcpu_info->evtchn_pending_sel,
+ v->vcpu_info->evtchn_upcall_pending);
+ }
+ }
evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
break;
default:
+ printf("Failed to set %d pending: state %d.\n", lport, lchn->state);
ret = -EINVAL;
}
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 6cf1b7858d..d3d222b35b 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -38,6 +38,8 @@
#include <xen/mm.h>
#include <public/sched_ctl.h>
+extern void arch_getdomaininfo_ctxt(struct vcpu *,
+ struct vcpu_guest_context *);
/* opt_sched: scheduler - default to SEDF */
static char opt_sched[10] = "sedf";
string_param("sched", opt_sched);
@@ -82,7 +84,8 @@ void free_domain_struct(struct domain *d)
int i;
SCHED_OP(free_task, d);
- for (i = 0; i < MAX_VIRT_CPUS; i++)
+ /* vcpu 0 has to be the last one destroyed. */
+ for (i = MAX_VIRT_CPUS-1; i >= 0; i--)
if ( d->vcpu[i] )
arch_free_vcpu_struct(d->vcpu[i]);
@@ -295,10 +298,37 @@ static long do_vcpu_up(int vcpu)
return 0;
}
+static long do_vcpu_pickle(int vcpu, unsigned long arg)
+{
+ struct vcpu *v;
+ vcpu_guest_context_t *c;
+ int ret = 0;
+
+ if (vcpu >= MAX_VIRT_CPUS)
+ return -EINVAL;
+ v = current->domain->vcpu[vcpu];
+ if (!v)
+ return -ESRCH;
+ /* Don't pickle vcpus which are currently running */
+ if (!test_bit(_VCPUF_down, &v->vcpu_flags)) {
+ printf("Pickling a live vcpu?\n");
+ return -EBUSY;
+ }
+ c = xmalloc(vcpu_guest_context_t);
+ if (!c)
+ return -ENOMEM;
+ arch_getdomaininfo_ctxt(v, c);
+ if (copy_to_user((vcpu_guest_context_t *)arg,
+ (const vcpu_guest_context_t *)c, sizeof(*c)))
+ ret = -EFAULT;
+ xfree(c);
+ return ret;
+}
+
/*
* Demultiplex scheduler-related hypercalls.
*/
-long do_sched_op(unsigned long op)
+long do_sched_op(unsigned long op, unsigned long arg)
{
long ret = 0;
@@ -334,6 +364,12 @@ long do_sched_op(unsigned long op)
ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
break;
}
+ case SCHEDOP_vcpu_pickle:
+ {
+ ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg);
+ printf("Pickle result %ld.\n", ret);
+ break;
+ }
default:
ret = -ENOSYS;
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index ca156ca4f5..82b5023a31 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -203,6 +203,7 @@ struct mmuext_op {
#define SCHEDOP_shutdown 2 /* Stop executing this domain. */
#define SCHEDOP_vcpu_down 3 /* make target VCPU not-runnable. */
#define SCHEDOP_vcpu_up 4 /* make target VCPU runnable. */
+#define SCHEDOP_vcpu_pickle 5 /* save a vcpu's context to memory. */
#define SCHEDOP_cmdmask 255 /* 8-bit command. */
#define SCHEDOP_reasonshift 8 /* 8-bit reason code. (SCHEDOP_shutdown) */
#define SCHEDOP_vcpushift 8 /* 8-bit VCPU target. (SCHEDOP_up|down) */
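For reference, the op word packs an 8-bit command and its 8-bit argument into one register; the hypervisor-side decode (cf. do_sched_op in xen/common/schedule.c) reduces to:

    cmd  = op & SCHEDOP_cmdmask;           /* low 8 bits: the command  */
    vcpu = (int)(op >> SCHEDOP_vcpushift); /* next 8 bits: target vcpu */
    /* e.g. pickling vcpu 2:
       op = SCHEDOP_vcpu_pickle | (2 << SCHEDOP_vcpushift) */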