x86: Fix cpu offline bug: add clflush inside dead loop

On some platforms (like Xeon 7400), when hyperthreading is enabled, an offlined thread may be spuriously woken up by its sibling thread, and then return around the loop. This patch explicitly executes clflush on the cache line as a lightweight way to work around the potential issue. Unlike wbinvd, clflush is not a serializing instruction, hence a memory fence is necessary to make sure all load/store operations are visible before flushing the cache line. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> xen-unstable changeset: 23024:fb3950888154 xen-unstable date: Fri Mar 11 17:18:53 2011 +0000 x86: Fix cache flush bug of cpu offline The current Xen cpu offline logic flushes the cache too early, which potentially breaks cache coherency. wbinvd should be the last operation before the cpu goes dead, otherwise the cache may be dirty again, e.g. from something like setting an A bit on page tables. Pointed out by Arjan van de Ven. Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> xen-unstable changeset: 23023:552c3059264e xen-unstable date: Fri Mar 11 17:18:01 2011 +0000 x86: Fix cpu offline bug: cancel SYSIO method when play dead Playing dead is a fragile and tricky point of the cpu offline logic. The Linux kernel has changed how it plays a cpu dead several times: very old kernels supported 3 ways to play a cpu dead (mwait, SYSIO, and halt), just like what cpuidle did when entering C3; later, it dropped mwait and SYSIO support and used only halt to play dead; the latest Linux, 2.6.38, added mwait support for a dead cpu. This patch cancels the SYSIO method when a cpu is dead, consistent with the latest kernel. SYSIO is an obsolete method to enter deep C-states, with some tricky hardware behavior, and is seldom supported on new platforms. Xen experiments indicate that when a cpu is dead, the SYSIO method can trigger an unknown issue which leads to strange errors. We now cancel the SYSIO method when a cpu is dead; after all, correctness is more important than power saving, and, by the way, new platforms use mwait. 
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> xen-unstable changeset: 23022:6c5e68521f1d xen-unstable date: Fri Mar 11 16:27:33 2011 +0000
author: Liu, Jinsong <jinsong.liu@intel.com> 2011-03-11 17:33:30 +0000
committer: Liu, Jinsong <jinsong.liu@intel.com> 2011-03-11 17:33:30 +0000
commit: e91080a5a78c1f51fe887e2151c04d6a5c5d584d (patch)
tree: bb215e53663c8bc6f617e608d903cfc00d3d63aa
parent: ae2d7af49d7c71267f47614150e1d42315ea6607 (diff)
download: xen-e91080a5a78c1f51fe887e2151c04d6a5c5d584d.tar.gz
xen-e91080a5a78c1f51fe887e2151c04d6a5c5d584d.tar.bz2
xen-e91080a5a78c1f51fe887e2151c04d6a5c5d584d.zip
2 files changed, 32 insertions, 17 deletions
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index 7cbcceeb7a..3f947841d0 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -554,7 +554,7 @@ static void acpi_dead_idle(void)
 {
     struct acpi_processor_power *power;
     struct acpi_processor_cx *cx;
-    int unused;
+    void *mwait_ptr;
 
     if ( (power = processor_powers[smp_processor_id()]) == NULL )
         goto default_halt;
@@ -562,28 +562,38 @@ static void acpi_dead_idle(void)
     if ( (cx = &power->states[power->count-1]) == NULL )
         goto default_halt;
 
-    for ( ; ; )
+    mwait_ptr = (void *)&mwait_wakeup(smp_processor_id());
+
+    if ( cx->entry_method == ACPI_CSTATE_EM_FFH )
     {
-        if ( !power->flags.bm_check && cx->type == ACPI_STATE_C3 )
-            ACPI_FLUSH_CPU_CACHE();
+        /*
+         * The cache must be flushed as the last operation before the cpu
+         * goes dead; otherwise the cpu may die holding dirty data, breaking
+         * cache coherency and leading to strange errors.
+         */
+        wbinvd();
 
-        switch ( cx->entry_method )
+        while ( 1 )
         {
-            case ACPI_CSTATE_EM_FFH:
-                /* Not treat interrupt as break event */
-                __monitor((void *)&mwait_wakeup(smp_processor_id()), 0, 0);
-                __mwait(cx->address, 0);
-                break;
-            case ACPI_CSTATE_EM_SYSIO:
-                inb(cx->address);
-                unused = inl(pmtmr_ioport);
-                break;
-            default:
-                goto default_halt;
+            /*
+             * 1. The CLFLUSH is a workaround for erratum AAI65 for
+             * the Xeon 7400 series.  
+             * 2. The WBINVD is insufficient due to the spurious-wakeup
+             * case where we return around the loop.
+             * 3. Unlike wbinvd, clflush is lightweight but is not a serializing
+             * instruction, hence a memory fence is necessary to make sure all
+             * loads/stores are visible before the cache line is flushed.
+             */
+            mb();
+            clflush(mwait_ptr);
+            __monitor(mwait_ptr, 0, 0);
+            mb();
+            __mwait(cx->address, 0);
         }
     }
 
 default_halt:
+    wbinvd();
     for ( ; ; )
         halt();
 }
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 09da6221af..a024e198e9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -93,6 +93,12 @@ static void default_idle(void)
 
 static void default_dead_idle(void)
 {
+    /*
+     * The cache must be flushed as the last operation before the cpu goes
+     * dead; otherwise the cpu may die holding dirty data, breaking cache
+     * coherency and leading to strange errors.
+     */
+    wbinvd();
     for ( ; ; )
         halt();
 }
@@ -100,7 +106,6 @@ static void default_dead_idle(void)
 static void play_dead(void)
 {
     local_irq_disable();
-    wbinvd();
 
     /*
      * NOTE: After cpu_exit_clear, per-cpu variables are no longer accessible,
author	Liu, Jinsong <jinsong.liu@intel.com>	2011-03-11 17:33:30 +0000
committer	Liu, Jinsong <jinsong.liu@intel.com>	2011-03-11 17:33:30 +0000
commit	e91080a5a78c1f51fe887e2151c04d6a5c5d584d (patch)
tree	bb215e53663c8bc6f617e608d903cfc00d3d63aa
parent	ae2d7af49d7c71267f47614150e1d42315ea6607 (diff)
download	xen-e91080a5a78c1f51fe887e2151c04d6a5c5d584d.tar.gz xen-e91080a5a78c1f51fe887e2151c04d6a5c5d584d.tar.bz2 xen-e91080a5a78c1f51fe887e2151c04d6a5c5d584d.zip