/******************************************************************************
 * arch/x86/time.c
 * 
 * Per-CPU time calibration and management.
 * 
 * Copyright (c) 2002-2005, K A Fraser
 * 
 * Portions from Linux are:
 * Copyright (c) 1991, 1992, 1995  Linus Torvalds
 */

#include <xen/config.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/config.h>
#include <xen/init.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/smp.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/keyhandler.h>
#include <xen/guest_access.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/mpspec.h>
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/mc146818rtc.h>
#include <asm/div64.h>
#include <asm/acpi.h>
#include <asm/hpet.h>
#include <io_ports.h>
#include <asm/setup.h> /* for early_time_init */
#include <public/arch-x86/cpuid.h>

/* opt_clocksource: Force clocksource to one of: pit, hpet, cyclone, acpi. */
static char __initdata opt_clocksource[10];
string_param("clocksource", opt_clocksource);

unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
DEFINE_SPINLOCK(rtc_lock);
unsigned long pit0_ticks;
static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
static DEFINE_SPINLOCK(wc_lock);

struct cpu_time {
    u64 local_tsc_stamp;
    s_time_t stime_local_stamp;
    s_time_t stime_master_stamp;
    struct time_scale tsc_scale;
};

struct platform_timesource {
    char *id;
    char *name;
    u64 frequency;
    u64 (*read_counter)(void);
    int (*init)(struct platform_timesource *);
    void (*resume)(struct platform_timesource *);
    int counter_bits;
};

static DEFINE_PER_CPU(struct cpu_time, cpu_time);

/* Calibrate all CPUs to platform timer every EPOCH. */
#define EPOCH MILLISECS(1000)
static struct timer calibration_timer;

/*
 * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
 * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
 * softirq handling will happen in time.
 * 
 * The pit_lock protects the 16- and 32-bit stamp fields as well as the 
 */
static DEFINE_SPINLOCK(pit_lock);
static u16 pit_stamp16;
static u32 pit_stamp32;
static int using_pit;

/*
 * 32-bit division of integer dividend and integer divisor yielding
 * 32-bit fractional quotient.
 */
static inline u32 div_frac(u32 dividend, u32 divisor)
{
    u32 quotient, remainder;
    ASSERT(dividend < divisor);
    asm ( 
        "divl %4"
        : "=a" (quotient), "=d" (remainder)
        : "0" (0), "1" (dividend), "r" (divisor) );
    return quotient;
}

/*
 * 32-bit multiplication of multiplicand and fractional multiplier
 * yielding 32-bit product (radix point at same position as in multiplicand).
 */
static inline u32 mul_frac(u32 multiplicand, u32 multiplier)
{
    u32 product_int, product_frac;
    asm (
        "mul %3"
        : "=a" (product_frac), "=d" (product_int)
        : "0" (multiplicand), "r" (multiplier) );
    return product_int;
}

/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 */
static inline u64 scale_delta(u64 delta, struct time_scale *scale)
{
    u64 product;
#ifdef CONFIG_X86_32
    u32 tmp1, tmp2;
#endif

    if ( scale->shift < 0 )
        delta >>= -scale->shift;
    else
        delta <<= scale->shift;

#ifdef CONFIG_X86_32
    asm (
        "mul  %5       ; "
        "mov  %4,%%eax ; "
        "mov  %%edx,%4 ; "
        "mul  %5       ; "
        "xor  %5,%5    ; "
        "add  %4,%%eax ; "
        "adc  %5,%%edx ; "
        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (scale->mul_frac) );
#else
    asm (
        "mul %%rdx ; shrd $32,%%rdx,%%rax"
        : "=a" (product) : "0" (delta), "d" ((u64)scale->mul_frac) );
#endif

    return product;
}

#define _TS_MUL_FRAC_IDENTITY 0x80000000UL

/* Compute the reciprocal of the given time_scale. */
static inline struct time_scale scale_reciprocal(struct time_scale scale)
{
    struct time_scale reciprocal;
    u32 dividend;

    ASSERT(scale.mul_frac != 0);
    dividend = _TS_MUL_FRAC_IDENTITY;
    reciprocal.shift = 1 - scale.shift;
    while ( unlikely(dividend >= scale.mul_frac) )
    {
        dividend >>= 1;
        reciprocal.shift++;
    }

    asm (
        "divl %4"
        : "=a" (reciprocal.mul_frac), "=d" (dividend)
        : "0" (0), "1" (dividend), "r" (scale.mul_frac) );

    return reciprocal;
}

/*
 * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
 * IPIs in place of local APIC timers
 */
extern int xen_cpuidle;
static cpumask_t pit_broadcast_mask;

static void smp_send_timer_broadcast_ipi(void)
{
    int cpu = smp_processor_id();
    cpumask_t mask;

    cpus_and(mask, cpu_online_map, pit_broadcast_mask);

    if ( cpu_isset(cpu, mask) )
    {
        cpu_clear(cpu, mask);
        raise_softirq(TIMER_SOFTIRQ);
    }

    if ( !cpus_empty(mask) )
    {
        cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
    }
}

static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
    ASSERT(local_irq_is_enabled());

    if ( hpet_legacy_irq_tick() )
        return;

    /* Only for start-of-day interruopt tests in io_apic.c. */
    (*(volatile unsigned long *)&pit0_ticks)++;

    /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
    if ( !cpu_has_apic )
        raise_softirq(TIMER_SOFTIRQ);

    if ( xen_cpuidle )
        smp_send_timer_broadcast_ipi();

    /* Emulate a 32-bit PIT counter. */
    if ( using_pit )
    {
        u16 count;

        spin_lock_irq(&pit_lock);

        outb(0x80, PIT_MODE);
        count  = inb(PIT_CH2);
        count |= inb(PIT_CH2) << 8;

        pit_stamp32 += (u16)(pit_stamp16 - count);
        pit_stamp16 = count;

        spin_unlock_irq(&pit_lock);
    }
}

static struct irqaction __read_mostly irq0 = { timer_interrupt, "timer", NULL };

/* ------ Calibrate the TSC ------- 
 * Return processor ticks per second / CALIBRATE_FRAC.
 */

#define CLOCK_TICK_RATE 1193182 /* system crystal frequency (Hz) */
#define CALIBRATE_FRAC  20      /* calibrate over 50ms */
#define CALIBRATE_LATCH ((CLOCK_TICK_RATE+(CALIBRATE_FRAC/2))/CALIBRATE_FRAC)

static u64 init_pit_and_calibrate_tsc(void)
{
    u64 start, end;
    unsigned long count;

    /* Set PIT channel 0 to HZ Hz. */
#define LATCH (((CLOCK_TICK_RATE)+(HZ/2))/HZ)
    outb_p(0x34, PIT_MODE);        /* binary, mode 2, LSB/MSB, ch 0 */
    outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
    outb(LATCH >> 8, PIT_CH0);     /* MSB *<style>pre { line-height: 125%; margin: 0; }
td.linenos pre { color: #000000; background-color: #f0f0f0; padding: 0 5px 0 5px; }
span.linenos { color: #000000; background-color: #f0f0f0; padding: 0 5px 0 5px; }
td.linenos pre.special { color: #000000; background-color: #ffffc0; padding: 0 5px 0 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding: 0 5px 0 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight { background: #ffffff; }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */</style><div class="highlight"><pre><span></span><span class="cm">/*</span>
<span class="cm"> * patch-cmdline.c - patch the kernel command line on rb532</span>
<span class="cm"> *</span>
<span class="cm"> * Copyright (C) 2006 Felix Fietkau &lt;nbd@nbd.name&gt;</span>
<span class="cm"> *</span>
<span class="cm"> * This program is free software; you can redistribute it and/or</span>
<span class="cm"> * modify it under the terms of the GNU General Public License</span>
<span class="cm"> * as published by the Free Software Foundation; either version 2</span>
<span class="cm"> * of the License, or (at your option) any later version.</span>
<span class="cm"> *</span>
<span class="cm"> * This program is distributed in the hope that it will be useful,</span>
<span class="cm"> * but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
<span class="cm"> * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the</span>
<span class="cm"> * GNU General Public License for more details.</span>
<span class="cm"> *</span>
<span class="cm"> * You should have received a copy of the GNU General Public License</span>
<span class="cm"> * along with this program; if not, write to the Free Software</span>
<span class="cm"> * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.</span>
<span class="cm"> *</span>
<span class="cm"> */</span>

<span class="cp">#include</span> <span class="cpf">&lt;stdio.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;stdlib.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;stddef.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;unistd.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;fcntl.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;sys/mman.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;sys/stat.h&gt;</span><span class="cp"></span>
<span class="cp">#include</span> <span class="cpf">&lt;string.h&gt;</span><span class="cp"></span>

<span class="cp">#define SEARCH_SPACE	(16 * 1024)</span>
<span class="cp">#define CMDLINE_MAX		512</span>

<span class="kt">int</span> <span class="nf">main</span><span class="p">(</span><span class="kt">int</span> <span class="n">argc</span><span class="p">,</span> <span class="kt">char</span> <span class="o">**</span><span class="n">argv</span><span class="p">)</span>
<span class="p">{</span>
	<span class="kt">int</span> <span class="n">fd</span><span class="p">,</span> <span class="n">found</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">len</span><span class="p">,</span> <span class="n">ret</span> <span class="o">=</span> <span class="mi">-1</span><span class="p">;</span>
	<span class="kt">char</span> <span class="o">*</span><span class="n">ptr</span><span class="p">,</span> <span class="o">*</span><span class="n">p</span><span class="p">;</span>
	<span class="kt">unsigned</span> <span class="kt">int</span> <span class="n">search_space</span><span class="p">;</span>

	<span class="k">if</span> <span class="p">(</span><span class="n">argc</span> <span class="o">&lt;=</span> <span class="mi">2</span> <span class="o">||</span> <span class="n">argc</span> <span class="o">&gt;</span> <span class="mi">4</span><span class="p">)</span> <span class="p">{</span>
		<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">&quot;Usage: %s &lt;file&gt; &lt;cmdline&gt; [size]</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">argv</span><span class="p">[</span><span class="mi">0</span><span class="p">]);</span>
		<span class="k">goto</span> <span class="n">err1</span><span class="p">;</span>
	<span class="p">}</span> <span class="k">else</span> <span class="k">if</span> <span class="p">(</span><span class="n">argc</span> <span class="o">==</span> <span class="mi">3</span><span class="p">)</span> <span class="p">{</span>
		<span class="n">fprintf</span><span class="p">(</span><span class="n">stdout</span><span class="p">,</span> <span class="s">&quot;search space used is default of 16KB</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">);</span>
		<span class="n">search_space</span> <span class="o">=</span> <span class="n">SEARCH_SPACE</span><span class="p">;</span>
	<span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
		<span class="n">search_space</span> <span class="o">=</span> <span class="n">atoi</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">3</span><span class="p">]);</span>
	<span class="p">}</span>
	<span class="n">len</span> <span class="o">=</span> <span class="n">strlen</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]);</span>
	<span class="k">if</span> <span class="p">(</span><span class="n">len</span> <span class="o">+</span> <span class="mi">9</span> <span class="o">&gt;</span> <span class="n">CMDLINE_MAX</span><span class="p">)</span> <span class="p">{</span>
		<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">&quot;Command line string too long</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">);</span>
		<span class="k">goto</span> <span class="n">err1</span><span class="p">;</span>
	<span class="p">}</span>
	
	<span class="k">if</span> <span class="p">(((</span><span class="n">fd</span> <span class="o">=</span> <span class="n">open</span><span class="p">(</span><span class="n">argv</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">O_RDWR</span><span class="p">))</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">)</span> <span class="o">||</span>
		<span class="p">(</span><span class="n">ptr</span> <span class="o">=</span> <span class="p">(</span><span class="kt">char</span> <span class="o">*</span><span class="p">)</span> <span class="n">mmap</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">search_space</span> <span class="o">+</span> <span class="n">CMDLINE_MAX</span><span class="p">,</span> <span class="n">PROT_READ</span><span class="o">|</span><span class="n">PROT_WRITE</span><span class="p">,</span> <span class="n">MAP_SHARED</span><span class="p">,</span> <span class="n">fd</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span> <span class="o">==</span> <span class="p">(</span><span class="kt">void</span> <span class="o">*</span><span class="p">)</span> <span class="p">(</span><span class="mi">-1</span><span class="p">))</span> <span class="p">{</span>
		<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">&quot;Could not open kernel image&quot;</span><span class="p">);</span>
		<span class="k">goto</span> <span class="n">err2</span><span class="p">;</span>
	<span class="p">}</span>
	
	<span class="k">for</span> <span class="p">(</span><span class="n">p</span> <span class="o">=</span> <span class="n">ptr</span><span class="p">;</span> <span class="n">p</span> <span class="o">&lt;</span> <span class="p">(</span><span class="n">ptr</span> <span class="o">+</span> <span class="n">search_space</span><span class="p">);</span> <span class="n">p</span> <span class="o">+=</span> <span class="mi">4</span><span class="p">)</span> <span class="p">{</span>
		<span class="k">if</span> <span class="p">(</span><span class="n">memcmp</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="s">&quot;CMDLINE:&quot;</span><span class="p">,</span> <span class="mi">8</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
			<span class="n">found</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>
			<span class="n">p</span> <span class="o">+=</span> <span class="mi">8</span><span class="p">;</span>
			<span class="k">break</span><span class="p">;</span>
		<span class="p">}</span>
	<span class="p">}</span>
	<span class="k">if</span> <span class="p">(</span><span class="o">!</span><span class="n">found</span><span class="p">)</span> <span class="p">{</span>
		<span class="n">fprintf</span><span class="p">(</span><span class="n">stderr</span><span class="p">,</span> <span class="s">&quot;Command line marker not found!</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">);</span>
		<span class="k">goto</span> <span class="n">err3</span><span class="p">;</span>
	<span class="p">}</span>

	<span class="n">memset</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">CMDLINE_MAX</span> <span class="o">-</span> <span class="mi">8</span><span class="p">);</span>
	<span class="n">strcpy</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">argv</span><span class="p">[</span><span class="mi">2</span><span class="p">]);</span>
	<span class="n">msync</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">CMDLINE_MAX</span><span class="p">,</span> <span class="n">MS_SYNC</span><span class="o">|</span><span class="n">MS_INVALIDATE</span><span class="p">);</span>
	<span class="n">ret</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span>

<span class="nl">err3</span><span class="p">:</span>
	<span class="n">munmap</span><span class="p">((</span><span class="kt">void</span> <span class="o">*</span><span class="p">)</span> <span class="n">ptr</span><span class="p">,</span> <span class="n">len</span><span class="p">);</span>
<span class="nl">err2</span><span class="p">:</span>
	<span class="k">if</span> <span class="p">(</span><span class="n">fd</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span>
		<span class="n">close</span><span class="p">(</span><span class="n">fd</span><span class="p">);</span>
<span class="nl">err1</span><span class="p">:</span>
	<span class="k">return</span> <span class="n">ret</span><span class="p">;</span>
<span class="p">}</span>
</pre></div>
</code></pre></td></tr></table>
</div> <!-- class=content -->
<div class='footer'>generated by <a href='https://git.zx2c4.com/cgit/about/'>cgit v1.2.3</a> (<a href='https://git-scm.com/'>git 2.25.1</a>) at 2026-02-17 09:44:43 +0000</div>
</div> <!-- id=cgit -->
</body>
</html>
uest;
    if ( !guest_handle_is_null(user_u) )
    {
        /* 1. Update userspace version. */
        __copy_field_to_guest(user_u, &_u, version);
        wmb();
        /* 2. Update all other userspavce fields. */
        __copy_to_guest(user_u, &_u, 1);
        wmb();
        /* 3. Update userspace version. */
        _u.version = version_update_end(_u.version);
        __copy_field_to_guest(user_u, &_u, version);
    }
}

void update_vcpu_system_time(struct vcpu *v)
{
    __update_vcpu_system_time(v, 0);
}

void force_update_vcpu_system_time(struct vcpu *v)
{
    __update_vcpu_system_time(v, 1);
}

void update_domain_wallclock_time(struct domain *d)
{
    uint32_t *wc_version;

    spin_lock(&wc_lock);

    wc_version = &shared_info(d, wc_version);
    *wc_version = version_update_begin(*wc_version);
    wmb();

    shared_info(d, wc_sec)  = wc_sec + d->time_offset_seconds;
    shared_info(d, wc_nsec) = wc_nsec;

    wmb();
    *wc_version = version_update_end(*wc_version);

    spin_unlock(&wc_lock);
}

static void update_domain_rtc(void)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for_each_domain ( d )
        if ( is_hvm_domain(d) )
            rtc_update_clock(d);

    rcu_read_unlock(&domlist_read_lock);
}

void domain_set_time_offset(struct domain *d, int32_t time_offset_seconds)
{
    d->time_offset_seconds = time_offset_seconds;
    if ( is_hvm_domain(d) )
        rtc_update_clock(d);
}

int cpu_frequency_change(u64 freq)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    u64 curr_tsc;

    /* Sanity check: CPU frequency allegedly dropping below 1MHz? */
    if ( freq < 1000000u )
    {
        gdprintk(XENLOG_WARNING, "Rejecting CPU frequency change "
                 "to %"PRIu64" Hz.\n", freq);
        return -EINVAL;
    }

    local_irq_disable();
    /* Platform time /first/, as we may be delayed by platform_timer_lock. */
    t->stime_master_stamp = read_platform_stime();
    /* TSC-extrapolated time may be bogus after frequency change. */
    /*t->stime_local_stamp = get_s_time();*/
    t->stime_local_stamp = t->stime_master_stamp;
    rdtscll(curr_tsc);
    t->local_tsc_stamp = curr_tsc;
    set_time_scale(&t->tsc_scale, freq);
    local_irq_enable();

    update_vcpu_system_time(current);

    /* A full epoch should pass before we check for deviation. */
    if ( smp_processor_id() == 0 )
    {
        set_timer(&calibration_timer, NOW() + EPOCH);
        platform_time_calibration();
    }

    return 0;
}

/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base)
{
    u64 x;
    u32 y, _wc_sec, _wc_nsec;
    struct domain *d;

    x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base;
    y = do_div(x, 1000000000);

    spin_lock(&wc_lock);
    wc_sec  = _wc_sec  = (u32)x;
    wc_nsec = _wc_nsec = (u32)y;
    spin_unlock(&wc_lock);

    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        update_domain_wallclock_time(d);
    rcu_read_unlock(&domlist_read_lock);
}

/* Per-CPU communication between rendezvous IRQ and softirq handler. */
struct cpu_calibration {
    u64 local_tsc_stamp;
    s_time_t stime_local_stamp;
    s_time_t stime_master_stamp;
};
static DEFINE_PER_CPU(struct cpu_calibration, cpu_calibration);

/* Softirq handler for per-CPU time calibration. */
static void local_time_calibration(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    struct cpu_calibration *c = &this_cpu(cpu_calibration);

    /*
     * System timestamps, extrapolated from local and master oscillators,
     * taken during this calibration and the previous calibration.
     */
    s_time_t prev_local_stime, curr_local_stime;
    s_time_t prev_master_stime, curr_master_stime;

    /* TSC timestamps taken during this calibration and prev calibration. */
    u64 prev_tsc, curr_tsc;

    /*
     * System time and TSC ticks elapsed during the previous calibration
     * 'epoch'. These values are down-shifted to fit in 32 bits.
     */
    u64 stime_elapsed64, tsc_elapsed64;
    u32 stime_elapsed32, tsc_elapsed32;

    /* The accumulated error in the local estimate. */
    u64 local_stime_err;

    /* Error correction to slow down a fast local clock. */
    u32 error_factor = 0;

    /* Calculated TSC shift to ensure 32-bit scale multiplier. */
    int tsc_shift = 0;

    /* The overall calibration scale multiplier. */
    u32 calibration_mul_frac;

    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
    {
        /* Atomically read cpu_calibration struct and write cpu_time struct. */
        local_irq_disable();
        t->local_tsc_stamp    = c->local_tsc_stamp;
        t->stime_local_stamp  = c->stime_master_stamp;
        t->stime_master_stamp = c->stime_master_stamp;
        local_irq_enable();
        update_vcpu_system_time(current);
        goto out;
    }

    prev_tsc          = t->local_tsc_stamp;
    prev_local_stime  = t->stime_local_stamp;
    prev_master_stime = t->stime_master_stamp;

    /* Disabling IRQs ensures we atomically read cpu_calibration struct. */
    local_irq_disable();
    curr_tsc          = c->local_tsc_stamp;
    curr_local_stime  = c->stime_local_stamp;
    curr_master_stime = c->stime_master_stamp;
    local_irq_enable();

#if 0
    printk("PRE%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64"\n",
           smp_processor_id(), prev_tsc, prev_local_stime, prev_master_stime);
    printk("CUR%d: tsc=%"PRIu64" stime=%"PRIu64" master=%"PRIu64
           " -> %"PRId64"\n",
           smp_processor_id(), curr_tsc, curr_local_stime, curr_master_stime,
           curr_master_stime - curr_local_stime);
#endif

    /* Local time warps forward if it lags behind master time. */
    if ( curr_local_stime < curr_master_stime )
        curr_local_stime = curr_master_stime;

    stime_elapsed64 = curr_master_stime - prev_master_stime;
    tsc_elapsed64   = curr_tsc - prev_tsc;

    /*
     * Weirdness can happen if we lose sync with the platform timer.
     * We could be smarter here: resync platform timer with local timer?
     */
    if ( ((s64)stime_elapsed64 < (EPOCH / 2)) )
        goto out;

    /*
     * Calculate error-correction factor. This only slows down a fast local
     * clock (slow clocks are warped forwards). The scale factor is clamped
     * to >= 0.5.
     */
    if ( curr_local_stime != curr_master_stime )
    {
        local_stime_err = curr_local_stime - curr_master_stime;
        if ( local_stime_err > EPOCH )
            local_stime_err = EPOCH;
        error_factor = div_frac(EPOCH, EPOCH + (u32)local_stime_err);
    }

    /*
     * We require 0 < stime_elapsed < 2^31.
     * This allows us to binary shift a 32-bit tsc_elapsed such that:
     * stime_elapsed < tsc_elapsed <= 2*stime_elapsed
     */
    while ( ((u32)stime_elapsed64 != stime_elapsed64) ||
            ((s32)stime_elapsed64 < 0) )
    {
        stime_elapsed64 >>= 1;
        tsc_elapsed64   >>= 1;
    }

    /* stime_master_diff now fits in a 32-bit word. */
    stime_elapsed32 = (u32)stime_elapsed64;

    /* tsc_elapsed <= 2*stime_elapsed */
    while ( tsc_elapsed64 > (stime_elapsed32 * 2) )
    {
        tsc_elapsed64 >>= 1;
        tsc_shift--;
    }

    /* Local difference must now fit in 32 bits. */
    ASSERT((u32)tsc_elapsed64 == tsc_elapsed64);
    tsc_elapsed32 = (u32)tsc_elapsed64;

    /* tsc_elapsed > stime_elapsed */
    ASSERT(tsc_elapsed32 != 0);
    while ( tsc_elapsed32 <= stime_elapsed32 )
    {
        tsc_elapsed32 <<= 1;
        tsc_shift++;
    }

    calibration_mul_frac = div_frac(stime_elapsed32, tsc_elapsed32);
    if ( error_factor != 0 )
        calibration_mul_frac = mul_frac(calibration_mul_frac, error_factor);

#if 0
    printk("---%d: %08x %08x %d\n", smp_processor_id(),
           error_factor, calibration_mul_frac, tsc_shift);
#endif

    /* Record new timestamp information, atomically w.r.t. interrupts. */
    local_irq_disable();
    t->tsc_scale.mul_frac = calibration_mul_frac;
    t->tsc_scale.shift    = tsc_shift;
    t->local_tsc_stamp    = curr_tsc;
    t->stime_local_stamp  = curr_local_stime;
    t->stime_master_stamp = curr_master_stime;
    local_irq_enable();

    update_vcpu_system_time(current);

 out:
    if ( smp_processor_id() == 0 )
    {
        set_timer(&calibration_timer, NOW() + EPOCH);
        platform_time_calibration();
    }
}

/*
 * TSC Reliability check
 */

/*
 * The Linux original version of this function is
 * Copyright (c) 2006, Red Hat, Inc., Ingo Molnar
 */
void check_tsc_warp(unsigned long tsc_khz, unsigned long *max_warp)
{
#define rdtsc_barrier() mb()
    static DEFINE_SPINLOCK(sync_lock);
    static cycles_t last_tsc;

    cycles_t start, now, prev, end;
    int i;

    rdtsc_barrier();
    start = get_cycles();
    rdtsc_barrier();

    /* The measurement runs for 20 msecs: */
    end = start + tsc_khz * 20ULL;
    now = start;

    for ( i = 0; ; i++ )
    {
        /*
         * We take the global lock, measure TSC, save the
         * previous TSC that was measured (possibly on
         * another CPU) and update the previous TSC timestamp.
         */
        spin_lock(&sync_lock);
        prev = last_tsc;
        rdtsc_barrier();
        now = get_cycles();
        rdtsc_barrier();
        last_tsc = now;
        spin_unlock(&sync_lock);

        /*
         * Be nice every now and then (and also check whether measurement is 
         * done [we also insert a 10 million loops safety exit, so we dont 
         * lock up in case the TSC readout is totally broken]):
         */
        if ( unlikely(!(i & 7)) )
        {
            if ( (now > end) || (i > 10000000) )
                break;
            cpu_relax();
            /*touch_nmi_watchdog();*/
        }

        /*
         * Outside the critical section we can now see whether we saw a 
         * time-warp of the TSC going backwards:
         */
        if ( unlikely(prev > now) )
        {
            spin_lock(&sync_lock);
            if ( *max_warp < prev - now )
                *max_warp = prev - now;
            spin_unlock(&sync_lock);
        }
    }
}

static unsigned long tsc_max_warp, tsc_check_count;
static cpumask_t tsc_check_cpumask = CPU_MASK_NONE;

static void tsc_check_slave(void *unused)
{
    unsigned int cpu = smp_processor_id();
    local_irq_disable();
    while ( !cpu_isset(cpu, tsc_check_cpumask) )
        mb();
    check_tsc_warp(cpu_khz, &tsc_max_warp);
    cpu_clear(cpu, tsc_check_cpumask);
    local_irq_enable();
}

void tsc_check_reliability(void)
{
    unsigned int cpu = smp_processor_id();
    static DEFINE_SPINLOCK(lock);

    spin_lock(&lock);

    tsc_check_count++;
    smp_call_function(tsc_check_slave, NULL, 0);
    tsc_check_cpumask = cpu_online_map;
    local_irq_disable();
    check_tsc_warp(cpu_khz, &tsc_max_warp);
    cpu_clear(cpu, tsc_check_cpumask);
    local_irq_enable();
    while ( !cpus_empty(tsc_check_cpumask) )
        cpu_relax();

    spin_unlock(&lock);
}

/*
 * Rendezvous for all CPUs in IRQ context.
 * Master CPU snapshots the platform timer.
 * All CPUS snapshot their local TSC and extrapolation of system time.
 */
struct calibration_rendezvous {
    cpumask_t cpu_calibration_map;
    atomic_t semaphore;
    s_time_t master_stime;
    u64 master_tsc_stamp;
};

/*
 * Keep TSCs in sync when they run at the same rate, but may stop in
 * deep-sleep C states.
 */
static void time_calibration_tsc_rendezvous(void *_r)
{
    int i;
    struct cpu_calibration *c = &this_cpu(cpu_calibration);
    struct calibration_rendezvous *r = _r;
    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);

    /* Loop to get rid of cache effects on TSC skew. */
    for ( i = 4; i >= 0; i-- )
    {
        if ( smp_processor_id() == 0 )
        {
            while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
                mb();

            if ( r->master_stime == 0 )
            {
                r->master_stime = read_platform_stime();
                rdtscll(r->master_tsc_stamp);
            }
            atomic_inc(&r->semaphore);

            if ( i == 0 )
                write_tsc(r->master_tsc_stamp);

            while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
                mb();
            atomic_set(&r->semaphore, 0);
        }
        else
        {
            atomic_inc(&r->semaphore);
            while ( atomic_read(&r->semaphore) < total_cpus )
                mb();

            if ( i == 0 )
                write_tsc(r->master_tsc_stamp);

            atomic_inc(&r->semaphore);
            while ( atomic_read(&r->semaphore) > total_cpus )
                mb();
        }
    }

    rdtscll(c->local_tsc_stamp);
    c->stime_local_stamp = get_s_time();
    c->stime_master_stamp = r->master_stime;

    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
}

/* Ordinary rendezvous function which does not modify TSC values. */
static void time_calibration_std_rendezvous(void *_r)
{
    struct cpu_calibration *c = &this_cpu(cpu_calibration);
    struct calibration_rendezvous *r = _r;
    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);

    if ( smp_processor_id() == 0 )
    {
        while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
            cpu_relax();
        r->master_stime = read_platform_stime();
        mb(); /* write r->master_stime /then/ signal */
        atomic_inc(&r->semaphore);
    }
    else
    {
        atomic_inc(&r->semaphore);
        while ( atomic_read(&r->semaphore) != total_cpus )
            cpu_relax();
        mb(); /* receive signal /then/ read r->master_stime */
    }

    rdtscll(c->local_tsc_stamp);
    c->stime_local_stamp = get_s_time();
    c->stime_master_stamp = r->master_stime;

    raise_softirq(TIME_CALIBRATE_SOFTIRQ);
}

static void (*time_calibration_rendezvous_fn)(void *) =
    time_calibration_std_rendezvous;

static void time_calibration(void *unused)
{
    struct calibration_rendezvous r = {
        .cpu_calibration_map = cpu_online_map,
        .semaphore = ATOMIC_INIT(0)
    };

    /* @wait=1 because we must wait for all cpus before freeing @r. */
    on_selected_cpus(&r.cpu_calibration_map,
                     time_calibration_rendezvous_fn,
                     &r, 1);
}

void init_percpu_time(void)
{
    struct cpu_time *t = &this_cpu(cpu_time);
    unsigned long flags;
    s_time_t now;

    /* Initial estimate for TSC rate. */
    this_cpu(cpu_time).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;

    local_irq_save(flags);
    rdtscll(t->local_tsc_stamp);
    now = read_platform_stime();
    local_irq_restore(flags);

    t->stime_master_stamp = now;
    t->stime_local_stamp  = now;

    if ( smp_processor_id() == 0 )
    {
        init_timer(&calibration_timer, time_calibration, NULL, 0);
        set_timer(&calibration_timer, NOW() + EPOCH);
    }
}

/* Late init function (after all CPUs are booted). */
int __init init_xen_time(void)
{
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
    {
        /*
         * Sadly, despite processor vendors' best design guidance efforts, on
         * some systems, cpus may come out of reset improperly synchronized.
         * So we must verify there is no warp and we can't do that until all
         * CPUs are booted.
         */
        tsc_check_reliability();
        if ( tsc_max_warp )
            setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    }

    /* If we have constant-rate TSCs then scale factor can be shared. */
    if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
    {
        /* If TSCs are not marked as 'reliable', re-sync during rendezvous. */
        if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
            time_calibration_rendezvous_fn = time_calibration_tsc_rendezvous;
    }

    open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);

    /* System time (get_s_time()) starts ticking from now. */
    rdtscll(this_cpu(cpu_time).local_tsc_stamp);

    /* NB. get_cmos_time() can take over one second to execute. */
    do_settime(get_cmos_time(), 0, NOW());

    init_platform_timer();

    init_percpu_time();

    return 0;
}


/* Early init function. */
void __init early_time_init(void)
{
    u64 tmp = init_pit_and_calibrate_tsc();

    set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);

    do_div(tmp, 1000);
    cpu_khz = (unsigned long)tmp;
    printk("Detected %lu.%03lu MHz processor.\n", 
           cpu_khz / 1000, cpu_khz % 1000);

    setup_irq(0, &irq0);
}

/* keep pit enabled for pit_broadcast working while cpuidle enabled */
static int disable_pit_irq(void)
{
    if ( using_pit || !cpu_has_apic )
        return 0;

    /*
     * If we do not rely on PIT CH0 then we can use HPET for one-shot timer 
     * emulation when entering deep C states.
     * XXX dom0 may rely on RTC interrupt delivery, so only enable
     * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
     */
    if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) )
    {
        hpet_broadcast_init();
        if ( !hpet_broadcast_is_available() )
        {
            if ( xen_cpuidle == -1 )
            {
                xen_cpuidle = 0;
                printk("CPUIDLE: disabled due to no HPET. "
                       "Force enable with 'cpuidle'.\n");
            }
            else
            {
                printk("HPET broadcast init failed, turn to PIT broadcast.\n");
                return 0;
            }
        }
    }

    /* Disable PIT CH0 timer interrupt. */
    outb_p(0x30, PIT_MODE);
    outb_p(0, PIT_CH0);
    outb_p(0, PIT_CH0);

    return 0;
}
__initcall(disable_pit_irq);

void pit_broadcast_enter(void)
{
    cpu_set(smp_processor_id(), pit_broadcast_mask);
}

void pit_broadcast_exit(void)
{
    int cpu = smp_processor_id();

    if ( cpu_test_and_clear(cpu, pit_broadcast_mask) )
        reprogram_timer(this_cpu(timer_deadline));
}

int pit_broadcast_is_available(void)
{
    return xen_cpuidle;
}

void send_timer_event(struct vcpu *v)
{
    send_guest_vcpu_virq(v, VIRQ_TIMER);
}

/* Return secs after 00:00:00 localtime, 1 January, 1970. */
unsigned long get_localtime(struct domain *d)
{
    return wc_sec + (wc_nsec + NOW()) / 1000000000ULL 
        + d->time_offset_seconds;
}

unsigned long get_sec(void)
{
    return wc_sec + (wc_nsec + NOW()) / 1000000000ULL;
}

/* "cmos_utc_offset" is the difference between UTC time and CMOS time. */
static long cmos_utc_offset; /* in seconds */

int time_suspend(void)
{
    if ( smp_processor_id() == 0 )
    {
        cmos_utc_offset = -get_cmos_time();
        cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
        kill_timer(&calibration_timer);

        /* Sync platform timer stamps. */
        platform_time_calibration();
    }

    /* Better to cancel calibration timer for accuracy. */
    clear_bit(TIME_CALIBRATE_SOFTIRQ, &softirq_pending(smp_processor_id()));

    return 0;
}

int time_resume(void)
{
    init_pit_and_calibrate_tsc();

    resume_platform_timer();

    disable_pit_irq();

    init_percpu_time();

    do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());

    update_vcpu_system_time(current);

    update_domain_rtc();

    return 0;
}

int dom0_pit_access(struct ioreq *ioreq)
{
    /* Is Xen using Channel 2? Then disallow direct dom0 access. */
    if ( using_pit )
        return 0;

    switch ( ioreq->addr )
    {
    case PIT_CH2:
        if ( ioreq->dir == IOREQ_READ )
            ioreq->data = inb(PIT_CH2);
        else
            outb(ioreq->data, PIT_CH2);
        return 1;

    case PIT_MODE:
        if ( ioreq->dir == IOREQ_READ )
            return 0; /* urk! */
        switch ( ioreq->data & 0xc0 )
        {
        case 0xc0: /* Read Back */
            if ( ioreq->data & 0x08 )    /* Select Channel 2? */
                outb(ioreq->data & 0xf8, PIT_MODE);
            if ( !(ioreq->data & 0x06) ) /* Select Channel 0/1? */
                return 1; /* no - we're done */
            /* Filter Channel 2 and reserved bit 0. */
            ioreq->data &= ~0x09;
            return 0; /* emulate ch0/1 readback */
        case 0x80: /* Select Counter 2 */
            outb(ioreq->data, PIT_MODE);
            return 1;
        }

    case 0x61:
        if ( ioreq->dir == IOREQ_READ )
            ioreq->data = inb(0x61);
        else
            outb((inb(0x61) & ~3) | (ioreq->data & 3), 0x61);
        return 1;
    }

    return 0;
}

struct tm wallclock_time(void)
{
    uint64_t seconds;

    if ( !wc_sec )
        return (struct tm) { 0 };

    seconds = NOW() + (wc_sec * 1000000000ull) + wc_nsec;
    do_div(seconds, 1000000000);
    return gmtime(seconds);
}

/*
 * PV SoftTSC Emulation.
 */

/*
 * tsc=unstable: Override all tests; assume TSC is unreliable.
 * tsc=skewed: Assume TSCs are individually reliable, but skewed across CPUs.
 */
static void __init tsc_parse(const char *s)
{
    if ( !strcmp(s, "unstable") )
    {
        setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC);
        setup_clear_cpu_cap(X86_FEATURE_NONSTOP_TSC);
        setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    }
    else if ( !strcmp(s, "skewed") )
    {
        setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE);
    }
}
custom_param("tsc", tsc_parse);

u64 gtime_to_gtsc(struct domain *d, u64 tsc)
{
    if ( !is_hvm_domain(d) )
        tsc = max_t(s64, tsc - d->arch.vtsc_offset, 0);
    return scale_delta(tsc, &d->arch.ns_to_vtsc);
}

void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp)
{
    s_time_t now = get_s_time();
    struct domain *d = v->domain;

    spin_lock(&d->arch.vtsc_lock);

    if ( guest_kernel_mode(v, regs) )
        d->arch.vtsc_kerncount++;
    else
        d->arch.vtsc_usercount++;

    if ( (int64_t)(now - d->arch.vtsc_last) > 0 )
        d->arch.vtsc_last = now;
    else
        now = ++d->arch.vtsc_last;

    spin_unlock(&d->arch.vtsc_lock);

    now = gtime_to_gtsc(d, now);

    regs->eax = (uint32_t)now;
    regs->edx = (uint32_t)(now >> 32);

    if ( rdtscp )
         regs->ecx =
             (d->arch.tsc_mode == TSC_MODE_PVRDTSCP) ? d->arch.incarnation : 0;
}

int host_tsc_is_safe(void)
{
    return boot_cpu_has(X86_FEATURE_TSC_RELIABLE);
}

void cpuid_time_leaf(uint32_t sub_idx, uint32_t *eax, uint32_t *ebx,
                      uint32_t *ecx, uint32_t *edx)
{
    struct domain *d = current->domain;
    uint64_t offset;

    switch ( sub_idx )
    {
    case 0: /* features */
        *eax = ( ( (!!d->arch.vtsc) << 0 ) |
                 ( (!!host_tsc_is_safe()) << 1 ) |
                 ( (!!boot_cpu_has(X86_FEATURE_RDTSCP)) << 2 ) |
               0 );
        *ebx = d->arch.tsc_mode;
        *ecx = d->arch.tsc_khz;
        *edx = d->arch.incarnation;
        break;
    case 1: /* scale and offset */
        if ( !d->arch.vtsc )
            offset = d->arch.vtsc_offset;
        else
            /* offset already applied to value returned by virtual rdtscp */
            offset = 0;
        *eax = (uint32_t)offset;
        *ebx = (uint32_t)(offset >> 32);
        *ecx = d->arch.vtsc_to_ns.mul_frac;
        *edx = (s8)d->arch.vtsc_to_ns.shift;
        break;
    case 2: /* physical cpu_khz */
        *eax = cpu_khz;
        *ebx = *ecx = *edx = 0;
        break;
    default:
        *eax = *ebx = *ecx = *edx = 0;
    }
}

/*
 * called to collect tsc-related data only for save file or live
 * migrate; called after last rdtsc is done on this incarnation
 */
void tsc_get_info(struct domain *d, uint32_t *tsc_mode,
                  uint64_t *elapsed_nsec, uint32_t *gtsc_khz,
                  uint32_t *incarnation)
{
    *incarnation = d->arch.incarnation;
    *tsc_mode = d->arch.tsc_mode;

    switch ( *tsc_mode )
    {
    case TSC_MODE_NEVER_EMULATE:
        *elapsed_nsec =  *gtsc_khz = 0;
        break;
    case TSC_MODE_ALWAYS_EMULATE:
        *elapsed_nsec = get_s_time() - d->arch.vtsc_offset;
        *gtsc_khz =  d->arch.tsc_khz;
         break;
    case TSC_MODE_DEFAULT:
        if ( d->arch.vtsc )
        {
            *elapsed_nsec = get_s_time() - d->arch.vtsc_offset;
            *gtsc_khz =  d->arch.tsc_khz;
        }
        else
        {
            uint64_t tsc = 0;
            rdtscll(tsc);
            *elapsed_nsec = scale_delta(tsc,&d->arch.vtsc_to_ns);
            *gtsc_khz =  cpu_khz;
        }
        break;
    case TSC_MODE_PVRDTSCP:
        if ( d->arch.vtsc )
        {
            *elapsed_nsec = get_s_time() - d->arch.vtsc_offset;
            *gtsc_khz =  cpu_khz;
        }
        else
        {
            uint64_t tsc = 0;
            rdtscll(tsc);
            *elapsed_nsec = (scale_delta(tsc,&d->arch.vtsc_to_ns) -
                             d->arch.vtsc_offset);
            *gtsc_khz = 0; /* ignored by tsc_set_info */
        }
        break;
    }

    if ( (int64_t)*elapsed_nsec < 0 )
        *elapsed_nsec = 0;
}

/*
 * This may be called as many as three times for a domain, once when the
 * hypervisor creates the domain, once when the toolstack creates the
 * domain and, if restoring/migrating, once when saved/migrated values
 * are restored.  Care must be taken that, if multiple calls occur,
 * only the last "sticks" and all are completed before the guest executes
 * an rdtsc instruction
 */
void tsc_set_info(struct domain *d,
                  uint32_t tsc_mode, uint64_t elapsed_nsec,
                  uint32_t gtsc_khz, uint32_t incarnation)
{
    if ( d->domain_id == 0 || d->domain_id == DOMID_INVALID )
    {
        d->arch.vtsc = 0;
        return;
    }
    switch ( d->arch.tsc_mode = tsc_mode )
    {
    case TSC_MODE_NEVER_EMULATE:
        d->arch.vtsc = 0;
        break;
    case TSC_MODE_ALWAYS_EMULATE:
        d->arch.vtsc = 1;
        d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
        d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
        set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
        d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
        break;
    case TSC_MODE_DEFAULT:
        d->arch.vtsc = 1;
        d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
        d->arch.tsc_khz = gtsc_khz ? gtsc_khz : cpu_khz;
        set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
        /* use native TSC if initial host has safe TSC, has not migrated
         * yet and tsc_khz == cpu_khz */
        if ( host_tsc_is_safe() && incarnation == 0 &&
                d->arch.tsc_khz == cpu_khz )
            d->arch.vtsc = 0;
        else 
            d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
        break;
    case TSC_MODE_PVRDTSCP:
        d->arch.vtsc =  boot_cpu_has(X86_FEATURE_RDTSCP) &&
                        host_tsc_is_safe() ?  0 : 1;
        d->arch.tsc_khz = cpu_khz;
        set_time_scale(&d->arch.vtsc_to_ns, d->arch.tsc_khz * 1000 );
        d->arch.ns_to_vtsc = scale_reciprocal(d->arch.vtsc_to_ns);
        if ( d->arch.vtsc )
            d->arch.vtsc_offset = get_s_time() - elapsed_nsec;
        else {
            /* when using native TSC, offset is nsec relative to power-on
             * of physical machine */
            uint64_t tsc = 0;
            rdtscll(tsc);
            d->arch.vtsc_offset = scale_delta(tsc,&d->arch.vtsc_to_ns) -
                                  elapsed_nsec;
        }
        break;
    }
    d->arch.incarnation = incarnation + 1;
    if ( is_hvm_domain(d) )
        hvm_set_rdtsc_exiting(d, d->arch.vtsc);
}

/* vtsc may incur measurable performance degradation, diagnose with this */
static void dump_softtsc(unsigned char key)
{
    struct domain *d;
    int domcnt = 0;
    extern unsigned int max_cstate;

    tsc_check_reliability();
    if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
        printk("TSC marked as reliable, "
               "warp = %lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
    else if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC ) )
    {
        printk("TSC has constant rate, ");
        if (max_cstate <= 2 && tsc_max_warp == 0)
            printk("no deep Cstates, passed warp test, deemed reliable, ");
        else
            printk("deep Cstates possible, so not reliable, ");
        printk("warp=%lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
    } else
        printk("TSC not marked as either constant or reliable, "
               "warp=%lu (count=%lu)\n", tsc_max_warp, tsc_check_count);
    for_each_domain ( d )
    {
        if ( d->domain_id == 0 && d->arch.tsc_mode == TSC_MODE_DEFAULT )
            continue;
        printk("dom%u%s: mode=%d",d->domain_id,
                is_hvm_domain(d) ? "(hvm)" : "", d->arch.tsc_mode);
        if ( d->arch.vtsc_offset )
            printk(",ofs=0x%"PRIx64"",d->arch.vtsc_offset);
        if ( d->arch.tsc_khz )
            printk(",khz=%"PRIu32"",d->arch.tsc_khz);
        if ( d->arch.incarnation )
            printk(",inc=%"PRIu32"",d->arch.incarnation);
        if ( !(d->arch.vtsc_kerncount | d->arch.vtsc_usercount) )
        {
            printk("\n");
            continue;
        }
        if ( is_hvm_domain(d) )
            printk(",vtsc count: %"PRIu64" total\n",
                   d->arch.vtsc_kerncount);
        else
            printk(",vtsc count: %"PRIu64" kernel, %"PRIu64" user\n",
                   d->arch.vtsc_kerncount, d->arch.vtsc_usercount);
        domcnt++;
    }

    if ( !domcnt )
            printk("No domains have emulated TSC\n");
}

static struct keyhandler dump_softtsc_keyhandler = {
    .diagnostic = 1,
    .u.fn = dump_softtsc,
    .desc = "dump softtsc stats"
};

static int __init setup_dump_softtsc(void)
{
    register_keyhandler('s', &dump_softtsc_keyhandler);
    return 0;
}
__initcall(setup_dump_softtsc);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */