/******************************************************************************
 * domain_page.c
 *
 * Allow temporary mapping of domain pages.
 *
 * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
 */

#include <xen/domain_page.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/pfn.h>
#include <xen/sched.h>
#include <xen/vmap.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>

/* Boot-time override for mapcache_current_vcpu(): use this vCPU instead of current. */
static struct vcpu *__read_mostly override;

static inline struct vcpu *mapcache_current_vcpu(void)
{
    /* In the common case we use the mapcache of the running VCPU. */
    struct vcpu *v = override ?: current;

    /*
     * When current isn't properly set up yet, treat this as running on an
     * idle vCPU: return NULL, and let callers handle that case.
     */
    if ( v == (struct vcpu *)0xfffff000 )
        return NULL;

    /*
     * If guest_table is NULL and this is a PV vCPU, we are running on the
     * idle domain's page tables, so this vCPU's own mapcache is unusable;
     * switch to the idle vCPU instead.
     */
    if ( unlikely(pagetable_is_null(v->arch.guest_table)) && !is_hvm_vcpu(v) )
    {
        /* If we really are idling, perform lazy context switch now. */
        if ( (v = idle_vcpu[smp_processor_id()]) == current )
            sync_local_execstate();
        /* We must now be running on the idle page table. */
        ASSERT(read_cr3() == __pa(idle_pg_table));
    }

    return v;
}

void __init mapcache_override_current(struct vcpu *v)
{
    override = v;
}

#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
#define MAPCACHE_L1ENT(idx) \
    __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]

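/*
 * Typical usage of the pair below, mirroring clear_domain_page() further
 * down in this file:
 *
 *     void *p = map_domain_page(mfn);
 *     clear_page(p);
 *     unmap_domain_page(p);
 */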
void *map_domain_page(unsigned long mfn)
{
    unsigned long flags;
    unsigned int idx, i;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    struct mapcache_vcpu *vcache;
    struct vcpu_maphash_entry *hashent;

#ifdef NDEBUG
    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn);
#endif

    v = mapcache_current_vcpu();
    if ( !v || is_hvm_vcpu(v) )
        return mfn_to_virt(mfn);

    dcache = &v->domain->arch.pv_domain.mapcache;
    vcache = &v->arch.pv_vcpu.mapcache;
    if ( !dcache->inuse )
        return mfn_to_virt(mfn);

    perfc_incr(map_domain_page_count);

    local_irq_save(flags);

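    /* Fast path: the MFN may already be cached in the per-vCPU maphash. */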
    hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
    if ( hashent->mfn == mfn )
    {
        idx = hashent->idx;
        ASSERT(idx < dcache->entries);
        hashent->refcnt++;
        ASSERT(hashent->refcnt);
        ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == mfn);
        goto out;
    }

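    /* Slow path: allocate a fresh mapcache slot under the per-domain lock. */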
    spin_lock(&dcache->lock);

    /* Has some other CPU caused a wrap? We must flush if so. */
    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
    {
        vcache->shadow_epoch = dcache->epoch;
        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
        {
            perfc_incr(domain_page_tlb_flush);
            flush_tlb_local();
        }
    }

    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
    if ( unlikely(idx >= dcache->entries) )
    {
        unsigned long accum = 0, prev = 0;

        /* /First/, clean the garbage map and update the inuse list. */
        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
        {
            accum |= prev;
            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
            prev = ~dcache->inuse[i];
        }

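        /*
         * accum | prev is non-zero iff some entry is free in the cleaned
         * inuse bitmap; prev holds the final word, which may cover fewer
         * than BITS_PER_LONG entries and hence is masked below.
         */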
        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
        else
        {
            /* Replace a hash entry instead. */
            i = MAPHASH_HASHFN(mfn);
            do {
                hashent = &vcache->hash[i];
                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
                {
                    idx = hashent->idx;
                    ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn);
                    l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
                    hashent->idx = MAPHASHENT_NOTINUSE;
                    hashent->mfn = ~0UL;
                    break;
                }
                if ( ++i == MAPHASH_ENTRIES )
                    i = 0;
            } while ( i != MAPHASH_HASHFN(mfn) );
        }
        BUG_ON(idx >= dcache->entries);

        /* /Second/, flush TLBs. */
        perfc_incr(domain_page_tlb_flush);
        flush_tlb_local();
        vcache->shadow_epoch = ++dcache->epoch;
        dcache->tlbflush_timestamp = tlbflush_current_time();
    }

    set_bit(idx, dcache->inuse);
    dcache->cursor = idx + 1;

    spin_unlock(&dcache->lock);

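    /*
     * The PTE write can happen outside the lock: setting the inuse bit
     * above reserved this slot for us, and IRQs are still disabled.
     */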
    l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_pfn(mfn, __PAGE_HYPERVISOR));

 out:
    local_irq_restore(flags);
    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
}

void unmap_domain_page(const void *ptr)
{
    unsigned int idx;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    unsigned long va = (unsigned long)ptr, mfn, flags;
    struct vcpu_maphash_entry *hashent;

    if ( va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);

    v = mapcache_current_vcpu();
    ASSERT(v && !is_hvm_vcpu(v));

    dcache = &v->domain->arch.pv_domain.mapcache;
    ASSERT(dcache->inuse);

    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
    hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];

    local_irq_save(flags);

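    /*
     * Three cases: the mapping being dropped is the one cached in the
     * maphash (just drop its reference); the hash slot's occupant is
     * unreferenced (evict it and cache this mapping instead); or the slot
     * is busy (zap the PTE and mark the entry as garbage for later reuse).
     */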
    if ( hashent->idx == idx )
    {
        ASSERT(hashent->mfn == mfn);
        ASSERT(hashent->refcnt);
        hashent->refcnt--;
    }
    else if ( !hashent->refcnt )
    {
        if ( hashent->idx != MAPHASHENT_NOTINUSE )
        {
            /* /First/, zap the PTE. */
            ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) ==
                   hashent->mfn);
            l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
            /* /Second/, mark as garbage. */
            set_bit(hashent->idx, dcache->garbage);
        }

        /* Add newly-freed mapping to the maphash. */
        hashent->mfn = mfn;
        hashent->idx = idx;
    }
    else
    {
        /* /First/, zap the PTE. */
        l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
        /* /Second/, mark as garbage. */
        set_bit(idx, dcache->garbage);
    }

    local_irq_restore(flags);
}

void clear_domain_page(unsigned long mfn)
{
    void *ptr = map_domain_page(mfn);

    clear_page(ptr);
    unmap_domain_page(ptr);
}

void copy_domain_page(unsigned long dmfn, unsigned long smfn)
{
    const void *src = map_domain_page(smfn);
    void *dst = map_domain_page(dmfn);

    copy_page(dst, src);
    unmap_domain_page(dst);
    unmap_domain_page(src);
}

int mapcache_domain_init(struct domain *d)
{
    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
    unsigned int bitmap_pages;

    if ( is_hvm_domain(d) || is_idle_domain(d) )
        return 0;

#ifdef NDEBUG
    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return 0;
#endif

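    /*
     * The inuse and garbage bitmaps live just above MAPCACHE_VIRT_END,
     * separated from the mapcache and from each other by guard pages; the
     * BUILD_BUG_ON() checks that everything fits in the per-domain slot.
     */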
    BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
    dcache->garbage = dcache->inuse +
                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);

    spin_lock_init(&dcache->lock);

    return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
                                    2 * bitmap_pages + 1,
                                    NIL(l1_pgentry_t *), NULL);
}

int mapcache_vcpu_init(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
    unsigned long i;
    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
    unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));

    if ( is_hvm_vcpu(v) || !dcache->inuse )
        return 0;

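    /*
     * The mapcache provides MAPCACHE_VCPU_ENTRIES entries per vCPU; grow
     * the domain's mappings and bitmaps if they do not yet cover that.
     */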
    if ( ents > dcache->entries )
    {
        /* Populate page tables. */
        int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, ents,
                                          NIL(l1_pgentry_t *), NULL);

        /* Populate bit maps. */
        if ( !rc )
            rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
                                          nr, NULL, NIL(struct page_info *));
        if ( !rc )
            rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage,
                                          nr, NULL, NIL(struct page_info *));

        if ( rc )
            return rc;

        dcache->entries = ents;
    }

    /* Mark all maphash entries as not in use. */
    BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
    {
        struct vcpu_maphash_entry *hashent = &v->arch.pv_vcpu.mapcache.hash[i];

        hashent->mfn = ~0UL; /* never valid to map */
        hashent->idx = MAPHASHENT_NOTINUSE;
    }

    return 0;
}

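/*
 * Global (long-lived) mappings do not use the per-vCPU mapcache; they go
 * through vmap() (or the direct map where possible).
 */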
void *map_domain_page_global(unsigned long mfn)
{
    ASSERT(!in_irq() && local_irq_is_enabled());

#ifdef NDEBUG
    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn);
#endif

    return vmap(&mfn, 1);
}

void unmap_domain_page_global(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;

    if ( va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= VMAP_VIRT_START && va < VMAP_VIRT_END);

    vunmap(ptr);
}

/* Translate a map-domain-page'd address to the underlying MFN */
unsigned long domain_page_map_to_mfn(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;
    const l1_pgentry_t *pl1e;

    if ( va >= DIRECTMAP_VIRT_START )
        return virt_to_mfn(ptr);

    if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
    {
        pl1e = virt_to_xen_l1e(va);
        BUG_ON(!pl1e);
    }
    else
    {
        ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
        pl1e = &__linear_l1_table[l1_linear_offset(va)];
    }

    return l1e_get_pfn(*pl1e);
}