aboutsummaryrefslogtreecommitdiffstats
path: root/xen/include/xen/sched.h
blob: c37e60f23ac23fd07b2e681cf037d86673a525a9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <public/xen.h>
#include <public/dom0_ops.h>
#include <public/vcpu.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>

extern unsigned long volatile jiffies;
extern rwlock_t domlist_lock;

/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#define MAX_EVTCHNS        NR_EVENT_CHANNELS
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS  (MAX_EVTCHNS / EVTCHNS_PER_BUCKET)

struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16            remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
};

int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);

struct vcpu 
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    struct timer     timer;         /* one-shot timer for timeout values */
    unsigned long    sleep_tick;    /* tick at which this vcpu started sleep */

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
    struct vcpu_runstate_info *runstate_guest; /* guest address */

    unsigned long    vcpu_flags;

    spinlock_t       pause_lock;
    unsigned int     pause_count;

    u16              virq_to_evtchn[NR_VIRQS];

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t        cpu_affinity;

    unsigned long    nmi_addr;      /* NMI callback address. */

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t        vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define LOCK_BIGLOCK(_d) spin_lock_recursive(&(_d)->big_lock)
#define UNLOCK_BIGLOCK(_d) spin_unlock_recursive(&(_d)->big_lock)

struct domain
{
    domid_t          domain_id;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       big_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields  */
    struct list_head page_list;       /* linked list, of size tot_pages     */
    struct list_head xenpage_list;    /* linked list, of size xenheap_pages */
    unsigned int     tot_pages;       /* number of pages currently possesed */
    unsigned int     max_pages;       /* maximum value for tot_pages        */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */

    /* Scheduling. */
    int              shutdown_code; /* code value from OS (if DOMF_shutdown) */
    void            *sched_priv;    /* scheduler-specific data */

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t       evtchn_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the 
     * domain's event-channel spinlock. Read accesses can also synchronise on 
     * the lock, but races don't usually matter.
     */
    u16              pirq_to_evtchn[NR_IRQS];
    DECLARE_BITMAP(pirq_mask, NR_IRQS);

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    unsigned long    domain_flags;

    spinlock_t       pause_lock;
    unsigned int     pause_count;

    unsigned long    vm_assist;

    atomic_t         refcnt;

    struct vcpu *vcpu[MAX_VIRT_CPUS];

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t        domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t time_offset_seconds;
};

struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
    /* Indicate whether it's xen specific image */
    char *xen_section_string;
};

extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID   (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

extern struct domain *domain_create(domid_t domid);
extern int construct_dom0(
    struct domain *d,
    unsigned long image_start, unsigned long image_len, 
    unsigned long initrd_start, unsigned long initrd_len,
    char *cmdline);
extern int set_info_guest(struct domain *d, dom0_setvcpucontext_t *);

struct domain *find_domain_by_id(domid_t dom);
extern void domain_destroy(struct domain *d);
extern void domain_kill(struct domain *d);
extern void domain_shutdown(struct domain *d, u8 reason);
extern void domain_pause_for_debugger(void);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
extern void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);       \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
extern void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

void new_thread(struct vcpu *d,
                unsigned long start_pc,
                unsigned long start_stack,
                unsigned long start_info);

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
void schedulers_start(void);
int  sched_init_vcpu(struct vcpu *);
void sched_destroy_domain(struct domain *);
long sched_ctl(struct sched_ctl_cmd *);
long sched_adjdom(struct sched_adjdom_cmd *);
int  sched_id(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
extern void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
extern void context_switch(
    struct vcpu *prev, 
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
#define context_saved(prev) (clear_bit(_VCPUF_running, &(prev)->vcpu_flags))

/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))

/* This domain_hash and domain_list are protected by the domlist_lock. */
#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
extern struct domain *domain_list;

#define for_each_domain(_d)                     \
 for ( (_d) = domain_list;                      \
       (_d) != NULL;                            \
       (_d) = (_d)->next_in_list )

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu[0];                    \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )

/*
 * Per-VCPU flags (vcpu_flags).
 */
 /* Has the FPU been initialised? */
#define _VCPUF_fpu_initialised 0
#define VCPUF_fpu_initialised  (1UL<<_VCPUF_fpu_initialised)
 /* Has the FPU been used since it was last saved? */
#define _VCPUF_fpu_dirtied     1
#define VCPUF_fpu_dirtied      (1UL<<_VCPUF_fpu_dirtied)
 /* Domain is blocked waiting for an event. */
#define _VCPUF_blocked         2
#define VCPUF_blocked          (1UL<<_VCPUF_blocked)
 /* Currently running on a CPU? */
#define _VCPUF_running         3
#define VCPUF_running          (1UL<<_VCPUF_running)
 /* Initialization completed. */
#define _VCPUF_initialised     4
#define VCPUF_initialised      (1UL<<_VCPUF_initialised)
 /* VCPU is offline. */
#define _VCPUF_down            5
#define VCPUF_down             (1UL<<_VCPUF_down)
 /* NMI callback pending for this VCPU? */
#define _VCPUF_nmi_pending     8
#define VCPUF_nmi_pending      (1UL<<_VCPUF_nmi_pending)
 /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
#define _VCPUF_nmi_masked      9
#define VCPUF_nmi_masked       (1UL<<_VCPUF_nmi_masked)
 /* VCPU is polling a set of event channels (SCHEDOP_poll). */
#define _VCPUF_polling         10
#define VCPUF_polling          (1UL<<_VCPUF_polling)
 /* VCPU is paused by the hypervisor? */
#define _VCPUF_paused          11
#define VCPUF_paused           (1UL<<_VCPUF_paused)
 /* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VCPUF_blocked_in_xen  12
#define VCPUF_blocked_in_xen   (1UL<<_VCPUF_blocked_in_xen)

/*
 * Per-domain flags (domain_flags).
 */
 /* Is this domain privileged? */
#define _DOMF_privileged       0
#define DOMF_privileged        (1UL<<_DOMF_privileged)
 /* Guest shut itself down for some reason. */
#define _DOMF_shutdown         1
#define DOMF_shutdown          (1UL<<_DOMF_shutdown)
 /* Death rattle. */
#define _DOMF_dying            2
#define DOMF_dying             (1UL<<_DOMF_dying)
 /* Domain is paused by controller software. */
#define _DOMF_ctrl_pause       3
#define DOMF_ctrl_pause        (1UL<<_DOMF_ctrl_pause)
 /* Domain is being debugged by controller software. */
#define _DOMF_debugging        4
#define DOMF_debugging         (1UL<<_DOMF_debugging)
 /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#define _DOMF_polling          5
#define DOMF_polling           (1UL<<_DOMF_polling)
 /* Domain is paused by the hypervisor? */
#define _DOMF_paused           6
#define DOMF_paused            (1UL<<_DOMF_paused)

static inline int vcpu_runnable(struct vcpu *v)
{
    return ( !(v->vcpu_flags &
               (VCPUF_blocked|VCPUF_down|VCPUF_paused|VCPUF_blocked_in_xen)) &&
             !(v->domain->domain_flags &
               (DOMF_shutdown|DOMF_ctrl_pause|DOMF_paused)) );
}

void vcpu_pause(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);

static inline void vcpu_unblock(struct vcpu *v)
{
    if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
        vcpu_wake(v);
}

#define IS_PRIV(_d)                                         \
    (test_bit(_DOMF_privileged, &(_d)->domain_flags))

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */