-rw-r--r--  xen/arch/i386/smp.c                                                  59
-rw-r--r--  xen/common/domain.c                                                  51
-rw-r--r--  xen/common/event.c                                                    4
-rw-r--r--  xen/common/kernel.c                                                   1
-rw-r--r--  xen/common/network.c                                                107
-rw-r--r--  xen/include/asm-i386/flushtlb.h                                      14
-rw-r--r--  xen/include/asm-i386/pgalloc.h                                       15
-rw-r--r--  xen/include/hypervisor-ifs/hypervisor-if.h                           15
-rw-r--r--  xen/include/hypervisor-ifs/network.h                                 31
-rw-r--r--  xen/include/xeno/mm.h                                                 1
-rw-r--r--  xen/include/xeno/sched.h                                              7
-rw-r--r--  xen/include/xeno/vif.h                                               43
-rw-r--r--  xen/net/dev.c                                                       397
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c   107
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c                      5
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h               20
16 files changed, 420 insertions, 457 deletions
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index bc0952fc31..c049ed0e50 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -255,8 +255,10 @@ static inline void send_IPI_all(int vector)
*/
static volatile unsigned long flush_cpumask;
+#if 0
static struct mm_struct * flush_mm;
static unsigned long flush_va;
+#endif
static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
#define FLUSH_ALL 0xffffffff
@@ -323,90 +325,55 @@ asmlinkage void smp_invalidate_interrupt (void)
if (!test_bit(cpu, &flush_cpumask))
return;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
+
+#if 0
if (flush_mm == cpu_tlbstate[cpu].active_mm) {
if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
if (flush_va == FLUSH_ALL)
+#endif
local_flush_tlb();
+#if 0
else
__flush_tlb_one(flush_va);
} else
leave_mm(cpu);
}
+#endif
ack_APIC_irq();
clear_bit(cpu, &flush_cpumask);
}
-static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
- unsigned long va)
+void flush_tlb_others(unsigned long cpumask)
{
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - we do not send IPIs to not-yet booted CPUs.
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- if (!cpumask)
- BUG();
- if ((cpumask & cpu_online_map) != cpumask)
- BUG();
- if (cpumask & (1 << smp_processor_id()))
- BUG();
- if (!mm)
- BUG();
-
- /*
- * i'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * Temporarily this turns IRQs off, so that lockups are
- * detected by the NMI watchdog.
- */
spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
atomic_set_mask(cpumask, &flush_cpumask);
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
- while (flush_cpumask)
- /* nothing. lockup detection does not belong here */;
-
- flush_mm = NULL;
- flush_va = 0;
+ while (flush_cpumask) continue;
spin_unlock(&tlbstate_lock);
}
void flush_tlb_current_task(void)
{
+#if 0
struct mm_struct *mm = &current->mm;
unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
local_flush_tlb();
if (cpu_mask)
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+#endif
}
void flush_tlb_mm (struct mm_struct * mm)
{
+#if 0
unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
if (current->active_mm == mm)
local_flush_tlb();
if (cpu_mask)
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+#endif
}
#if 0
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 14fcf8eaec..c4114ae8c7 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -62,13 +62,9 @@ struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
p->addr_limit = USER_DS;
p->active_mm = &p->mm;
- p->num_net_vifs = 0;
sched_add_domain(p);
- INIT_LIST_HEAD(&p->net_vifs);
-
- p->net_ring_base = (net_ring_t *)(p->shared_info + 1);
INIT_LIST_HEAD(&p->pg_head);
p->max_pages = p->tot_pages = 0;
write_lock_irqsave(&tasklist_lock, flags);
@@ -112,8 +108,7 @@ void kill_domain_with_errmsg(const char *err)
void __kill_domain(struct task_struct *p)
{
- struct list_head *ent;
- net_vif_t *vif;
+ int i;
if ( p->domain == 0 )
{
@@ -128,11 +123,8 @@ void __kill_domain(struct task_struct *p)
unlink_blkdev_info(p);
- while ( (ent = p->net_vifs.next) != &p->net_vifs )
- {
- vif = list_entry(ent, net_vif_t, dom_list);
- unlink_net_vif(vif);
- }
+ for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+ unlink_net_vif(p->net_vif_list[i]);
if ( p == current )
{
@@ -300,8 +292,9 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
start_info_t * virt_startinfo_addr;
unsigned long virt_stack_addr;
unsigned long phys_l2tab;
- net_ring_t *net_ring;
+ net_ring_t *shared_rings;
net_vif_t *net_vif;
+ int i;
/* High entries in page table must contain hypervisor
* mem mappings - set them up.
@@ -363,15 +356,16 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
/* Add virtual network interfaces and point to them in startinfo. */
while (meminfo->num_vifs-- > 0) {
net_vif = create_net_vif(p->domain);
- net_ring = net_vif->net_ring;
- if (!net_ring) panic("no network ring!\n");
+ shared_rings = net_vif->shared_rings;
+ if (!shared_rings) panic("no network ring!\n");
}
-/* XXX SMH: horrible hack to convert hypervisor VAs in SHIP to guest VAs */
-#define SH2G(_x) (meminfo->virt_shinfo_addr | (((unsigned long)(_x)) & 0xFFF))
-
- virt_startinfo_addr->net_rings = (net_ring_t *)SH2G(p->net_ring_base);
- virt_startinfo_addr->num_net_rings = p->num_net_vifs;
+ for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+ {
+ if ( p->net_vif_list[i] == NULL ) continue;
+ virt_startinfo_addr->net_rings[i] =
+ virt_to_phys(p->net_vif_list[i]->shared_rings);
+ }
/* Add block io interface */
virt_startinfo_addr->blk_ring = virt_to_phys(p->blk_ring_base);
@@ -422,7 +416,7 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
l2_pgentry_t *l2tab, *l2start;
l1_pgentry_t *l1tab = NULL, *l1start = NULL;
struct pfn_info *page = NULL;
- net_ring_t *net_ring;
+ net_ring_t *shared_rings;
net_vif_t *net_vif;
/* Sanity! */
@@ -581,7 +575,6 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
p->shared_info->cpu_freq = cpu_freq;
p->shared_info->domain_time = 0;
-
virt_startinfo_address = (start_info_t *)
(virt_load_address + ((alloc_index - 1) << PAGE_SHIFT));
virt_stack_address = (unsigned long)virt_startinfo_address;
@@ -628,16 +621,16 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
/* Add virtual network interfaces and point to them in startinfo. */
while (params->num_vifs-- > 0) {
net_vif = create_net_vif(dom);
- net_ring = net_vif->net_ring;
- if (!net_ring) panic("no network ring!\n");
+ shared_rings = net_vif->shared_rings;
+ if (!shared_rings) panic("no network ring!\n");
}
-/* XXX SMH: horrible hack to convert hypervisor VAs in SHIP to guest VAs */
-#define SHIP2GUEST(_x) (virt_shinfo_address | (((unsigned long)(_x)) & 0xFFF))
-
- virt_startinfo_address->net_rings =
- (net_ring_t *)SHIP2GUEST(p->net_ring_base);
- virt_startinfo_address->num_net_rings = p->num_net_vifs;
+ for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+ {
+ if ( p->net_vif_list[i] == NULL ) continue;
+ virt_startinfo_address->net_rings[i] =
+ virt_to_phys(p->net_vif_list[i]->shared_rings);
+ }
/* Add block io interface */
virt_startinfo_address->blk_ring = virt_to_phys(p->blk_ring_base);
diff --git a/xen/common/event.c b/xen/common/event.c
index 2774806443..b2365f7dfe 100644
--- a/xen/common/event.c
+++ b/xen/common/event.c
@@ -14,14 +14,12 @@
typedef void (*hyp_event_callback_fn_t)(void);
extern void schedule(void);
-extern void update_shared_ring(void);
/* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
static hyp_event_callback_fn_t event_call_fn[] =
{
schedule,
- update_shared_ring,
- kill_domain,
+ kill_domain
};
/* Handle outstanding events for the currently-executing domain. */
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index ad321af3fb..8d569182d3 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -176,7 +176,6 @@ void cmain (unsigned long magic, multiboot_info_t *mbi)
current->shared_info = (void *)get_free_page(GFP_KERNEL);
memset(current->shared_info, 0, sizeof(shared_info_t));
set_fs(USER_DS);
- current->num_net_vifs = 0;
start_of_day();
diff --git a/xen/common/network.c b/xen/common/network.c
index 2cdf11a9a5..e7111fa0f3 100644
--- a/xen/common/network.c
+++ b/xen/common/network.c
@@ -50,44 +50,43 @@ void print_net_rule_list();
net_vif_t *create_net_vif(int domain)
{
- net_vif_t *new_vif;
- net_ring_t *new_ring;
- net_shadow_ring_t *shadow_ring;
- struct task_struct *dom_task;
+ int dom_vif_idx;
+ net_vif_t *new_vif = NULL;
+ net_ring_t *new_ring = NULL;
+ struct task_struct *p = NULL;
unsigned long flags;
- if ( !(dom_task = find_domain_by_id(domain)) )
+ if ( !(p = find_domain_by_id(domain)) )
return NULL;
- if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
- return NULL;
-
- new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
- memset(new_ring, 0, sizeof(net_ring_t));
+ for ( dom_vif_idx = 0; dom_vif_idx < MAX_DOMAIN_VIFS; dom_vif_idx++ )
+ if ( p->net_vif_list[dom_vif_idx] == NULL ) break;
+ if ( dom_vif_idx == MAX_DOMAIN_VIFS )
+ goto fail;
- shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
- if ( shadow_ring == NULL ) goto fail;
- memset(shadow_ring, 0, sizeof(*shadow_ring));
+ if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
+ goto fail;
- shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
- * sizeof(rx_shadow_entry_t), GFP_KERNEL);
- shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
- * sizeof(tx_shadow_entry_t), GFP_KERNEL);
- if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
- goto fail;
+ memset(new_vif, 0, sizeof(*new_vif));
+
+ if ( sizeof(net_ring_t) > PAGE_SIZE ) BUG();
+ new_ring = (net_ring_t *)get_free_page(GFP_KERNEL);
+ clear_page(new_ring);
+ SHARE_PFN_WITH_DOMAIN(virt_to_page(new_ring), domain);
/*
* Fill in the new vif struct. Note that, while the vif's refcnt is
* non-zero, we hold a reference to the task structure.
*/
atomic_set(&new_vif->refcnt, 1);
- new_vif->net_ring = new_ring;
- new_vif->shadow_ring = shadow_ring;
- new_vif->domain = dom_task;
- new_vif->list.next = NULL;
+ new_vif->shared_rings = new_ring;
+ new_vif->shared_idxs = &p->shared_info->net_idx[dom_vif_idx];
+ new_vif->domain = p;
+ new_vif->list.next = NULL;
+ spin_lock_init(&new_vif->rx_lock);
+ spin_lock_init(&new_vif->tx_lock);
- list_add(&new_vif->dom_list, &dom_task->net_vifs);
- dom_task->num_net_vifs++;
+ p->net_vif_list[dom_vif_idx] = new_vif;
write_lock_irqsave(&sys_vif_lock, flags);
new_vif->id = sys_vif_count;
@@ -96,16 +95,11 @@ net_vif_t *create_net_vif(int domain)
return new_vif;
-fail:
- kmem_cache_free(net_vif_cache, new_vif);
- if ( shadow_ring != NULL )
- {
- if ( shadow_ring->rx_ring ) kfree(shadow_ring->rx_ring);
- if ( shadow_ring->tx_ring ) kfree(shadow_ring->tx_ring);
- kfree(shadow_ring);
- }
-
- free_task_struct(dom_task);
+ fail:
+ if ( new_vif != NULL )
+ kmem_cache_free(net_vif_cache, new_vif);
+ if ( p != NULL )
+ free_task_struct(p);
return NULL;
}
@@ -118,25 +112,33 @@ void destroy_net_vif(net_vif_t *vif)
/* Return any outstanding receive buffers to the guest OS. */
spin_lock_irqsave(&p->page_lock, flags);
- for ( i = vif->shadow_ring->rx_idx;
- i != vif->shadow_ring->rx_req_cons;
- i = ((i+1) & (RX_RING_SIZE-1)) )
+ for ( i = vif->rx_cons; i != vif->rx_prod; i = ((i+1) & (RX_RING_SIZE-1)) )
{
- rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
- if ( rx->status != RING_STATUS_OK ) continue;
- pte = map_domain_mem(rx->addr);
- *pte |= _PAGE_PRESENT;
- page = frame_table + (*pte >> PAGE_SHIFT);
- page->flags &= ~PG_type_mask;
- if ( (*pte & _PAGE_RW) )
+ rx_shadow_entry_t *rx = vif->rx_shadow_ring + i;
+
+ /* Release the page-table page. */
+ page = frame_table + (rx->pte_ptr >> PAGE_SHIFT);
+ put_page_type(page);
+ put_page_tot(page);
+
+ /* Give the buffer page back to the domain. */
+ page = frame_table + rx->buf_pfn;
+ list_add(&page->list, &p->pg_head);
+ page->flags = vif->domain->domain;
+
+ /* Patch up the PTE if it hasn't changed under our feet. */
+ pte = map_domain_mem(rx->pte_ptr);
+ if ( !(*pte & _PAGE_PRESENT) )
+ {
+ *pte = (rx->buf_pfn<<PAGE_SHIFT) | (*pte & ~PAGE_MASK) |
+ _PAGE_RW | _PAGE_PRESENT;
page->flags |= PGT_writeable_page | PG_need_flush;
+ page->type_count = page->tot_count = 1;
+ }
unmap_domain_mem(pte);
}
spin_unlock_irqrestore(&p->page_lock, flags);
- kfree(vif->shadow_ring->tx_ring);
- kfree(vif->shadow_ring->rx_ring);
- kfree(vif->shadow_ring);
kmem_cache_free(net_vif_cache, vif);
free_task_struct(p);
}
@@ -144,11 +146,16 @@ void destroy_net_vif(net_vif_t *vif)
void unlink_net_vif(net_vif_t *vif)
{
unsigned long flags;
- list_del(&vif->dom_list);
- vif->domain->num_net_vifs--;
+ int i;
+
+ for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+ if ( vif->domain->net_vif_list[i] == vif )
+ vif->domain->net_vif_list[i] = NULL;
+
write_lock_irqsave(&sys_vif_lock, flags);
sys_vif_list[vif->id] = NULL;
write_unlock_irqrestore(&sys_vif_lock, flags);
+
put_vif(vif);
}
diff --git a/xen/include/asm-i386/flushtlb.h b/xen/include/asm-i386/flushtlb.h
index 306839c6a4..a021597ad6 100644
--- a/xen/include/asm-i386/flushtlb.h
+++ b/xen/include/asm-i386/flushtlb.h
@@ -11,27 +11,25 @@
#define __FLUSHTLB_H
#include <xeno/smp.h>
+#include <asm/atomic.h>
-unsigned long tlb_flush_count[NR_CPUS];
-//#if 0
+atomic_t tlb_flush_count[NR_CPUS];
#define __read_cr3(__var) \
do { \
__asm__ __volatile ( \
"movl %%cr3, %0;" \
: "=r" (__var)); \
} while (0)
-//#endif
#define __write_cr3_counted(__pa) \
do { \
__asm__ __volatile__ ( \
"movl %0, %%cr3;" \
- :: "r" (__pa) \
+ :: "r" (__pa) \
: "memory"); \
- tlb_flush_count[smp_processor_id()]++; \
+ atomic_inc(&tlb_flush_count[smp_processor_id()]); \
} while (0)
-//#endif
#define __flush_tlb_counted() \
do { \
unsigned int tmpreg; \
@@ -39,9 +37,9 @@ unsigned long tlb_flush_count[NR_CPUS];
__asm__ __volatile__( \
"movl %%cr3, %0; # flush TLB \n" \
"movl %0, %%cr3; " \
- : "=r" (tmpreg) \
+ : "=r" (tmpreg) \
:: "memory"); \
- tlb_flush_count[smp_processor_id()]++; \
+ atomic_inc(&tlb_flush_count[smp_processor_id()]); \
} while (0)
#endif
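
Making this counter an atomic_t matters because the network code now reads it from across CPUs: when do_net_update() unhooks a receive buffer's PTE it records the current flush count for that CPU in the vif's shadow ring, and deliver_packet() later forces a flush only if the count has not moved (meaning the stale translation could still be cached). A minimal sketch of that record-and-check pattern, using the types and helpers introduced elsewhere in this patch:

    /* Record side (cf. do_net_update() in xen/net/dev.c): note how many
     * flushes this CPU had performed when the buffer's PTE was unhooked. */
    vif->rx_shadow_ring[j].flush_count =
        atomic_read(&tlb_flush_count[smp_processor_id()]);

    /* Check side (cf. deliver_packet()): if the counter has not advanced
     * since then, the old mapping may still be in the TLB, so flush the
     * owning CPU before the page is handed back to the guest. */
    if ( rx->flush_count ==
         atomic_read(&tlb_flush_count[vif->domain->processor]) )
        flush_tlb_cpu(vif->domain->processor);
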
diff --git a/xen/include/asm-i386/pgalloc.h b/xen/include/asm-i386/pgalloc.h
index fcba5e1585..ed5f9cdb16 100644
--- a/xen/include/asm-i386/pgalloc.h
+++ b/xen/include/asm-i386/pgalloc.h
@@ -60,6 +60,11 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
__flush_tlb();
}
+static inline void flush_tlb_cpu(unsigned int cpu)
+{
+ __flush_tlb();
+}
+
#if 0
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr)
@@ -86,7 +91,6 @@ static inline void flush_tlb_range(struct mm_struct *mm,
extern void flush_tlb_all(void);
extern void flush_tlb_current_task(void);
extern void flush_tlb_mm(struct mm_struct *);
-/*extern void flush_tlb_page(struct vm_area_struct *, unsigned long);*/
#define flush_tlb() flush_tlb_current_task()
@@ -95,6 +99,15 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u
flush_tlb_mm(mm);
}
+extern void flush_tlb_others(unsigned long cpumask);
+static inline void flush_tlb_cpu(unsigned int cpu)
+{
+ if ( cpu == smp_processor_id() )
+ __flush_tlb();
+ else
+ flush_tlb_others(1<<cpu);
+}
+
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
index 6abf2c2222..3ef6ff59ac 100644
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -217,6 +217,14 @@ typedef struct shared_info_st {
unsigned long long wall_timeout;
unsigned long long domain_timeout;
+ /*
+ * The index structures are all stored here for convenience. The rings
+ * themselves are allocated by Xen but the guestos must create its own
+ * mapping -- the machine address is given in the startinfo structure to
+ * allow this to happen.
+ */
+ net_idx_t net_idx[MAX_DOMAIN_VIFS];
+
} shared_info_t;
/*
@@ -228,9 +236,10 @@ typedef struct start_info_st {
unsigned long pt_base; /* VIRTUAL address of page directory */
unsigned long mod_start; /* VIRTUAL address of pre-loaded module */
unsigned long mod_len; /* size (bytes) of pre-loaded module */
- net_ring_t *net_rings; /* network rings (VIRTUAL ADDRESS) */
- int num_net_rings;
- unsigned long blk_ring; /* block io ring (MACHINE ADDRESS) */
+ /* Machine address of net rings for each VIF. Will be page aligned. */
+ unsigned long net_rings[MAX_DOMAIN_VIFS];
+ /* Machine address of block-device ring. Will be page aligned. */
+ unsigned long blk_ring;
unsigned char cmd_line[1]; /* variable-length */
} start_info_t;
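
The comment added to shared_info_st above describes a split the guest has to stitch back together: Xen allocates each ring page and advertises only its machine address in start_info.net_rings[], while the index block lives in the shared-info page. A minimal sketch of how a guest might attach to VIF i, modelled on the xenolinux driver changes later in this patch (the FIX_NETRING* fixmap slots come from there; the real driver caps itself at four slots and keeps a separate fixmap index, and error handling is omitted):

    net_ring_t *ring;
    net_idx_t  *idx;

    if ( start_info.net_rings[i] != 0 )
    {
        /* Map the machine page that Xen allocated for the shared rings. */
        set_fixmap(FIX_NETRING0_BASE + i, start_info.net_rings[i]);
        ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE + i);

        /* The producer/consumer/event indexes sit in the shared-info page. */
        idx = &HYPERVISOR_shared_info->net_idx[i];
    }
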
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h
index 4d4cfe93e6..2de090ab62 100644
--- a/xen/include/hypervisor-ifs/network.h
+++ b/xen/include/hypervisor-ifs/network.h
@@ -17,14 +17,14 @@
typedef struct tx_req_entry_st
{
- unsigned long id;
- unsigned long addr; /* machine address of packet */
+ unsigned short id;
unsigned short size; /* packet size in bytes */
+ unsigned long addr; /* machine address of packet */
} tx_req_entry_t;
typedef struct tx_resp_entry_st
{
- unsigned long id;
+ unsigned short id;
unsigned char status;
} tx_resp_entry_t;
@@ -37,13 +37,13 @@ typedef union tx_entry_st
typedef struct rx_req_entry_st
{
- unsigned long id;
+ unsigned short id;
unsigned long addr; /* machine address of PTE to swizzle */
} rx_req_entry_t;
typedef struct rx_resp_entry_st
{
- unsigned long id;
+ unsigned short id;
unsigned short size; /* received packet size in bytes */
unsigned char status; /* per descriptor status */
unsigned char offset; /* offset in page of received pkt */
@@ -59,22 +59,26 @@ typedef union rx_entry_st
#define TX_RING_SIZE 256
#define RX_RING_SIZE 256
+#define MAX_DOMAIN_VIFS 8
+
+/* This structure must fit in a memory page. */
typedef struct net_ring_st
{
+ tx_entry_t tx_ring[TX_RING_SIZE];
+ rx_entry_t rx_ring[RX_RING_SIZE];
+} net_ring_t;
+
+typedef struct net_idx_st
+{
/*
* Guest OS places packets into ring at tx_req_prod.
* Guest OS receives DOMAIN_EVENT_NET_TX when tx_resp_prod passes tx_event.
- */
- tx_entry_t *tx_ring;
- unsigned int tx_req_prod, tx_resp_prod, tx_event;
-
- /*
* Guest OS places empty buffers into ring at rx_req_prod.
* Guest OS receives DOMAIN_EVENT_NET_RX when rx_resp_prod passes rx_event.
*/
- rx_entry_t *rx_ring;
+ unsigned int tx_req_prod, tx_resp_prod, tx_event;
unsigned int rx_req_prod, rx_resp_prod, rx_event;
-} net_ring_t;
+} net_idx_t;
/*
* Packet routing/filtering code follows:
@@ -144,7 +148,6 @@ int add_net_rule(net_rule_t *rule);
/* Descriptor status values */
#define RING_STATUS_OK 0 /* Everything is gravy. */
-#define RING_STATUS_ERR_CFU 1 /* Copy from user problems. */
-#define RING_STATUS_BAD_PAGE 2 /* What they gave us was pure evil */
+#define RING_STATUS_BAD_PAGE 1 /* What they gave us was pure evil */
#endif
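
Dropping the embedded pointers and shrinking the ids also makes the shared structure a fixed, page-sized object: with i386 alignment each tx_entry_t and rx_entry_t union packs to 8 bytes, so sizeof(net_ring_t) comes to roughly (256 + 256) * 8 = 4096 bytes, which is why create_net_vif() can back it with a single page (and BUG()s if it ever grows past one). A short sketch of a guest queueing one transmit request against these rings, assuming TX_RING_INC is the usual power-of-two wrap, ((_i)+1) & (TX_RING_SIZE-1), matching RX_RING_ADD in dev.c:

    /* Guest side: fill the next request slot, then publish the new producer
     * index through the index block in the shared-info page. */
    i = np->net_idx->tx_req_prod;
    np->net_ring->tx_ring[i].req.id   = id;      /* small id, not a pointer */
    np->net_ring->tx_ring[i].req.size = skb->len;
    np->net_ring->tx_ring[i].req.addr = phys_to_machine(virt_to_phys(skb->data));
    np->net_idx->tx_req_prod = TX_RING_INC(i);
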
diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h
index f257caffc7..6f7eaa89b7 100644
--- a/xen/include/xeno/mm.h
+++ b/xen/include/xeno/mm.h
@@ -97,7 +97,6 @@ typedef struct pfn_info {
#define PGT_gdt_page (5<<24) /* using this page in a GDT? */
#define PGT_ldt_page (6<<24) /* using this page in an LDT? */
#define PGT_writeable_page (7<<24) /* has writable mappings of this page? */
-#define PGT_net_rx_buf (8<<24) /* this page taken by the net code. */
/*
* This bit indicates that the TLB must be flushed when the type count of this
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 3bc997bed3..9f760e5e7f 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -50,8 +50,7 @@ extern struct mm_struct init_mm;
}
#define _HYP_EVENT_NEED_RESCHED 0
-#define _HYP_EVENT_NET 1
-#define _HYP_EVENT_DIE 2
+#define _HYP_EVENT_DIE 1
#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
#define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? */
@@ -118,9 +117,7 @@ struct task_struct {
long uwarped; /* time it ran unwarped last time */
/* Network I/O */
- net_ring_t *net_ring_base;
- struct list_head net_vifs;
- int num_net_vifs;
+ net_vif_t *net_vif_list[MAX_DOMAIN_VIFS];
/* Block I/O */
blk_ring_t *blk_ring_base;
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h
index 730c1cb084..b0dc6c645e 100644
--- a/xen/include/xeno/vif.h
+++ b/xen/include/xeno/vif.h
@@ -27,59 +27,51 @@
typedef struct rx_shadow_entry_st
{
- unsigned long id;
/* IN vars */
- unsigned long addr;
- /* OUT vars */
- unsigned short size;
- unsigned char status;
- unsigned char offset;
+ unsigned short id;
+ unsigned long pte_ptr;
+ unsigned long buf_pfn;
/* PRIVATE vars */
unsigned long flush_count;
} rx_shadow_entry_t;
typedef struct tx_shadow_entry_st
{
- unsigned long id;
/* IN vars */
+ unsigned short id;
void *header;
unsigned long payload;
- unsigned short size;
/* OUT vars */
+ unsigned short size;
unsigned char status;
} tx_shadow_entry_t;
-typedef struct net_shadow_ring_st {
- rx_shadow_entry_t *rx_ring;
+typedef struct net_vif_st {
+ /* The shared rings and indexes. */
+ net_ring_t *shared_rings;
+ net_idx_t *shared_idxs;
+
+ /* The private rings and indexes. */
+ rx_shadow_entry_t rx_shadow_ring[RX_RING_SIZE];
unsigned int rx_prod; /* More buffers for filling go here. */
- unsigned int rx_idx; /* Next buffer to fill is here. */
- unsigned int rx_cons; /* Next buffer to create response for is here. */
-
- tx_shadow_entry_t *tx_ring;
- /*
- * These cannot be derived from shared variables, as not all packets
- * will end up on the shadow ring (eg. locally delivered packets).
- */
+ unsigned int rx_cons; /* Next buffer to fill is here. */
+ tx_shadow_entry_t tx_shadow_ring[TX_RING_SIZE];
unsigned int tx_prod; /* More packets for sending go here. */
unsigned int tx_idx; /* Next packet to send is here. */
- unsigned int tx_transmitted_prod; /* Next packet to finish transmission. */
unsigned int tx_cons; /* Next packet to create response for is here. */
- /* Indexes into shared ring. */
+ /* Private indexes into shared ring. */
unsigned int rx_req_cons;
unsigned int rx_resp_prod; /* private version of shared variable */
unsigned int tx_req_cons;
unsigned int tx_resp_prod; /* private version of shared variable */
-} net_shadow_ring_t;
-typedef struct net_vif_st {
- net_ring_t *net_ring;
- net_shadow_ring_t *shadow_ring;
+ /* Miscellaneous private stuff. */
int id;
struct task_struct *domain;
struct list_head list; /* scheduling list */
- struct list_head dom_list; /* domain list */
atomic_t refcnt;
+ spinlock_t rx_lock, tx_lock;
} net_vif_t;
#define get_vif(_v) (atomic_inc(&(_v)->refcnt))
@@ -89,7 +81,6 @@ do { \
} while (0) \
/* VIF-related defines. */
-#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct
#define MAX_SYSTEM_VIFS 256
/* vif globals */
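
The vif keeps its atomic refcnt, and, as the comment in create_net_vif() notes, while the refcnt is non-zero the vif holds a reference to the owning task structure; the packet paths in dev.c rely on this. A minimal sketch of the usual pattern, mirroring netif_rx(): the reference is taken under sys_vif_lock and dropped after delivery, and put_vif() is presumed to call destroy_net_vif() once the count reaches zero (an assumption based on the cleanup that function performs):

    read_lock(&sys_vif_lock);
    vif = sys_vif_list[skb->dst_vif];
    get_vif(vif);                 /* pin the vif, and hence its domain */
    read_unlock(&sys_vif_lock);

    deliver_packet(skb, vif);

    put_vif(vif);                 /* may free the vif and its task struct */
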
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 7fbf165b0e..c19fad62a2 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -50,10 +50,10 @@
#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
static void make_tx_response(net_vif_t *vif,
- unsigned long id,
- unsigned char st);
+ unsigned short id,
+ unsigned char st);
static void make_rx_response(net_vif_t *vif,
- unsigned long id,
+ unsigned short id,
unsigned short size,
unsigned char st,
unsigned char off);
@@ -491,110 +491,81 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb)
struct netif_rx_stats netdev_rx_stat[NR_CPUS];
-/*
- * update_shared_ring(void)
- *
- * This replaces flush_rx_queue as the guest event handler to move packets
- * queued in the guest ring up to the guest. Really, the packet is already
- * there, it was page flipped in deliver_packet, but this moves the ring
- * descriptor across from the shadow ring and increments the pointers.
- */
-void update_shared_ring(void)
-{
- rx_shadow_entry_t *rx;
- tx_shadow_entry_t *tx;
- net_ring_t *net_ring;
- net_shadow_ring_t *shadow_ring;
- net_vif_t *vif;
- struct list_head *ent;
-
- clear_bit(_HYP_EVENT_NET, &current->hyp_events);
-
- list_for_each(ent, &current->net_vifs)
- {
- vif = list_entry(ent, net_vif_t, dom_list);
- net_ring = vif->net_ring;
- shadow_ring = vif->shadow_ring;
-
- while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
- {
- rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
- if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
- __flush_tlb();
- shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
- make_rx_response(vif, rx->id, rx->size, rx->status, rx->offset);
- }
-
- while ( shadow_ring->tx_cons != shadow_ring->tx_transmitted_prod )
- {
- tx = shadow_ring->tx_ring + shadow_ring->tx_cons;
- shadow_ring->tx_cons = RX_RING_INC(shadow_ring->tx_cons);
- make_tx_response(vif, tx->id, tx->status);
- }
- }
-}
-
void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
{
- net_shadow_ring_t *shadow_ring;
rx_shadow_entry_t *rx;
- unsigned long *g_pte;
- struct pfn_info *g_pfn, *h_pfn;
+ unsigned long *ptep;
+ struct pfn_info *old_page, *new_page, *pte_page;
unsigned int i;
+ unsigned short size;
+ unsigned char offset, status = RING_STATUS_OK;
memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
memset(skb->nh.raw + 18, 0, ETH_ALEN);
- shadow_ring = vif->shadow_ring;
- if ( (i = shadow_ring->rx_idx) == shadow_ring->rx_prod )
+ if ( (i = vif->rx_cons) == vif->rx_prod )
return;
- rx = shadow_ring->rx_ring + i;
+ rx = vif->rx_shadow_ring + i;
- ASSERT(rx->status == RING_STATUS_OK);
- ASSERT(skb->len <= PAGE_SIZE);
-
- rx->size = skb->len;
- rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
+ size = (unsigned short)skb->len;
+ offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
spin_lock(&vif->domain->page_lock);
- g_pte = map_domain_mem(rx->addr);
+ /* Release the page-table page. */
+ pte_page = frame_table + (rx->pte_ptr >> PAGE_SHIFT);
+ put_page_type(pte_page);
+ put_page_tot(pte_page);
- g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
- h_pfn = skb->pf;
-
- h_pfn->tot_count = 1;
- g_pfn->tot_count = g_pfn->type_count = h_pfn->type_count = 0;
- h_pfn->flags = g_pfn->flags & ~PG_type_mask;
- g_pfn->flags = 0;
-
- if ( (*g_pte & _PAGE_RW) )
+ old_page = frame_table + rx->buf_pfn;
+ new_page = skb->pf;
+
+ ptep = map_domain_mem(rx->pte_ptr);
+
+ if ( (*ptep & _PAGE_PRESENT) )
{
- h_pfn->flags |= PGT_writeable_page | PG_need_flush;
- h_pfn->type_count = 1;
+ /* Bail out if the PTE has been reused under our feet. */
+ list_add(&old_page->list, &vif->domain->pg_head);
+ old_page->flags = vif->domain->domain;
+ status = RING_STATUS_BAD_PAGE;
+ goto out;
}
-
- /* Point the guest at the new machine frame. */
- machine_to_phys_mapping[h_pfn - frame_table]
- = machine_to_phys_mapping[g_pfn - frame_table];
- *g_pte = (*g_pte & ~PAGE_MASK)
- | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
- *g_pte |= _PAGE_PRESENT;
-
- unmap_domain_mem(g_pte);
- list_del(&g_pfn->list);
- list_add(&h_pfn->list, &vif->domain->pg_head);
+ /* Give the new page to the domain, marking it writeable. */
+ new_page->tot_count = new_page->type_count = 1;
+ new_page->flags = vif->domain->domain | PGT_writeable_page | PG_need_flush;
+ list_add(&new_page->list, &vif->domain->pg_head);
+
+ /* Patch the PTE to map the new page as writeable. */
+ machine_to_phys_mapping[new_page - frame_table]
+ = machine_to_phys_mapping[old_page - frame_table];
+ *ptep = (*ptep & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
+ (((new_page - frame_table) << PAGE_SHIFT) & PAGE_MASK);
+
+ unmap_domain_mem(ptep);
spin_unlock(&vif->domain->page_lock);
/* Our skbuff now points at the guest's old frame. */
- skb->pf = g_pfn;
+ skb->pf = old_page;
+
+ /* Updates must happen before releasing the descriptor. */
+ smp_wmb();
- smp_wmb(); /* updates must happen before releasing the descriptor. */
- shadow_ring->rx_idx = RX_RING_INC(i);
+ /*
+ * NB. The remote flush here should be safe, as we hold no locks. The
+ * network driver that called us should also have no nasty locks.
+ */
+ rx = vif->rx_shadow_ring + vif->rx_cons;
+ if ( rx->flush_count ==
+ atomic_read(&tlb_flush_count[vif->domain->processor]) )
+ flush_tlb_cpu(vif->domain->processor);
+
+ out:
+ vif->rx_cons = RX_RING_INC(vif->rx_cons);
+ make_rx_response(vif, rx->id, size, status, offset);
}
/**
@@ -613,7 +584,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
int netif_rx(struct sk_buff *skb)
{
- unsigned long cpu_mask;
int offset, this_cpu = smp_processor_id();
unsigned long flags;
net_vif_t *vif;
@@ -655,14 +625,11 @@ int netif_rx(struct sk_buff *skb)
get_vif(vif);
read_unlock(&sys_vif_lock);
-
deliver_packet(skb, vif);
- cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
put_vif(vif);
unmap_domain_mem(skb->head);
kfree_skb(skb);
- hyp_event_notify(cpu_mask);
local_irq_restore(flags);
return NET_RX_SUCCESS;
}
@@ -724,7 +691,8 @@ static void tx_skb_release(struct sk_buff *skb)
{
int i;
net_vif_t *vif = sys_vif_list[skb->src_vif];
- unsigned long cpu_mask, flags;
+ tx_shadow_entry_t *tx;
+ unsigned long flags;
spin_lock_irqsave(&vif->domain->page_lock, flags);
for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
@@ -736,10 +704,9 @@ static void tx_skb_release(struct sk_buff *skb)
skb_shinfo(skb)->nr_frags = 0;
- vif->shadow_ring->tx_transmitted_prod =
- TX_RING_INC(vif->shadow_ring->tx_transmitted_prod);
- cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
- hyp_event_notify(cpu_mask);
+ tx = vif->tx_shadow_ring + vif->tx_cons;
+ vif->tx_cons = TX_RING_INC(vif->tx_cons);
+ make_tx_response(vif, tx->id, tx->status);
put_vif(vif);
}
@@ -762,7 +729,7 @@ static void net_tx_action(unsigned long unused)
vif = list_entry(ent, net_vif_t, list);
get_vif(vif);
remove_from_net_schedule_list(vif);
- if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
+ if ( vif->tx_idx == vif->tx_prod )
{
put_vif(vif);
continue;
@@ -777,9 +744,9 @@ static void net_tx_action(unsigned long unused)
}
/* Pick an entry from the transmit queue. */
- tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
- vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
- if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+ tx = &vif->tx_shadow_ring[vif->tx_idx];
+ vif->tx_idx = TX_RING_INC(vif->tx_idx);
+ if ( vif->tx_idx != vif->tx_prod )
add_to_net_schedule_list_tail(vif);
ASSERT(tx->status == RING_STATUS_OK);
@@ -1790,26 +1757,27 @@ inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev)
long do_net_update(void)
{
- struct list_head *ent;
- net_ring_t *net_ring;
- net_shadow_ring_t *shadow_ring;
- net_vif_t *current_vif;
- unsigned int i, j;
+ net_ring_t *shared_rings;
+ net_vif_t *vif;
+ net_idx_t *shared_idxs;
+ unsigned int i, j, idx;
struct sk_buff *skb;
tx_req_entry_t tx;
rx_req_entry_t rx;
- unsigned long pfn;
- struct pfn_info *page;
- unsigned long *g_pte;
+ unsigned long pte_pfn, buf_pfn;
+ struct pfn_info *pte_page, *buf_page;
+ unsigned long *ptep;
int target;
u8 *g_data;
unsigned short protocol;
-
- list_for_each(ent, &current->net_vifs)
+
+ for ( idx = 0; idx < MAX_DOMAIN_VIFS; idx++ )
{
- current_vif = list_entry(ent, net_vif_t, dom_list);
- net_ring = current_vif->net_ring;
- shadow_ring = current_vif->shadow_ring;
+ if ( (vif = current->net_vif_list[idx]) == NULL )
+ break;
+
+ shared_idxs = vif->shared_idxs;
+ shared_rings = vif->shared_rings;
/*
* PHASE 1 -- TRANSMIT RING
@@ -1820,23 +1788,18 @@ long do_net_update(void)
* new producer index, but take care not to catch up with our own
* consumer index.
*/
- j = shadow_ring->tx_prod;
- for ( i = shadow_ring->tx_req_cons;
- (i != net_ring->tx_req_prod) &&
- (((shadow_ring->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
+ j = vif->tx_prod;
+ for ( i = vif->tx_req_cons;
+ (i != shared_idxs->tx_req_prod) &&
+ (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
i = TX_RING_INC(i) )
{
- if ( copy_from_user(&tx, &net_ring->tx_ring[i].req, sizeof(tx)) )
- {
- DPRINTK("Bad copy_from_user for tx net descriptor\n");
- make_tx_response(current_vif, tx.id, RING_STATUS_ERR_CFU);
- continue;
- }
+ tx = shared_rings->tx_ring[i].req;
if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
{
DPRINTK("Bad packet size: %d\n", tx.size);
- make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+ make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
continue;
}
@@ -1845,19 +1808,19 @@ long do_net_update(void)
{
DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
- make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+ make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
continue;
}
- pfn = tx.addr >> PAGE_SHIFT;
- page = frame_table + pfn;
+ buf_pfn = tx.addr >> PAGE_SHIFT;
+ buf_page = frame_table + buf_pfn;
spin_lock_irq(&current->page_lock);
- if ( (pfn >= max_page) ||
- ((page->flags & PG_domain_mask) != current->domain) )
+ if ( (buf_pfn >= max_page) ||
+ ((buf_page->flags & PG_domain_mask) != current->domain) )
{
DPRINTK("Bad page frame\n");
spin_unlock_irq(&current->page_lock);
- make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+ make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
continue;
}
@@ -1867,22 +1830,22 @@ long do_net_update(void)
init_tx_header(g_data, tx.size, the_dev));
if ( protocol == 0 )
{
- make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+ make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
goto tx_unmap_and_continue;
}
- target = __net_get_target_vif(g_data, tx.size, current_vif->id);
+ target = __net_get_target_vif(g_data, tx.size, vif->id);
if ( target > VIF_PHYSICAL_INTERFACE )
{
/* Local delivery */
if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
{
- make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+ make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
goto tx_unmap_and_continue;
}
- skb->src_vif = current_vif->id;
+ skb->src_vif = vif->id;
skb->dst_vif = target;
skb->protocol = protocol;
@@ -1902,24 +1865,24 @@ long do_net_update(void)
(void)netif_rx(skb);
- make_tx_response(current_vif, tx.id, RING_STATUS_OK);
+ make_tx_response(vif, tx.id, RING_STATUS_OK);
}
else if ( target == VIF_PHYSICAL_INTERFACE )
{
- shadow_ring->tx_ring[j].id = tx.id;
- shadow_ring->tx_ring[j].size = tx.size;
- shadow_ring->tx_ring[j].status = RING_STATUS_OK;
- shadow_ring->tx_ring[j].header =
+ vif->tx_shadow_ring[j].id = tx.id;
+ vif->tx_shadow_ring[j].size = tx.size;
+ vif->tx_shadow_ring[j].status = RING_STATUS_OK;
+ vif->tx_shadow_ring[j].header =
kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
- if ( shadow_ring->tx_ring[j].header == NULL )
+ if ( vif->tx_shadow_ring[j].header == NULL )
{
- make_tx_response(current_vif, tx.id, RING_STATUS_OK);
+ make_tx_response(vif, tx.id, RING_STATUS_OK);
goto tx_unmap_and_continue;
}
- memcpy(shadow_ring->tx_ring[j].header, g_data, PKT_PROT_LEN);
- shadow_ring->tx_ring[j].payload = tx.addr + PKT_PROT_LEN;
- get_page_tot(page);
+ memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
+ vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
+ get_page_tot(buf_page);
j = TX_RING_INC(j);
}
@@ -1928,13 +1891,13 @@ long do_net_update(void)
spin_unlock_irq(&current->page_lock);
}
- shadow_ring->tx_req_cons = i;
+ vif->tx_req_cons = i;
- if ( shadow_ring->tx_prod != j )
+ if ( vif->tx_prod != j )
{
smp_mb(); /* Let other CPUs see new descriptors first. */
- shadow_ring->tx_prod = j;
- add_to_net_schedule_list_tail(current_vif);
+ vif->tx_prod = j;
+ add_to_net_schedule_list_tail(vif);
maybe_schedule_tx_action();
}
@@ -1947,77 +1910,79 @@ long do_net_update(void)
* new producer index, but take care not to catch up with our own
* consumer index.
*/
- j = shadow_ring->rx_prod;
- for ( i = shadow_ring->rx_req_cons;
- (i != net_ring->rx_req_prod) &&
- (((shadow_ring->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
+ j = vif->rx_prod;
+ for ( i = vif->rx_req_cons;
+ (i != shared_idxs->rx_req_prod) &&
+ (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
i = RX_RING_INC(i) )
{
- if ( copy_from_user(&rx, &net_ring->rx_ring[i].req, sizeof(rx)) )
- {
- DPRINTK("Bad copy_from_user for rx net descriptor\n");
- make_rx_response(current_vif,
- rx.id, 0, RING_STATUS_ERR_CFU, 0);
- continue;
- }
+ rx = shared_rings->rx_ring[i].req;
- pfn = rx.addr >> PAGE_SHIFT;
- page = frame_table + pfn;
+ pte_pfn = rx.addr >> PAGE_SHIFT;
+ pte_page = frame_table + pte_pfn;
spin_lock_irq(&current->page_lock);
- if ( (pfn >= max_page) ||
- (page->flags != (PGT_l1_page_table | current->domain)) )
+ if ( (pte_pfn >= max_page) ||
+ ((pte_page->flags & (PG_type_mask | PG_domain_mask)) !=
+ (PGT_l1_page_table | current->domain)) )
{
DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
- current->domain, pfn, max_page, page->flags);
+ current->domain, pte_pfn, max_page, pte_page->flags);
spin_unlock_irq(&current->page_lock);
- make_rx_response(current_vif,
- rx.id, 0, RING_STATUS_BAD_PAGE, 0);
+ make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
continue;
}
- g_pte = map_domain_mem(rx.addr);
+ ptep = map_domain_mem(rx.addr);
- if ( !(*g_pte & _PAGE_PRESENT) )
+ if ( !(*ptep & _PAGE_PRESENT) )
{
DPRINTK("Invalid PTE passed down (not present)\n");
- make_rx_response(current_vif,
- rx.id, 0, RING_STATUS_BAD_PAGE, 0);
+ make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
goto rx_unmap_and_continue;
}
- page = (*g_pte >> PAGE_SHIFT) + frame_table;
-
- if ( page->tot_count != 1 )
+ buf_pfn = *ptep >> PAGE_SHIFT;
+ buf_page = frame_table + buf_pfn;
+
+ if ( ((buf_page->flags & (PG_type_mask | PG_domain_mask)) !=
+ (PGT_writeable_page | current->domain)) ||
+ (buf_page->tot_count != 1) )
{
- DPRINTK("RX page mapped multple times (%d/%d/%08x)\n",
- page->type_count, page->tot_count, page->flags);
- make_rx_response(current_vif,
- rx.id, 0, RING_STATUS_BAD_PAGE, 0);
+ DPRINTK("Need a mapped-once writeable page (%d/%d/%08x)\n",
+ buf_page->type_count, buf_page->tot_count, buf_page->flags);
+ make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
goto rx_unmap_and_continue;
}
- /* The pte they passed was good, so take it away from them. */
- *g_pte &= ~_PAGE_PRESENT;
- page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
- shadow_ring->rx_ring[j].id = rx.id;
- shadow_ring->rx_ring[j].addr = rx.addr;
- shadow_ring->rx_ring[j].status = RING_STATUS_OK;
- shadow_ring->rx_ring[j].flush_count =
- tlb_flush_count[smp_processor_id()];
+ /*
+ * The pte they passed was good, so take it away from them. We
+ * also lock down the page-table page, so it doesn't go away.
+ */
+ get_page_type(pte_page);
+ get_page_tot(pte_page);
+ *ptep &= ~_PAGE_PRESENT;
+ buf_page->flags = buf_page->type_count = buf_page->tot_count = 0;
+ list_del(&buf_page->list);
+
+ vif->rx_shadow_ring[j].id = rx.id;
+ vif->rx_shadow_ring[j].pte_ptr = rx.addr;
+ vif->rx_shadow_ring[j].buf_pfn = buf_pfn;
+ vif->rx_shadow_ring[j].flush_count =
+ atomic_read(&tlb_flush_count[smp_processor_id()]);
j = RX_RING_INC(j);
rx_unmap_and_continue:
- unmap_domain_mem(g_pte);
+ unmap_domain_mem(ptep);
spin_unlock_irq(&current->page_lock);
}
- shadow_ring->rx_req_cons = i;
+ vif->rx_req_cons = i;
- if ( shadow_ring->rx_prod != j )
+ if ( vif->rx_prod != j )
{
smp_mb(); /* Let other CPUs see new descriptors first. */
- shadow_ring->rx_prod = j;
+ vif->rx_prod = j;
}
}
@@ -2025,55 +1990,57 @@ long do_net_update(void)
}
-static void make_tx_response(net_vif_t *vif,
- unsigned long id,
- unsigned char st)
+static void make_tx_response(net_vif_t *vif,
+ unsigned short id,
+ unsigned char st)
{
unsigned long flags;
- net_shadow_ring_t *shadow = vif->shadow_ring;
unsigned int pos;
- tx_resp_entry_t *resp, privresp;
+ tx_resp_entry_t *resp;
/* Place on the response ring for the relevant domain. */
- local_irq_save(flags);
- pos = shadow->tx_resp_prod;
- resp = &vif->net_ring->tx_ring[pos].resp;
- privresp.id = id;
- privresp.status = st;
- copy_to_user(resp, &privresp, sizeof(privresp));
+ spin_lock_irqsave(&vif->tx_lock, flags);
+ pos = vif->tx_resp_prod;
+ resp = &vif->shared_rings->tx_ring[pos].resp;
+ resp->id = id;
+ resp->status = st;
pos = TX_RING_INC(pos);
- shadow->tx_resp_prod = vif->net_ring->tx_resp_prod = pos;
- if ( pos == vif->net_ring->rx_event )
- set_bit(_EVENT_NET_TX, &current->shared_info->events);
- local_irq_restore(flags);
+ vif->tx_resp_prod = vif->shared_idxs->tx_resp_prod = pos;
+ if ( pos == vif->shared_idxs->rx_event )
+ {
+ unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
+ guest_event_notify(cpu_mask);
+ }
+ spin_unlock_irqrestore(&vif->tx_lock, flags);
}
static void make_rx_response(net_vif_t *vif,
- unsigned long id,
+ unsigned short id,
unsigned short size,
unsigned char st,
unsigned char off)
{
unsigned long flags;
- net_shadow_ring_t *shadow = vif->shadow_ring;
unsigned int pos;
- rx_resp_entry_t *resp, privresp;
+ rx_resp_entry_t *resp;
/* Place on the response ring for the relevant domain. */
- local_irq_save(flags);
- pos = shadow->rx_resp_prod;
- resp = &vif->net_ring->rx_ring[pos].resp;
- privresp.id = id;
- privresp.size = size;
- privresp.status = st;
- privresp.offset = off;
- copy_to_user(resp, &privresp, sizeof(privresp));
+ spin_lock_irqsave(&vif->rx_lock, flags);
+ pos = vif->rx_resp_prod;
+ resp = &vif->shared_rings->rx_ring[pos].resp;
+ resp->id = id;
+ resp->size = size;
+ resp->status = st;
+ resp->offset = off;
pos = RX_RING_INC(pos);
- shadow->rx_resp_prod = vif->net_ring->rx_resp_prod = pos;
- if ( pos == vif->net_ring->rx_event )
- set_bit(_EVENT_NET_RX, &current->shared_info->events);
- local_irq_restore(flags);
+ vif->rx_resp_prod = vif->shared_idxs->rx_resp_prod = pos;
+ if ( pos == vif->shared_idxs->rx_event )
+ {
+ unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_RX);
+ guest_event_notify(cpu_mask);
+ }
+ spin_unlock_irqrestore(&vif->rx_lock, flags);
}
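
Taken together, the receive path above is a page flip split across three functions, with the vif's shadow ring carrying the state in between. A condensed sketch of the lifecycle using the names from this patch (locking, error paths and the deferred TLB-flush check omitted):

    /* 1. do_net_update(): the guest posts the machine address of a PTE that
     *    maps a spare buffer page. Xen pins the page-table page, hides the
     *    buffer from the guest and parks the details in the shadow ring. */
    get_page_type(pte_page); get_page_tot(pte_page);
    *ptep &= ~_PAGE_PRESENT;
    list_del(&buf_page->list);
    vif->rx_shadow_ring[j].pte_ptr = rx.addr;
    vif->rx_shadow_ring[j].buf_pfn = buf_pfn;

    /* 2. deliver_packet(): the frame that already holds the packet (skb->pf)
     *    is given to the domain and the saved PTE is rewritten to map it;
     *    the skbuff inherits the guest's original frame for reuse. */
    *ptep = (*ptep & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
            (((new_page - frame_table) << PAGE_SHIFT) & PAGE_MASK);
    skb->pf = old_page;

    /* 3. make_rx_response(): size, offset and status are written straight
     *    into the shared ring and the guest is sent _EVENT_NET_RX. */
    make_rx_response(vif, rx->id, size, status, offset);
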
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
index 3ae5e3d1a0..03f9939e4e 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
@@ -56,9 +56,26 @@ struct net_private
atomic_t tx_entries;
unsigned int rx_resp_cons, tx_resp_cons, tx_full;
net_ring_t *net_ring;
+ net_idx_t *net_idx;
spinlock_t tx_lock;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+ * array is an index into a chain of free entries.
+ */
+ struct sk_buff *tx_skbs[TX_RING_SIZE];
+ struct sk_buff *rx_skbs[RX_RING_SIZE];
};
/* Access macros for acquiring/freeing slots in {tx,rx}_skbs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id) \
+ (_list)[(_id)] = (_list)[0]; \
+ (_list)[0] = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list) \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+ (_list)[0] = (_list)[_id]; \
+ _id; })
+
static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
@@ -67,33 +84,33 @@ static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
" tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
- np->net_ring->tx_req_prod, np->net_ring->tx_resp_prod,
- np->net_ring->tx_event,
+ np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
+ np->net_idx->tx_event,
test_bit(__LINK_STATE_XOFF, &dev->state));
+ printk(KERN_ALERT "rx_resp_cons = %d,"
+ " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n",
+ np->rx_resp_cons, np->net_idx->rx_req_prod,
+ np->net_idx->rx_resp_prod, np->net_idx->rx_event);
}
static int network_open(struct net_device *dev)
{
struct net_private *np = dev->priv;
- int error = 0;
+ int i, error = 0;
np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
memset(&np->stats, 0, sizeof(np->stats));
spin_lock_init(&np->tx_lock);
atomic_set(&np->tx_entries, 0);
memset(np->net_ring, 0, sizeof(*np->net_ring));
+ memset(np->net_idx, 0, sizeof(*np->net_idx));
- np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t),
- GFP_KERNEL);
- np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t),
- GFP_KERNEL);
- if ( (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
- {
- printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name);
- error = -ENOBUFS;
- goto fail;
- }
+ /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+ for ( i = 0; i < TX_RING_SIZE; i++ )
+ np->tx_skbs[i] = (void *)(i+1);
+ for ( i = 0; i < RX_RING_SIZE; i++ )
+ np->rx_skbs[i] = (void *)(i+1);
network_alloc_rx_buffers(dev);
@@ -135,8 +152,6 @@ static int network_open(struct net_device *dev)
return 0;
fail:
- if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
- if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
kfree(np);
return error;
}
@@ -154,11 +169,12 @@ static void network_tx_buf_gc(struct net_device *dev)
spin_lock_irqsave(&np->tx_lock, flags);
do {
- prod = np->net_ring->tx_resp_prod;
+ prod = np->net_idx->tx_resp_prod;
for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
{
- skb = (struct sk_buff *)tx_ring[i].resp.id;
+ skb = np->tx_skbs[tx_ring[i].resp.id];
+ ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
dev_kfree_skb_any(skb);
atomic_dec(&np->tx_entries);
}
@@ -166,11 +182,11 @@ static void network_tx_buf_gc(struct net_device *dev)
np->tx_resp_cons = prod;
/* Set a new event, then check for race with update of tx_cons. */
- np->net_ring->tx_event =
+ np->net_idx->tx_event =
TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
smp_mb();
}
- while ( prod != np->net_ring->tx_resp_prod );
+ while ( prod != np->net_idx->tx_resp_prod );
if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
{
@@ -192,24 +208,28 @@ inline pte_t *get_ppte(void *addr)
static void network_alloc_rx_buffers(struct net_device *dev)
{
- unsigned int i;
+ unsigned int i, id;
struct net_private *np = dev->priv;
struct sk_buff *skb;
unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);
- for ( i = np->net_ring->rx_req_prod; i != end; i = RX_RING_INC(i) )
+ for ( i = np->net_idx->rx_req_prod; i != end; i = RX_RING_INC(i) )
{
skb = dev_alloc_skb(RX_BUF_SIZE);
if ( skb == NULL ) break;
skb->dev = dev;
- np->net_ring->rx_ring[i].req.id = (unsigned long)skb;
+
+ id = GET_ID_FROM_FREELIST(np->rx_skbs);
+ np->rx_skbs[id] = skb;
+
+ np->net_ring->rx_ring[i].req.id = (unsigned short)id;
np->net_ring->rx_ring[i].req.addr =
virt_to_machine(get_ppte(skb->head));
}
- np->net_ring->rx_req_prod = i;
+ np->net_idx->rx_req_prod = i;
- np->net_ring->rx_event = RX_RING_INC(np->rx_resp_cons);
+ np->net_idx->rx_event = RX_RING_INC(np->rx_resp_cons);
/*
* We may have allocated buffers which have entries outstanding in
@@ -227,17 +247,17 @@ static void network_free_rx_buffers(struct net_device *dev)
struct sk_buff *skb;
for ( i = np->rx_resp_cons;
- i != np->net_ring->rx_req_prod;
+ i != np->net_idx->rx_req_prod;
i = RX_RING_INC(i) )
{
- skb = (struct sk_buff *)np->net_ring->rx_ring[i].req.id;
+ skb = np->rx_skbs[np->net_ring->rx_ring[i].req.id];
dev_kfree_skb_any(skb);
}
}
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- unsigned int i;
+ unsigned int i, id;
struct net_private *np = (struct net_private *)dev->priv;
if ( np->tx_full )
@@ -246,7 +266,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif_stop_queue(dev);
return -ENOBUFS;
}
- i = np->net_ring->tx_req_prod;
+ i = np->net_idx->tx_req_prod;
if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
{
@@ -258,11 +278,14 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb = new_skb;
}
- np->net_ring->tx_ring[i].req.id = (unsigned long)skb;
+ id = GET_ID_FROM_FREELIST(np->tx_skbs);
+ np->tx_skbs[id] = skb;
+
+ np->net_ring->tx_ring[i].req.id = (unsigned short)id;
np->net_ring->tx_ring[i].req.addr =
phys_to_machine(virt_to_phys(skb->data));
np->net_ring->tx_ring[i].req.size = skb->len;
- np->net_ring->tx_req_prod = TX_RING_INC(i);
+ np->net_idx->tx_req_prod = TX_RING_INC(i);
atomic_inc(&np->tx_entries);
np->stats.tx_bytes += skb->len;
@@ -294,11 +317,13 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
again:
for ( i = np->rx_resp_cons;
- i != np->net_ring->rx_resp_prod;
+ i != np->net_idx->rx_resp_prod;
i = RX_RING_INC(i) )
{
rx = &np->net_ring->rx_ring[i].resp;
- skb = (struct sk_buff *)rx->id;
+
+ skb = np->rx_skbs[rx->id];
+ ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
if ( rx->status != RING_STATUS_OK )
{
@@ -344,7 +369,7 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
/* Deal with hypervisor racing our resetting of rx_event. */
smp_mb();
- if ( np->net_ring->rx_resp_prod != i ) goto again;
+ if ( np->net_idx->rx_resp_prod != i ) goto again;
}
@@ -389,14 +414,21 @@ static struct net_device_stats *network_get_stats(struct net_device *dev)
int __init init_module(void)
{
- int i, err;
+ int i, fixmap_idx=-1, err;
struct net_device *dev;
struct net_private *np;
INIT_LIST_HEAD(&dev_list);
- for ( i = 0; i < start_info.num_net_rings; i++ )
+ for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
{
+ if ( start_info.net_rings[i] == 0 )
+ continue;
+
+ /* We actually only support up to 4 vifs right now. */
+ if ( ++fixmap_idx == 4 )
+ break;
+
dev = alloc_etherdev(sizeof(struct net_private));
if ( dev == NULL )
{
@@ -404,8 +436,11 @@ int __init init_module(void)
goto fail;
}
+ set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]);
+
np = dev->priv;
- np->net_ring = start_info.net_rings + i;
+ np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx);
+ np->net_idx = &HYPERVISOR_shared_info->net_idx[i];
SET_MODULE_OWNER(dev);
dev->open = network_open;
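
Because ring ids are now an unsigned short rather than an unsigned long that could smuggle a pointer, the driver above keeps its sk_buff pointers in the tx_skbs[]/rx_skbs[] arrays and passes small indexes through the ring instead. Free slots are chained through the array itself: entry 0 is the head of the chain and each free slot stores the index of the next free slot, cast through the pointer type. A short sketch of the scheme in action, mirroring network_start_xmit() and network_tx_buf_gc():

    /* After network_open(), tx_skbs[0] == (void *)1, tx_skbs[1] == (void *)2,
     * and so on, so the free chain initially runs 1, 2, 3, ... */
    id = GET_ID_FROM_FREELIST(np->tx_skbs);   /* pop a free slot (1 the first time) */
    np->tx_skbs[id] = skb;                    /* the slot now holds a real pointer  */
    np->net_ring->tx_ring[i].req.id = (unsigned short)id;

    /* ... later, when the response carrying the same id comes back ... */
    skb = np->tx_skbs[tx_ring[i].resp.id];
    ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
    dev_kfree_skb_any(skb);
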
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c
index eb24b1ccbf..0806d775f7 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c
@@ -236,10 +236,7 @@ void __init paging_init(void)
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
- /*
- * XXX We do this conversion early, so that all other page tables
- * will automatically get this mapping.
- */
+ /* Cheesy: this can probably be moved to the blkdev driver. */
set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring);
#ifdef CONFIG_HIGHMEM
diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h
index 0945783047..eee16cb240 100644
--- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h
+++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h
@@ -43,6 +43,10 @@ enum fixed_addresses {
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
FIX_BLKRING_BASE,
+ FIX_NETRING0_BASE,
+ FIX_NETRING1_BASE,
+ FIX_NETRING2_BASE,
+ FIX_NETRING3_BASE,
__end_of_permanent_fixed_addresses,
__end_of_fixed_addresses
};
@@ -70,27 +74,13 @@ extern void __set_fixmap (enum fixed_addresses idx,
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-extern void __this_fixmap_does_not_exist(void);
-
/*
* 'index to address' translation. If anyone tries to use the idx
directly without translation, we catch the bug with a NULL-dereference
* kernel oops. Illegal ranges of incoming indices are caught too.
*/
-static inline unsigned long fix_to_virt(const unsigned int idx)
+static inline unsigned long fix_to_virt(unsigned int idx)
{
- /*
- * this branch gets completely eliminated after inlining,
- * except when someone tries to use fixaddr indices in an
- * illegal way. (such as mixing up address types or using
- * out-of-range indices).
- *
- * If it doesn't get removed, the linker will complain
- * loudly with a reasonably clear error message..
- */
- if (idx >= __end_of_fixed_addresses)
- __this_fixmap_does_not_exist();
-
return __fix_to_virt(idx);
}