-rw-r--r--  xen/arch/i386/smp.c                                                |  59
-rw-r--r--  xen/common/domain.c                                                |  51
-rw-r--r--  xen/common/event.c                                                 |   4
-rw-r--r--  xen/common/kernel.c                                                |   1
-rw-r--r--  xen/common/network.c                                               | 107
-rw-r--r--  xen/include/asm-i386/flushtlb.h                                    |  14
-rw-r--r--  xen/include/asm-i386/pgalloc.h                                     |  15
-rw-r--r--  xen/include/hypervisor-ifs/hypervisor-if.h                         |  15
-rw-r--r--  xen/include/hypervisor-ifs/network.h                               |  31
-rw-r--r--  xen/include/xeno/mm.h                                              |   1
-rw-r--r--  xen/include/xeno/sched.h                                           |   7
-rw-r--r--  xen/include/xeno/vif.h                                             |  43
-rw-r--r--  xen/net/dev.c                                                      | 397
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c   | 107
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c                   |   5
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h             |  20
16 files changed, 420 insertions(+), 457 deletions(-)
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index bc0952fc31..c049ed0e50 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -255,8 +255,10 @@ static inline void send_IPI_all(int vector)
  */
 static volatile unsigned long flush_cpumask;
+#if 0
 static struct mm_struct * flush_mm;
 static unsigned long flush_va;
+#endif
 static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
 #define FLUSH_ALL 0xffffffff
 
@@ -323,90 +325,55 @@ asmlinkage void smp_invalidate_interrupt (void)
     if (!test_bit(cpu, &flush_cpumask))
         return;
 
-    /*
-     * This was a BUG() but until someone can quote me the
-     * line from the intel manual that guarantees an IPI to
-     * multiple CPUs is retried _only_ on the erroring CPUs
-     * its staying as a return
-     *
-     * BUG();
-     */
-
+
+#if 0
     if (flush_mm == cpu_tlbstate[cpu].active_mm) {
         if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
             if (flush_va == FLUSH_ALL)
+#endif
                 local_flush_tlb();
+#if 0
             else
                 __flush_tlb_one(flush_va);
         } else
             leave_mm(cpu);
     }
+#endif
     ack_APIC_irq();
     clear_bit(cpu, &flush_cpumask);
 }
 
-static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
-                              unsigned long va)
+void flush_tlb_others(unsigned long cpumask)
 {
-    /*
-     * A couple of (to be removed) sanity checks:
-     *
-     * - we do not send IPIs to not-yet booted CPUs.
-     * - current CPU must not be in mask
-     * - mask must exist :)
-     */
-    if (!cpumask)
-        BUG();
-    if ((cpumask & cpu_online_map) != cpumask)
-        BUG();
-    if (cpumask & (1 << smp_processor_id()))
-        BUG();
-    if (!mm)
-        BUG();
-
-    /*
-     * i'm not happy about this global shared spinlock in the
-     * MM hot path, but we'll see how contended it is.
-     * Temporarily this turns IRQs off, so that lockups are
-     * detected by the NMI watchdog.
-     */
     spin_lock(&tlbstate_lock);
-
-    flush_mm = mm;
-    flush_va = va;
     atomic_set_mask(cpumask, &flush_cpumask);
-    /*
-     * We have to send the IPI only to
-     * CPUs affected.
-     */
     send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
-    while (flush_cpumask)
-        /* nothing. lockup detection does not belong here */;
-
-    flush_mm = NULL;
-    flush_va = 0;
+    while (flush_cpumask) continue;
     spin_unlock(&tlbstate_lock);
 }
 
 void flush_tlb_current_task(void)
 {
+#if 0
     struct mm_struct *mm = &current->mm;
     unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
 
     local_flush_tlb();
     if (cpu_mask)
         flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+#endif
 }
 
 void flush_tlb_mm (struct mm_struct * mm)
 {
+#if 0
     unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
 
     if (current->active_mm == mm)
         local_flush_tlb();
     if (cpu_mask)
         flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+#endif
 }
 
 #if 0
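The interrupt handler above now does a full local flush regardless of mm or address, so the initiator/responder handshake reduces to a single shared bitmask. The program below is a compilable userspace sketch of just that handshake, not Xen code: all names are stand-ins and pthreads replace IPIs. The initiator sets the target bits, each "CPU" flushes and clears its own bit, and the initiator spins until the mask drains.

/* Userspace model of the reduced TLB-shootdown handshake (illustrative only). */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 4

static atomic_ulong flush_cpumask;   /* stands in for the real flush_cpumask */
static pthread_mutex_t tlbstate_lock = PTHREAD_MUTEX_INITIALIZER;

static void local_flush_tlb(int cpu) /* placeholder for the real flush */
{
    printf("cpu%d: full TLB flush\n", cpu);
}

/* What smp_invalidate_interrupt does once the mm/va cases are gone. */
static void *invalidate_interrupt(void *arg)
{
    int cpu = (int)(long)arg;
    while (!(atomic_load(&flush_cpumask) & (1UL << cpu)))
        ;                            /* wait for the "IPI" */
    local_flush_tlb(cpu);
    atomic_fetch_and(&flush_cpumask, ~(1UL << cpu)); /* ack to the initiator */
    return NULL;
}

/* What flush_tlb_others does: set the mask, kick, spin until acked. */
static void flush_tlb_others_model(unsigned long cpumask)
{
    pthread_mutex_lock(&tlbstate_lock);
    atomic_fetch_or(&flush_cpumask, cpumask);
    /* send_IPI_mask() would go here; the threads poll instead. */
    while (atomic_load(&flush_cpumask) != 0)
        continue;
    pthread_mutex_unlock(&tlbstate_lock);
}

int main(void)
{
    pthread_t t[NR_CPUS];
    for (long i = 1; i < NR_CPUS; i++)  /* cpu0 is the initiator */
        pthread_create(&t[i], NULL, invalidate_interrupt, (void *)i);
    flush_tlb_others_model(0x0e);       /* flush cpus 1-3 */
    for (long i = 1; i < NR_CPUS; i++)
        pthread_join(t[i], NULL);
    return 0;
}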
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 14fcf8eaec..c4114ae8c7 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -62,13 +62,9 @@ struct task_struct *do_newdomain(unsigned int dom_id, unsigned int cpu)
     p->addr_limit = USER_DS;
     p->active_mm  = &p->mm;
 
-    p->num_net_vifs = 0;
-
     sched_add_domain(p);
 
-    INIT_LIST_HEAD(&p->net_vifs);
-
-    p->net_ring_base = (net_ring_t *)(p->shared_info + 1);
     INIT_LIST_HEAD(&p->pg_head);
     p->max_pages = p->tot_pages = 0;
     write_lock_irqsave(&tasklist_lock, flags);
@@ -112,8 +108,7 @@ void kill_domain_with_errmsg(const char *err)
 
 void __kill_domain(struct task_struct *p)
 {
-    struct list_head *ent;
-    net_vif_t *vif;
+    int i;
 
     if ( p->domain == 0 )
     {
@@ -128,11 +123,8 @@ void __kill_domain(struct task_struct *p)
 
     unlink_blkdev_info(p);
 
-    while ( (ent = p->net_vifs.next) != &p->net_vifs )
-    {
-        vif = list_entry(ent, net_vif_t, dom_list);
-        unlink_net_vif(vif);
-    }
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+        unlink_net_vif(p->net_vif_list[i]);
 
     if ( p == current )
     {
@@ -300,8 +292,9 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
     start_info_t * virt_startinfo_addr;
     unsigned long virt_stack_addr;
     unsigned long phys_l2tab;
-    net_ring_t *net_ring;
+    net_ring_t *shared_rings;
     net_vif_t *net_vif;
+    int i;
 
     /* High entries in page table must contain hypervisor
      * mem mappings - set them up.
@@ -363,15 +356,16 @@ int final_setup_guestos(struct task_struct * p, dom_meminfo_t * meminfo)
     /* Add virtual network interfaces and point to them in startinfo. */
     while (meminfo->num_vifs-- > 0) {
         net_vif = create_net_vif(p->domain);
-        net_ring = net_vif->net_ring;
-        if (!net_ring) panic("no network ring!\n");
+        shared_rings = net_vif->shared_rings;
+        if (!shared_rings) panic("no network ring!\n");
     }
 
-/* XXX SMH: horrible hack to convert hypervisor VAs in SHIP to guest VAs  */
-#define SH2G(_x) (meminfo->virt_shinfo_addr | (((unsigned long)(_x)) & 0xFFF))
-
-    virt_startinfo_addr->net_rings = (net_ring_t *)SH2G(p->net_ring_base);
-    virt_startinfo_addr->num_net_rings = p->num_net_vifs;
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+    {
+        if ( p->net_vif_list[i] == NULL ) continue;
+        virt_startinfo_addr->net_rings[i] =
+            virt_to_phys(p->net_vif_list[i]->shared_rings);
+    }
 
     /* Add block io interface */
     virt_startinfo_addr->blk_ring = virt_to_phys(p->blk_ring_base);
@@ -422,7 +416,7 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
     l2_pgentry_t *l2tab, *l2start;
     l1_pgentry_t *l1tab = NULL, *l1start = NULL;
     struct pfn_info *page = NULL;
-    net_ring_t *net_ring;
+    net_ring_t *shared_rings;
     net_vif_t *net_vif;
 
     /* Sanity! */
@@ -581,7 +575,6 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
     p->shared_info->cpu_freq    = cpu_freq;
     p->shared_info->domain_time = 0;
-
     virt_startinfo_address = (start_info_t *)
         (virt_load_address + ((alloc_index - 1) << PAGE_SHIFT));
     virt_stack_address  = (unsigned long)virt_startinfo_address;
@@ -628,16 +621,16 @@ int setup_guestos(struct task_struct *p, dom0_newdomain_t *params,
     /* Add virtual network interfaces and point to them in startinfo. */
     while (params->num_vifs-- > 0) {
         net_vif = create_net_vif(dom);
-        net_ring = net_vif->net_ring;
-        if (!net_ring) panic("no network ring!\n");
+        shared_rings = net_vif->shared_rings;
+        if (!shared_rings) panic("no network ring!\n");
     }
 
-/* XXX SMH: horrible hack to convert hypervisor VAs in SHIP to guest VAs  */
-#define SHIP2GUEST(_x) (virt_shinfo_address | (((unsigned long)(_x)) & 0xFFF))
-
-    virt_startinfo_address->net_rings = 
-        (net_ring_t *)SHIP2GUEST(p->net_ring_base);
-    virt_startinfo_address->num_net_rings = p->num_net_vifs;
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+    {
+        if ( p->net_vif_list[i] == NULL ) continue;
+        virt_startinfo_address->net_rings[i] =
+            virt_to_phys(p->net_vif_list[i]->shared_rings);
+    }
 
     /* Add block io interface */
     virt_startinfo_address->blk_ring = virt_to_phys(p->blk_ring_base);
diff --git a/xen/common/event.c b/xen/common/event.c
index 2774806443..b2365f7dfe 100644
--- a/xen/common/event.c
+++ b/xen/common/event.c
@@ -14,14 +14,12 @@ typedef void (*hyp_event_callback_fn_t)(void);
 
 extern void schedule(void);
-extern void update_shared_ring(void);
 
 /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
 static hyp_event_callback_fn_t event_call_fn[] =
 {
     schedule,
-    update_shared_ring,
-    kill_domain,
+    kill_domain
 };
 
 /* Handle outstanding events for the currently-executing domain. */
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index ad321af3fb..8d569182d3 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -176,7 +176,6 @@ void cmain (unsigned long magic, multiboot_info_t *mbi)
     current->shared_info = (void *)get_free_page(GFP_KERNEL);
     memset(current->shared_info, 0, sizeof(shared_info_t));
     set_fs(USER_DS);
-    current->num_net_vifs = 0;
 
     start_of_day();
diff --git a/xen/common/network.c b/xen/common/network.c
index 2cdf11a9a5..e7111fa0f3 100644
--- a/xen/common/network.c
+++ b/xen/common/network.c
@@ -50,44 +50,43 @@ void print_net_rule_list();
 
 net_vif_t *create_net_vif(int domain)
 {
-    net_vif_t *new_vif;
-    net_ring_t *new_ring;
-    net_shadow_ring_t *shadow_ring;
-    struct task_struct *dom_task;
+    int dom_vif_idx;
+    net_vif_t *new_vif = NULL;
+    net_ring_t *new_ring = NULL;
+    struct task_struct *p = NULL;
     unsigned long flags;
 
-    if ( !(dom_task = find_domain_by_id(domain)) )
+    if ( !(p = find_domain_by_id(domain)) )
         return NULL;
 
-    if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
-        return NULL;
-    
-    new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
-    memset(new_ring, 0, sizeof(net_ring_t));
+    for ( dom_vif_idx = 0; dom_vif_idx < MAX_DOMAIN_VIFS; dom_vif_idx++ )
+        if ( p->net_vif_list[dom_vif_idx] == NULL ) break;
+    if ( dom_vif_idx == MAX_DOMAIN_VIFS )
+        goto fail;
 
-    shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
-    if ( shadow_ring == NULL ) goto fail;
-    memset(shadow_ring, 0, sizeof(*shadow_ring));
+    if ( (new_vif = kmem_cache_alloc(net_vif_cache, GFP_KERNEL)) == NULL )
+        goto fail;
 
-    shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
-                                   * sizeof(rx_shadow_entry_t), GFP_KERNEL);
-    shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
-                                   * sizeof(tx_shadow_entry_t), GFP_KERNEL);
-    if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
-        goto fail;
+    memset(new_vif, 0, sizeof(*new_vif));
+
+    if ( sizeof(net_ring_t) > PAGE_SIZE ) BUG();
+    new_ring = (net_ring_t *)get_free_page(GFP_KERNEL);
+    clear_page(new_ring);
+    SHARE_PFN_WITH_DOMAIN(virt_to_page(new_ring), domain);
 
     /*
      * Fill in the new vif struct. Note that, while the vif's refcnt is
      * non-zero, we hold a reference to the task structure.
      */
     atomic_set(&new_vif->refcnt, 1);
-    new_vif->net_ring    = new_ring;
-    new_vif->shadow_ring = shadow_ring;
-    new_vif->domain      = dom_task;
-    new_vif->list.next   = NULL;
+    new_vif->shared_rings = new_ring;
+    new_vif->shared_idxs  = &p->shared_info->net_idx[dom_vif_idx];
+    new_vif->domain       = p;
+    new_vif->list.next    = NULL;
+    spin_lock_init(&new_vif->rx_lock);
+    spin_lock_init(&new_vif->tx_lock);
 
-    list_add(&new_vif->dom_list, &dom_task->net_vifs);
-    dom_task->num_net_vifs++;
+    p->net_vif_list[dom_vif_idx] = new_vif;
 
     write_lock_irqsave(&sys_vif_lock, flags);
     new_vif->id = sys_vif_count;
@@ -96,16 +95,11 @@ net_vif_t *create_net_vif(int domain)
 
     return new_vif;
 
-fail:
-    kmem_cache_free(net_vif_cache, new_vif);
-    if ( shadow_ring != NULL )
-    {
-        if ( shadow_ring->rx_ring ) kfree(shadow_ring->rx_ring);
-        if ( shadow_ring->tx_ring ) kfree(shadow_ring->tx_ring);
-        kfree(shadow_ring);
-    }
-
-    free_task_struct(dom_task);
+ fail:
+    if ( new_vif != NULL )
+        kmem_cache_free(net_vif_cache, new_vif);
+    if ( p != NULL )
+        free_task_struct(p);
     return NULL;
 }
 
@@ -118,25 +112,33 @@ void destroy_net_vif(net_vif_t *vif)
 
     /* Return any outstanding receive buffers to the guest OS. */
     spin_lock_irqsave(&p->page_lock, flags);
-    for ( i = vif->shadow_ring->rx_idx;
-          i != vif->shadow_ring->rx_req_cons;
-          i = ((i+1) & (RX_RING_SIZE-1)) )
+    for ( i = vif->rx_cons; i != vif->rx_prod; i = ((i+1) & (RX_RING_SIZE-1)) )
     {
-        rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
-        if ( rx->status != RING_STATUS_OK ) continue;
-        pte = map_domain_mem(rx->addr);
-        *pte |= _PAGE_PRESENT;
-        page = frame_table + (*pte >> PAGE_SHIFT);
-        page->flags &= ~PG_type_mask;
-        if ( (*pte & _PAGE_RW) )
+        rx_shadow_entry_t *rx = vif->rx_shadow_ring + i;
+
+        /* Release the page-table page. */
+        page = frame_table + (rx->pte_ptr >> PAGE_SHIFT);
+        put_page_type(page);
+        put_page_tot(page);
+
+        /* Give the buffer page back to the domain. */
+        page = frame_table + rx->buf_pfn;
+        list_add(&page->list, &p->pg_head);
+        page->flags = vif->domain->domain;
+
+        /* Patch up the PTE if it hasn't changed under our feet. */
+        pte = map_domain_mem(rx->pte_ptr);
+        if ( !(*pte & _PAGE_PRESENT) )
+        {
+            *pte = (rx->buf_pfn<<PAGE_SHIFT) | (*pte & ~PAGE_MASK) |
+                _PAGE_RW | _PAGE_PRESENT;
             page->flags |= PGT_writeable_page | PG_need_flush;
+            page->type_count = page->tot_count = 1;
+        }
         unmap_domain_mem(pte);
     }
     spin_unlock_irqrestore(&p->page_lock, flags);
 
-    kfree(vif->shadow_ring->tx_ring);
-    kfree(vif->shadow_ring->rx_ring);
-    kfree(vif->shadow_ring);
     kmem_cache_free(net_vif_cache, vif);
     free_task_struct(p);
 }
@@ -144,11 +146,16 @@ void destroy_net_vif(net_vif_t *vif)
 void unlink_net_vif(net_vif_t *vif)
 {
     unsigned long flags;
-    list_del(&vif->dom_list);
-    vif->domain->num_net_vifs--;
+    int i;
+
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
+        if ( vif->domain->net_vif_list[i] == vif )
+            vif->domain->net_vif_list[i] = NULL;
+
     write_lock_irqsave(&sys_vif_lock, flags);
     sys_vif_list[vif->id] = NULL;
     write_unlock_irqrestore(&sys_vif_lock, flags);
+
     put_vif(vif);
 }
diff --git a/xen/include/asm-i386/flushtlb.h b/xen/include/asm-i386/flushtlb.h
index 306839c6a4..a021597ad6 100644
--- a/xen/include/asm-i386/flushtlb.h
+++ b/xen/include/asm-i386/flushtlb.h
@@ -11,27 +11,25 @@
 #define __FLUSHTLB_H
 
 #include <xeno/smp.h>
+#include <asm/atomic.h>
 
-unsigned long tlb_flush_count[NR_CPUS];
-//#if 0
+atomic_t tlb_flush_count[NR_CPUS];
 
 #define __read_cr3(__var)                                   \
     do {                                                    \
         __asm__ __volatile (                                \
             "movl %%cr3, %0;"                               \
             : "=r" (__var));                                \
     } while (0)
-//#endif
 
 #define __write_cr3_counted(__pa)                           \
     do {                                                    \
         __asm__ __volatile__ (                              \
             "movl %0, %%cr3;"                               \
-            :: "r" (__pa)                                   \
+            :: "r" (__pa)                                   \
            : "memory");                                     \
-        tlb_flush_count[smp_processor_id()]++;              \
+        atomic_inc(&tlb_flush_count[smp_processor_id()]);   \
     } while (0)
-//#endif
 
 #define __flush_tlb_counted()                               \
     do {                                                    \
        unsigned int tmpreg;                                 \
                                                             \
@@ -39,9 +37,9 @@ unsigned long tlb_flush_count[NR_CPUS];
        __asm__ __volatile__(                                \
             "movl %%cr3, %0;  # flush TLB \n"               \
             "movl %0, %%cr3;              "                 \
-            : "=r" (tmpreg)                                 \
+            : "=r" (tmpreg)                                 \
             :: "memory");                                   \
-        tlb_flush_count[smp_processor_id()]++;              \
+        atomic_inc(&tlb_flush_count[smp_processor_id()]);   \
     } while (0)
 
 #endif
diff --git a/xen/include/asm-i386/pgalloc.h b/xen/include/asm-i386/pgalloc.h
index fcba5e1585..ed5f9cdb16 100644
--- a/xen/include/asm-i386/pgalloc.h
+++ b/xen/include/asm-i386/pgalloc.h
@@ -60,6 +60,11 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
         __flush_tlb();
 }
 
+static inline void flush_tlb_cpu(unsigned int cpu)
+{
+    __flush_tlb();
+}
+
 #if 0
 static inline void flush_tlb_page(struct vm_area_struct *vma,
                                   unsigned long addr)
@@ -86,7 +91,6 @@ static inline void flush_tlb_range(struct mm_struct *mm,
 extern void flush_tlb_all(void);
 extern void flush_tlb_current_task(void);
 extern void flush_tlb_mm(struct mm_struct *);
-/*extern void flush_tlb_page(struct vm_area_struct *, unsigned long);*/
 
 #define flush_tlb() flush_tlb_current_task()
 
@@ -95,6 +99,15 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u
     flush_tlb_mm(mm);
 }
 
+extern void flush_tlb_others(unsigned long cpumask);
+static inline void flush_tlb_cpu(unsigned int cpu)
+{
+    if ( cpu == smp_processor_id() )
+        __flush_tlb();
+    else
+        flush_tlb_others(1<<cpu);
+}
+
 #define TLBSTATE_OK    1
 #define TLBSTATE_LAZY  2
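tlb_flush_count is now an atomic per-CPU generation counter: dev.c records its value when an rx buffer's PTE is unhooked, and deliver_packet later flushes the destination CPU only if the counter has not moved in the meantime. Here is a compilable single-CPU model of that generation check; the names are hypothetical, not Xen code.

/* Model of the flush-generation trick: flush only if nothing has
 * flushed the TLB since the mapping was torn down. Illustrative only. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong tlb_flush_count;   /* bumped by every real flush */

static void flush_tlb(void)
{
    atomic_fetch_add(&tlb_flush_count, 1);
    puts("TLB flushed");
}

/* Record the generation when the PTE is unhooked... */
static unsigned long unhook_pte(void)
{
    return atomic_load(&tlb_flush_count);
}

/* ...and later flush only if the generation is unchanged. */
static void maybe_flush(unsigned long recorded)
{
    if (recorded == atomic_load(&tlb_flush_count))
        flush_tlb();                   /* a stale entry may still be cached */
    else
        puts("skipped: someone flushed in the meantime");
}

int main(void)
{
    unsigned long gen = unhook_pte();
    maybe_flush(gen);                  /* flushes */
    gen = unhook_pte();
    flush_tlb();                       /* an unrelated flush intervenes */
    maybe_flush(gen);                  /* skipped */
    return 0;
}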
diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h
index 6abf2c2222..3ef6ff59ac 100644
--- a/xen/include/hypervisor-ifs/hypervisor-if.h
+++ b/xen/include/hypervisor-ifs/hypervisor-if.h
@@ -217,6 +217,14 @@ typedef struct shared_info_st {
     unsigned long long wall_timeout;
     unsigned long long domain_timeout;
 
+    /*
+     * The index structures are all stored here for convenience. The rings
+     * themselves are allocated by Xen but the guestos must create its own
+     * mapping -- the machine address is given in the startinfo structure to
+     * allow this to happen.
+     */
+    net_idx_t net_idx[MAX_DOMAIN_VIFS];
+
 } shared_info_t;
 
 /*
@@ -228,9 +236,10 @@ typedef struct start_info_st {
     unsigned long pt_base;      /* VIRTUAL address of page directory */
     unsigned long mod_start;    /* VIRTUAL address of pre-loaded module */
     unsigned long mod_len;      /* size (bytes) of pre-loaded module */
-    net_ring_t *net_rings;      /* network rings (VIRTUAL ADDRESS) */
-    int num_net_rings;
-    unsigned long blk_ring;     /* block io ring (MACHINE ADDRESS) */
+    /* Machine address of net rings for each VIF. Will be page aligned. */
+    unsigned long net_rings[MAX_DOMAIN_VIFS];
+    /* Machine address of block-device ring. Will be page aligned. */
+    unsigned long blk_ring;
     unsigned char cmd_line[1];  /* variable-length */
 } start_info_t;
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h
index 4d4cfe93e6..2de090ab62 100644
--- a/xen/include/hypervisor-ifs/network.h
+++ b/xen/include/hypervisor-ifs/network.h
@@ -17,14 +17,14 @@
 
 typedef struct tx_req_entry_st
 {
-    unsigned long  id;
-    unsigned long  addr;   /* machine address of packet */
+    unsigned short id;
     unsigned short size;   /* packet size in bytes */
+    unsigned long  addr;   /* machine address of packet */
 } tx_req_entry_t;
 
 typedef struct tx_resp_entry_st
 {
-    unsigned long  id;
+    unsigned short id;
     unsigned char  status;
 } tx_resp_entry_t;
 
@@ -37,13 +37,13 @@ typedef union tx_entry_st
 
 typedef struct rx_req_entry_st
 {
-    unsigned long  id;
+    unsigned short id;
     unsigned long  addr;   /* machine address of PTE to swizzle */
 } rx_req_entry_t;
 
 typedef struct rx_resp_entry_st
 {
-    unsigned long  id;
+    unsigned short id;
     unsigned short size;   /* received packet size in bytes */
     unsigned char  status; /* per descriptor status */
     unsigned char  offset; /* offset in page of received pkt */
@@ -59,22 +59,26 @@ typedef union rx_entry_st
 
 #define TX_RING_SIZE 256
 #define RX_RING_SIZE 256
+#define MAX_DOMAIN_VIFS 8
+
+/* This structure must fit in a memory page. */
 typedef struct net_ring_st
 {
+    tx_entry_t tx_ring[TX_RING_SIZE];
+    rx_entry_t rx_ring[RX_RING_SIZE];
+} net_ring_t;
+
+typedef struct net_idx_st
+{
     /*
      * Guest OS places packets into ring at tx_req_prod.
      * Guest OS receives DOMAIN_EVENT_NET_TX when tx_resp_prod passes tx_event.
-     */
-    tx_entry_t  *tx_ring;
-    unsigned int tx_req_prod, tx_resp_prod, tx_event;
-
-    /*
      * Guest OS places empty buffers into ring at rx_req_prod.
      * Guest OS receives DOMAIN_EVENT_NET_RX when rx_rssp_prod passes rx_event.
      */
-    rx_entry_t  *rx_ring;
+    unsigned int tx_req_prod, tx_resp_prod, tx_event;
     unsigned int rx_req_prod, rx_resp_prod, rx_event;
-} net_ring_t;
+} net_idx_t;
 
 /*
  * Packet routing/filtering code follows:
@@ -144,7 +148,6 @@ int add_net_rule(net_rule_t *rule);
 
 /* Descriptor status values */
 #define RING_STATUS_OK               0  /* Everything is gravy. */
-#define RING_STATUS_ERR_CFU          1  /* Copy from user problems. */
-#define RING_STATUS_BAD_PAGE         2  /* What they gave us was pure evil */
+#define RING_STATUS_BAD_PAGE         1  /* What they gave us was pure evil */
 
 #endif
diff --git a/xen/include/xeno/mm.h b/xen/include/xeno/mm.h
index f257caffc7..6f7eaa89b7 100644
--- a/xen/include/xeno/mm.h
+++ b/xen/include/xeno/mm.h
@@ -97,7 +97,6 @@ typedef struct pfn_info {
 #define PGT_gdt_page        (5<<24) /* using this page in a GDT? */
 #define PGT_ldt_page        (6<<24) /* using this page in an LDT? */
 #define PGT_writeable_page  (7<<24) /* has writable mappings of this page? */
-#define PGT_net_rx_buf      (8<<24) /* this page taken by the net code. */
 
 /*
  * This bit indicates that the TLB must be flushed when the type count of this
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 3bc997bed3..9f760e5e7f 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -50,8 +50,7 @@ extern struct mm_struct init_mm;
 }
 
 #define _HYP_EVENT_NEED_RESCHED 0
-#define _HYP_EVENT_NET          1
-#define _HYP_EVENT_DIE          2
+#define _HYP_EVENT_DIE          1
 
 #define PF_DONEFPUINIT  0x1  /* Has the FPU been initialised for this task? */
 #define PF_USEDFPU      0x2  /* Has this task used the FPU since last save? */
@@ -118,9 +117,7 @@ struct task_struct {
     long uwarped;            /* time it ran unwarped last time */
 
     /* Network I/O */
-    net_ring_t *net_ring_base;
-    struct list_head net_vifs;
-    int num_net_vifs;
+    net_vif_t *net_vif_list[MAX_DOMAIN_VIFS];
 
     /* Block I/O */
     blk_ring_t *blk_ring_base;
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h
index 730c1cb084..b0dc6c645e 100644
--- a/xen/include/xeno/vif.h
+++ b/xen/include/xeno/vif.h
@@ -27,59 +27,51 @@
 
 typedef struct rx_shadow_entry_st
 {
-    unsigned long  id;     /* IN vars */
-    unsigned long  addr;
-    /* OUT vars */
-    unsigned short size;
-    unsigned char  status;
-    unsigned char  offset;
+    unsigned short id;
+    unsigned long  pte_ptr;
+    unsigned long  buf_pfn;
     /* PRIVATE vars */
     unsigned long  flush_count;
 } rx_shadow_entry_t;
 
 typedef struct tx_shadow_entry_st
 {
-    unsigned long  id;     /* IN vars */
+    unsigned short id;
     void          *header;
     unsigned long  payload;
-    unsigned short size;   /* OUT vars */
+    unsigned short size;
     unsigned char  status;
 } tx_shadow_entry_t;
 
-typedef struct net_shadow_ring_st {
-    rx_shadow_entry_t *rx_ring;
+typedef struct net_vif_st {
+    /* The shared rings and indexes. */
+    net_ring_t *shared_rings;
+    net_idx_t  *shared_idxs;
+
+    /* The private rings and indexes. */
+    rx_shadow_entry_t rx_shadow_ring[RX_RING_SIZE];
     unsigned int rx_prod;  /* More buffers for filling go here. */
-    unsigned int rx_idx;   /* Next buffer to fill is here. */
-    unsigned int rx_cons;  /* Next buffer to create response for is here. */
-
-    tx_shadow_entry_t *tx_ring;
-    /*
-     * These cannot be derived from shared variables, as not all packets
-     * will end up on the shadow ring (eg. locally delivered packets).
-     */
+    unsigned int rx_cons;  /* Next buffer to fill is here. */
+    tx_shadow_entry_t tx_shadow_ring[TX_RING_SIZE];
     unsigned int tx_prod;  /* More packets for sending go here. */
     unsigned int tx_idx;   /* Next packet to send is here. */
-    unsigned int tx_transmitted_prod; /* Next packet to finish transmission. */
     unsigned int tx_cons;  /* Next packet to create response for is here. */
 
-    /* Indexes into shared ring. */
+    /* Private indexes into shared ring. */
     unsigned int rx_req_cons;
     unsigned int rx_resp_prod; /* private version of shared variable */
     unsigned int tx_req_cons;
     unsigned int tx_resp_prod; /* private version of shared variable */
-} net_shadow_ring_t;
 
-typedef struct net_vif_st {
-    net_ring_t         *net_ring;
-    net_shadow_ring_t  *shadow_ring;
+    /* Miscellaneous private stuff. */
     int                 id;
     struct task_struct *domain;
     struct list_head    list;     /* scheduling list */
-    struct list_head    dom_list; /* domain list */
     atomic_t            refcnt;
+    spinlock_t          rx_lock, tx_lock;
 } net_vif_t;
 
 #define get_vif(_v) (atomic_inc(&(_v)->refcnt))
@@ -89,7 +81,6 @@ do {                                            \
     } while (0)                                 \
 
 /* VIF-related defines. */
-#define MAX_GUEST_VIFS    2 // each VIF is a small overhead in task_struct
 #define MAX_SYSTEM_VIFS 256
 
 /* vif globals */
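With the split above, net_ring_t carries only the two descriptor arrays (and must fit in the shared page), while every producer/consumer index lives in net_idx_t; ring sizes stay powers of two so positions wrap with a mask, and the *_event fields throttle notifications. The sketch below is a compilable single-address-space model of the tx half of that protocol; the helper names are invented, and only the index discipline follows the patch.

/* Model of the split ring: descriptors in one shared page, indexes elsewhere.
 * A sketch with hypothetical names; only the protocol matches the patch. */
#include <stdio.h>

#define TX_RING_SIZE 256                 /* power of two, as in the patch */
#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1))

typedef struct { unsigned short id; unsigned short size; unsigned long addr; } tx_req_t;

static tx_req_t tx_ring[TX_RING_SIZE];   /* would live in the shared page */
static unsigned int tx_req_prod, tx_resp_prod, tx_event; /* would live in net_idx_t */

/* Guest side: publish a request, then advance the producer. */
static void queue_tx(unsigned short id, unsigned long addr, unsigned short size)
{
    tx_ring[tx_req_prod] = (tx_req_t){ id, size, addr };
    /* a write barrier goes here in real code: descriptor before index */
    tx_req_prod = TX_RING_INC(tx_req_prod);
}

/* Xen side: complete a request; raise an event only at the threshold. */
static void complete_tx(void)
{
    tx_resp_prod = TX_RING_INC(tx_resp_prod);
    if (tx_resp_prod == tx_event)
        puts("notify guest (_EVENT_NET_TX)"); /* guest moves tx_event to rearm */
}

int main(void)
{
    tx_event = 1;                        /* ask for an event after 1 response */
    queue_tx(1, 0x100000, 64);
    complete_tx();                       /* hits tx_event, so notify fires */
    return 0;
}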
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 7fbf165b0e..c19fad62a2 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -50,10 +50,10 @@
 #define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
 
 static void make_tx_response(net_vif_t *vif,
-                             unsigned long id,
-                             unsigned char st);
+                             unsigned short id,
+                             unsigned char st);
 static void make_rx_response(net_vif_t *vif,
-                             unsigned long id,
+                             unsigned short id,
                              unsigned short size,
                              unsigned char st,
                              unsigned char off);
@@ -491,110 +491,81 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 
 struct netif_rx_stats netdev_rx_stat[NR_CPUS];
 
-/*
- * update_shared_ring(void)
- *
- * This replaces flush_rx_queue as the guest event handler to move packets
- * queued in the guest ring up to the guest. Really, the packet is already
- * there, it was page flipped in deliver_packet, but this moves the ring
- * descriptor across from the shadow ring and increments the pointers.
- */
-void update_shared_ring(void)
-{
-    rx_shadow_entry_t *rx;
-    tx_shadow_entry_t *tx;
-    net_ring_t *net_ring;
-    net_shadow_ring_t *shadow_ring;
-    net_vif_t *vif;
-    struct list_head *ent;
-
-    clear_bit(_HYP_EVENT_NET, &current->hyp_events);
-
-    list_for_each(ent, &current->net_vifs)
-    {
-        vif = list_entry(ent, net_vif_t, dom_list);
-        net_ring = vif->net_ring;
-        shadow_ring = vif->shadow_ring;
-
-        while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
-        {
-            rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
-            if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
-                __flush_tlb();
-            shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
-            make_rx_response(vif, rx->id, rx->size, rx->status, rx->offset);
-        }
-
-        while ( shadow_ring->tx_cons != shadow_ring->tx_transmitted_prod )
-        {
-            tx = shadow_ring->tx_ring + shadow_ring->tx_cons;
-            shadow_ring->tx_cons = RX_RING_INC(shadow_ring->tx_cons);
-            make_tx_response(vif, tx->id, tx->status);
-        }
-    }
-}
-
 void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 {
-    net_shadow_ring_t *shadow_ring;
     rx_shadow_entry_t *rx;
-    unsigned long *g_pte;
-    struct pfn_info *g_pfn, *h_pfn;
+    unsigned long *ptep;
+    struct pfn_info *old_page, *new_page, *pte_page;
     unsigned int i;
+    unsigned short size;
+    unsigned char offset, status = RING_STATUS_OK;
 
     memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
     if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
         memset(skb->nh.raw + 18, 0, ETH_ALEN);
 
-    shadow_ring = vif->shadow_ring;
-    if ( (i = shadow_ring->rx_idx) == shadow_ring->rx_prod )
+    if ( (i = vif->rx_cons) == vif->rx_prod )
         return;
 
-    rx = shadow_ring->rx_ring + i;
+    rx = vif->rx_shadow_ring + i;
 
-    ASSERT(rx->status == RING_STATUS_OK);
-    ASSERT(skb->len <= PAGE_SIZE);
-
-    rx->size   = skb->len;
-    rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
+    size   = (unsigned short)skb->len;
+    offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
 
     spin_lock(&vif->domain->page_lock);
 
-    g_pte = map_domain_mem(rx->addr);
+    /* Release the page-table page. */
+    pte_page = frame_table + (rx->pte_ptr >> PAGE_SHIFT);
+    put_page_type(pte_page);
+    put_page_tot(pte_page);
 
-    g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);
-    h_pfn = skb->pf;
-
-    h_pfn->tot_count = 1;
-    g_pfn->tot_count = g_pfn->type_count = h_pfn->type_count = 0;
-    h_pfn->flags = g_pfn->flags & ~PG_type_mask;
-    g_pfn->flags = 0;
-
-    if ( (*g_pte & _PAGE_RW) )
+    old_page = frame_table + rx->buf_pfn;
+    new_page = skb->pf;
+
+    ptep = map_domain_mem(rx->pte_ptr);
+
+    if ( (*ptep & _PAGE_PRESENT) )
     {
-        h_pfn->flags |= PGT_writeable_page | PG_need_flush;
-        h_pfn->type_count = 1;
+        /* Bail out if the PTE has been reused under our feet. */
+        list_add(&old_page->list, &vif->domain->pg_head);
+        old_page->flags = vif->domain->domain;
+        status = RING_STATUS_BAD_PAGE;
+        goto out;
     }
-
-    /* Point the guest at the new machine frame. */
-    machine_to_phys_mapping[h_pfn - frame_table]
-        = machine_to_phys_mapping[g_pfn - frame_table];
-    *g_pte = (*g_pte & ~PAGE_MASK)
-        | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
-    *g_pte |= _PAGE_PRESENT;
-
-    unmap_domain_mem(g_pte);
 
-    list_del(&g_pfn->list);
-    list_add(&h_pfn->list, &vif->domain->pg_head);
+    /* Give the new page to the domain, marking it writeable. */
+    new_page->tot_count = new_page->type_count = 1;
+    new_page->flags = vif->domain->domain | PGT_writeable_page | PG_need_flush;
+    list_add(&new_page->list, &vif->domain->pg_head);
+
+    /* Patch the PTE to map the new page as writeable. */
+    machine_to_phys_mapping[new_page - frame_table]
+        = machine_to_phys_mapping[old_page - frame_table];
+    *ptep = (*ptep & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
+        (((new_page - frame_table) << PAGE_SHIFT) & PAGE_MASK);
+
+    unmap_domain_mem(ptep);
 
     spin_unlock(&vif->domain->page_lock);
 
     /* Our skbuff now points at the guest's old frame. */
-    skb->pf = g_pfn;
+    skb->pf = old_page;
+
+    /* Updates must happen before releasing the descriptor. */
+    smp_wmb();
 
-    smp_wmb(); /* updates must happen before releasing the descriptor. */
-    shadow_ring->rx_idx = RX_RING_INC(i);
+    /*
+     * NB. The remote flush here should be safe, as we hold no locks. The
+     * network driver that called us should also have no nasty locks.
+     */
+    rx = vif->rx_shadow_ring + vif->rx_cons;
+    if ( rx->flush_count ==
+         atomic_read(&tlb_flush_count[vif->domain->processor]) )
+        flush_tlb_cpu(vif->domain->processor);
+
+ out:
+    vif->rx_cons = RX_RING_INC(vif->rx_cons);
+    make_rx_response(vif, rx->id, size, status, offset);
 }
 
 /**
@@ -613,7 +584,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 
 int netif_rx(struct sk_buff *skb)
 {
-    unsigned long cpu_mask;
     int offset, this_cpu = smp_processor_id();
     unsigned long flags;
     net_vif_t *vif;
@@ -655,14 +625,11 @@ int netif_rx(struct sk_buff *skb)
     get_vif(vif);
     read_unlock(&sys_vif_lock);
-
     deliver_packet(skb, vif);
-    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
     put_vif(vif);
 
     unmap_domain_mem(skb->head);
     kfree_skb(skb);
-    hyp_event_notify(cpu_mask);
     local_irq_restore(flags);
     return NET_RX_SUCCESS;
 }
@@ -724,7 +691,8 @@ static void tx_skb_release(struct sk_buff *skb)
 {
     int i;
     net_vif_t *vif = sys_vif_list[skb->src_vif];
-    unsigned long cpu_mask, flags;
+    tx_shadow_entry_t *tx;
+    unsigned long flags;
 
     spin_lock_irqsave(&vif->domain->page_lock, flags);
     for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
@@ -736,10 +704,9 @@ static void tx_skb_release(struct sk_buff *skb)
 
     skb_shinfo(skb)->nr_frags = 0;
 
-    vif->shadow_ring->tx_transmitted_prod =
-        TX_RING_INC(vif->shadow_ring->tx_transmitted_prod);
-    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
-    hyp_event_notify(cpu_mask);
+    tx = vif->tx_shadow_ring + vif->tx_cons;
+    vif->tx_cons = TX_RING_INC(vif->tx_cons);
+    make_tx_response(vif, tx->id, tx->status);
 
     put_vif(vif);
 }
@@ -762,7 +729,7 @@ static void net_tx_action(unsigned long unused)
         vif = list_entry(ent, net_vif_t, list);
         get_vif(vif);
         remove_from_net_schedule_list(vif);
-        if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
+        if ( vif->tx_idx == vif->tx_prod )
         {
             put_vif(vif);
             continue;
@@ -777,9 +744,9 @@ static void net_tx_action(unsigned long unused)
         }
 
         /* Pick an entry from the transmit queue. */
-        tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
-        vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
-        if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+        tx = &vif->tx_shadow_ring[vif->tx_idx];
+        vif->tx_idx = TX_RING_INC(vif->tx_idx);
+        if ( vif->tx_idx != vif->tx_prod )
             add_to_net_schedule_list_tail(vif);
 
         ASSERT(tx->status == RING_STATUS_OK);
@@ -1790,26 +1757,27 @@ inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev)
 
 long do_net_update(void)
 {
-    struct list_head *ent;
-    net_ring_t *net_ring;
-    net_shadow_ring_t *shadow_ring;
-    net_vif_t *current_vif;
-    unsigned int i, j;
+    net_ring_t *shared_rings;
+    net_vif_t *vif;
+    net_idx_t *shared_idxs;
+    unsigned int i, j, idx;
     struct sk_buff *skb;
     tx_req_entry_t tx;
     rx_req_entry_t rx;
-    unsigned long pfn;
-    struct pfn_info *page;
-    unsigned long *g_pte;
+    unsigned long pte_pfn, buf_pfn;
+    struct pfn_info *pte_page, *buf_page;
+    unsigned long *ptep;
    int target;
     u8 *g_data;
     unsigned short protocol;
-
-    list_for_each(ent, &current->net_vifs)
+
+    for ( idx = 0; idx < MAX_DOMAIN_VIFS; idx++ )
     {
-        current_vif = list_entry(ent, net_vif_t, dom_list);
-        net_ring = current_vif->net_ring;
-        shadow_ring = current_vif->shadow_ring;
+        if ( (vif = current->net_vif_list[idx]) == NULL )
+            break;
+
+        shared_idxs = vif->shared_idxs;
+        shared_rings = vif->shared_rings;
 
         /*
          * PHASE 1 -- TRANSMIT RING
@@ -1820,23 +1788,18 @@ long do_net_update(void)
          * new producer index, but take care not to catch up with our own
          * consumer index.
          */
-        j = shadow_ring->tx_prod;
-        for ( i = shadow_ring->tx_req_cons;
-              (i != net_ring->tx_req_prod) &&
-              (((shadow_ring->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
+        j = vif->tx_prod;
+        for ( i = vif->tx_req_cons;
+              (i != shared_idxs->tx_req_prod) &&
+              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
               i = TX_RING_INC(i) )
         {
-            if ( copy_from_user(&tx, &net_ring->tx_ring[i].req, sizeof(tx)) )
-            {
-                DPRINTK("Bad copy_from_user for tx net descriptor\n");
-                make_tx_response(current_vif, tx.id, RING_STATUS_ERR_CFU);
-                continue;
-            }
+            tx = shared_rings->tx_ring[i].req;
 
             if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
             {
                 DPRINTK("Bad packet size: %d\n", tx.size);
-                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
                 continue;
             }
 
@@ -1845,19 +1808,19 @@ long do_net_update(void)
             {
                 DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
                         tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
-                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
                 continue;
             }
 
-            pfn  = tx.addr >> PAGE_SHIFT;
-            page = frame_table + pfn;
+            buf_pfn  = tx.addr >> PAGE_SHIFT;
+            buf_page = frame_table + buf_pfn;
             spin_lock_irq(&current->page_lock);
-            if ( (pfn >= max_page) ||
-                 ((page->flags & PG_domain_mask) != current->domain) )
+            if ( (buf_pfn >= max_page) ||
+                 ((buf_page->flags & PG_domain_mask) != current->domain) )
             {
                 DPRINTK("Bad page frame\n");
                 spin_unlock_irq(&current->page_lock);
-                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
                 continue;
             }
 
@@ -1867,22 +1830,22 @@ long do_net_update(void)
                                         init_tx_header(g_data, tx.size, the_dev));
             if ( protocol == 0 )
             {
-                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
                 goto tx_unmap_and_continue;
             }
 
-            target = __net_get_target_vif(g_data, tx.size, current_vif->id);
+            target = __net_get_target_vif(g_data, tx.size, vif->id);
 
             if ( target > VIF_PHYSICAL_INTERFACE )
             {
                 /* Local delivery */
                 if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
                 {
-                    make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
+                    make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
                     goto tx_unmap_and_continue;
                 }
 
-                skb->src_vif = current_vif->id;
+                skb->src_vif = vif->id;
                 skb->dst_vif = target;
                 skb->protocol = protocol;
@@ -1902,24 +1865,24 @@ long do_net_update(void)
 
                 (void)netif_rx(skb);
 
-                make_tx_response(current_vif, tx.id, RING_STATUS_OK);
+                make_tx_response(vif, tx.id, RING_STATUS_OK);
             }
             else if ( target == VIF_PHYSICAL_INTERFACE )
             {
-                shadow_ring->tx_ring[j].id     = tx.id;
-                shadow_ring->tx_ring[j].size   = tx.size;
-                shadow_ring->tx_ring[j].status = RING_STATUS_OK;
-                shadow_ring->tx_ring[j].header =
+                vif->tx_shadow_ring[j].id     = tx.id;
+                vif->tx_shadow_ring[j].size   = tx.size;
+                vif->tx_shadow_ring[j].status = RING_STATUS_OK;
+                vif->tx_shadow_ring[j].header =
                     kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
-                if ( shadow_ring->tx_ring[j].header == NULL )
+                if ( vif->tx_shadow_ring[j].header == NULL )
                 {
-                    make_tx_response(current_vif, tx.id, RING_STATUS_OK);
+                    make_tx_response(vif, tx.id, RING_STATUS_OK);
                     goto tx_unmap_and_continue;
                 }
 
-                memcpy(shadow_ring->tx_ring[j].header, g_data, PKT_PROT_LEN);
-                shadow_ring->tx_ring[j].payload = tx.addr + PKT_PROT_LEN;
-                get_page_tot(page);
+                memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
+                vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
+                get_page_tot(buf_page);
                 j = TX_RING_INC(j);
             }
 
@@ -1928,13 +1891,13 @@ long do_net_update(void)
             spin_unlock_irq(&current->page_lock);
         }
 
-        shadow_ring->tx_req_cons = i;
+        vif->tx_req_cons = i;
 
-        if ( shadow_ring->tx_prod != j )
+        if ( vif->tx_prod != j )
         {
             smp_mb(); /* Let other CPUs see new descriptors first. */
-            shadow_ring->tx_prod = j;
-            add_to_net_schedule_list_tail(current_vif);
+            vif->tx_prod = j;
+            add_to_net_schedule_list_tail(vif);
             maybe_schedule_tx_action();
         }
 
@@ -1947,77 +1910,79 @@ long do_net_update(void)
          * new producer index, but take care not to catch up with our own
         * consumer index.
          */
-        j = shadow_ring->rx_prod;
-        for ( i = shadow_ring->rx_req_cons;
-              (i != net_ring->rx_req_prod) &&
-              (((shadow_ring->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
+        j = vif->rx_prod;
+        for ( i = vif->rx_req_cons;
+              (i != shared_idxs->rx_req_prod) &&
+              (((vif->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
               i = RX_RING_INC(i) )
        {
-            if ( copy_from_user(&rx, &net_ring->rx_ring[i].req, sizeof(rx)) )
-            {
-                DPRINTK("Bad copy_from_user for rx net descriptor\n");
-                make_rx_response(current_vif,
-                                 rx.id, 0, RING_STATUS_ERR_CFU, 0);
-                continue;
-            }
+            rx = shared_rings->rx_ring[i].req;
 
-            pfn  = rx.addr >> PAGE_SHIFT;
-            page = frame_table + pfn;
+            pte_pfn  = rx.addr >> PAGE_SHIFT;
+            pte_page = frame_table + pte_pfn;
 
             spin_lock_irq(&current->page_lock);
-            if ( (pfn >= max_page) ||
-                 (page->flags != (PGT_l1_page_table | current->domain)) )
+            if ( (pte_pfn >= max_page) ||
+                 ((pte_page->flags & (PG_type_mask | PG_domain_mask)) !=
+                  (PGT_l1_page_table | current->domain)) )
            {
                 DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
-                        current->domain, pfn, max_page, page->flags);
+                        current->domain, pte_pfn, max_page, pte_page->flags);
                 spin_unlock_irq(&current->page_lock);
-                make_rx_response(current_vif,
-                                 rx.id, 0, RING_STATUS_BAD_PAGE, 0);
+                make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
                 continue;
             }
 
-            g_pte = map_domain_mem(rx.addr);
+            ptep = map_domain_mem(rx.addr);
 
-            if ( !(*g_pte & _PAGE_PRESENT) )
+            if ( !(*ptep & _PAGE_PRESENT) )
             {
                 DPRINTK("Invalid PTE passed down (not present)\n");
-                make_rx_response(current_vif,
-                                 rx.id, 0, RING_STATUS_BAD_PAGE, 0);
+                make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
                 goto rx_unmap_and_continue;
             }
 
-            page = (*g_pte >> PAGE_SHIFT) + frame_table;
-
-            if ( page->tot_count != 1 )
+            buf_pfn  = *ptep >> PAGE_SHIFT;
+            buf_page = frame_table + buf_pfn;
+
+            if ( ((buf_page->flags & (PG_type_mask | PG_domain_mask)) !=
+                  (PGT_writeable_page | current->domain)) ||
+                 (buf_page->tot_count != 1) )
            {
-                DPRINTK("RX page mapped multple times (%d/%d/%08x)\n",
-                        page->type_count, page->tot_count, page->flags);
-                make_rx_response(current_vif,
-                                 rx.id, 0, RING_STATUS_BAD_PAGE, 0);
+                DPRINTK("Need a mapped-once writeable page (%d/%d/%08x)\n",
+                        buf_page->type_count, buf_page->tot_count, buf_page->flags);
+                make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
                 goto rx_unmap_and_continue;
             }
 
-            /* The pte they passed was good, so take it away from them. */
-            *g_pte &= ~_PAGE_PRESENT;
-            page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
-            shadow_ring->rx_ring[j].id     = rx.id;
-            shadow_ring->rx_ring[j].addr   = rx.addr;
-            shadow_ring->rx_ring[j].status = RING_STATUS_OK;
-            shadow_ring->rx_ring[j].flush_count =
-                tlb_flush_count[smp_processor_id()];
+            /*
+             * The pte they passed was good, so take it away from them. We
+             * also lock down the page-table page, so it doesn't go away.
+             */
+            get_page_type(pte_page);
+            get_page_tot(pte_page);
+            *ptep &= ~_PAGE_PRESENT;
+            buf_page->flags = buf_page->type_count = buf_page->tot_count = 0;
+            list_del(&buf_page->list);
+
+            vif->rx_shadow_ring[j].id          = rx.id;
+            vif->rx_shadow_ring[j].pte_ptr     = rx.addr;
+            vif->rx_shadow_ring[j].buf_pfn     = buf_pfn;
+            vif->rx_shadow_ring[j].flush_count =
+                atomic_read(&tlb_flush_count[smp_processor_id()]);
+
             j = RX_RING_INC(j);
 
         rx_unmap_and_continue:
-            unmap_domain_mem(g_pte);
+            unmap_domain_mem(ptep);
             spin_unlock_irq(&current->page_lock);
         }
 
-        shadow_ring->rx_req_cons = i;
+        vif->rx_req_cons = i;
 
-        if ( shadow_ring->rx_prod != j )
+        if ( vif->rx_prod != j )
         {
             smp_mb(); /* Let other CPUs see new descriptors first. */
-            shadow_ring->rx_prod = j;
+            vif->rx_prod = j;
         }
     }
 
@@ -2025,55 +1990,57 @@ long do_net_update(void)
 }
 
 
-static void make_tx_response(net_vif_t *vif,
-                             unsigned long id,
-                             unsigned char st)
+static void make_tx_response(net_vif_t *vif,
+                             unsigned short id,
+                             unsigned char st)
 {
     unsigned long flags;
-    net_shadow_ring_t *shadow = vif->shadow_ring;
     unsigned int pos;
-    tx_resp_entry_t *resp, privresp;
+    tx_resp_entry_t *resp;
 
     /* Place on the response ring for the relevant domain. */
-    local_irq_save(flags);
-    pos  = shadow->tx_resp_prod;
-    resp = &vif->net_ring->tx_ring[pos].resp;
-    privresp.id     = id;
-    privresp.status = st;
-    copy_to_user(resp, &privresp, sizeof(privresp));
+    spin_lock_irqsave(&vif->tx_lock, flags);
+    pos  = vif->tx_resp_prod;
+    resp = &vif->shared_rings->tx_ring[pos].resp;
+    resp->id     = id;
+    resp->status = st;
     pos = TX_RING_INC(pos);
-    shadow->tx_resp_prod = vif->net_ring->tx_resp_prod = pos;
-    if ( pos == vif->net_ring->rx_event )
-        set_bit(_EVENT_NET_TX, &current->shared_info->events);
-    local_irq_restore(flags);
+    vif->tx_resp_prod = vif->shared_idxs->tx_resp_prod = pos;
+    if ( pos == vif->shared_idxs->rx_event )
+    {
+        unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
+        guest_event_notify(cpu_mask);
+    }
+    spin_unlock_irqrestore(&vif->tx_lock, flags);
 }
 
 
 static void make_rx_response(net_vif_t     *vif,
-                             unsigned long  id,
+                             unsigned short id,
                              unsigned short size,
                              unsigned char  st,
                              unsigned char  off)
 {
     unsigned long flags;
-    net_shadow_ring_t *shadow = vif->shadow_ring;
     unsigned int pos;
-    rx_resp_entry_t *resp, privresp;
+    rx_resp_entry_t *resp;
 
     /* Place on the response ring for the relevant domain. */
-    local_irq_save(flags);
-    pos  = shadow->rx_resp_prod;
-    resp = &vif->net_ring->rx_ring[pos].resp;
-    privresp.id     = id;
-    privresp.size   = size;
-    privresp.status = st;
-    privresp.offset = off;
-    copy_to_user(resp, &privresp, sizeof(privresp));
+    spin_lock_irqsave(&vif->rx_lock, flags);
+    pos  = vif->rx_resp_prod;
+    resp = &vif->shared_rings->rx_ring[pos].resp;
+    resp->id     = id;
+    resp->size   = size;
+    resp->status = st;
+    resp->offset = off;
     pos = RX_RING_INC(pos);
-    shadow->rx_resp_prod = vif->net_ring->rx_resp_prod = pos;
-    if ( pos == vif->net_ring->rx_event )
-        set_bit(_EVENT_NET_RX, &current->shared_info->events);
-    local_irq_restore(flags);
+    vif->rx_resp_prod = vif->shared_idxs->rx_resp_prod = pos;
+    if ( pos == vif->shared_idxs->rx_event )
+    {
+        unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_RX);
+        guest_event_notify(cpu_mask);
+    }
+    spin_unlock_irqrestore(&vif->rx_lock, flags);
 }
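deliver_packet above never copies the payload: it points the guest's PTE at the machine frame the packet already occupies, takes the guest's posted buffer frame in exchange, and fixes up machine_to_phys_mapping so the guest's pseudo-physical address is unchanged. The toy model below shows that exchange with plain arrays in place of real page tables; every name in it is hypothetical.

/* Toy model of the receive page flip: swap which machine frame a guest
 * "PTE" points at, fix up the M2P table, and copy nothing. */
#include <stdio.h>

#define NFRAMES 4

static char frames[NFRAMES][16];   /* machine memory */
static int  m2p[NFRAMES];          /* machine frame -> guest pseudo-physical */
static int  guest_pte;             /* guest's mapping: a machine frame number */

static void page_flip(int new_mfn)
{
    int old_mfn = guest_pte;
    m2p[new_mfn] = m2p[old_mfn];   /* guest's physical address is unchanged */
    guest_pte = new_mfn;           /* PTE now maps the frame holding the data */
    /* old_mfn would go back to the hypervisor's buffer pool here */
    printf("guest now sees mfn %d (was %d): %s\n",
           new_mfn, old_mfn, frames[guest_pte]);
}

int main(void)
{
    guest_pte = 0;                 /* guest posted frame 0 as an rx buffer */
    m2p[0] = 7;                    /* its pseudo-physical frame number */
    snprintf(frames[2], sizeof frames[2], "packet data"); /* NIC DMA'd here */
    page_flip(2);                  /* deliver without copying */
    return 0;
}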
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
index 3ae5e3d1a0..03f9939e4e 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
@@ -56,9 +56,26 @@ struct net_private
     atomic_t tx_entries;
     unsigned int rx_resp_cons, tx_resp_cons, tx_full;
     net_ring_t *net_ring;
+    net_idx_t  *net_idx;
     spinlock_t tx_lock;
+
+    /*
+     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+     * array is an index into a chain of free entries.
+     */
+    struct sk_buff *tx_skbs[TX_RING_SIZE];
+    struct sk_buff *rx_skbs[RX_RING_SIZE];
 };
 
+/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id)             \
+    (_list)[(_id)] = (_list)[0];                   \
+    (_list)[0]     = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list)                 \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+    (_list)[0] = (_list)[_id];                     \
+    _id; })
+
 
 static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
 {
@@ -67,33 +84,33 @@ static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
     printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
            " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
            np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
-           np->net_ring->tx_req_prod, np->net_ring->tx_resp_prod,
-           np->net_ring->tx_event,
+           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
+           np->net_idx->tx_event,
            test_bit(__LINK_STATE_XOFF, &dev->state));
+    printk(KERN_ALERT "rx_resp_cons = %d,"
+           " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n",
+           np->rx_resp_cons, np->net_idx->rx_req_prod,
+           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
 }
 
 
 static int network_open(struct net_device *dev)
 {
     struct net_private *np = dev->priv;
-    int error = 0;
+    int i, error = 0;
 
     np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
     memset(&np->stats, 0, sizeof(np->stats));
     spin_lock_init(&np->tx_lock);
     atomic_set(&np->tx_entries, 0);
     memset(np->net_ring, 0, sizeof(*np->net_ring));
+    memset(np->net_idx, 0, sizeof(*np->net_idx));
 
-    np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t),
-                                    GFP_KERNEL);
-    np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t),
-                                    GFP_KERNEL);
-    if ( (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
-    {
-        printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name);
-        error = -ENOBUFS;
-        goto fail;
-    }
+    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+    for ( i = 0; i < TX_RING_SIZE; i++ )
+        np->tx_skbs[i] = (void *)(i+1);
+    for ( i = 0; i < RX_RING_SIZE; i++ )
+        np->rx_skbs[i] = (void *)(i+1);
 
     network_alloc_rx_buffers(dev);
 
@@ -135,8 +152,6 @@ static int network_open(struct net_device *dev)
     return 0;
 
 fail:
-    if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
-    if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
     kfree(np);
     return error;
 }
@@ -154,11 +169,12 @@ static void network_tx_buf_gc(struct net_device *dev)
     spin_lock_irqsave(&np->tx_lock, flags);
 
     do {
-        prod = np->net_ring->tx_resp_prod;
+        prod = np->net_idx->tx_resp_prod;
 
         for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
         {
-            skb = (struct sk_buff *)tx_ring[i].resp.id;
+            skb = np->tx_skbs[tx_ring[i].resp.id];
+            ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
             dev_kfree_skb_any(skb);
             atomic_dec(&np->tx_entries);
         }
 
        np->tx_resp_cons = prod;
 
         /* Set a new event, then check for race with update of tx_cons. */
-        np->net_ring->tx_event =
+        np->net_idx->tx_event =
             TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
         smp_mb();
     }
-    while ( prod != np->net_ring->tx_resp_prod );
+    while ( prod != np->net_idx->tx_resp_prod );
 
     if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
     {
@@ -192,24 +208,28 @@ inline pte_t *get_ppte(void *addr)
 
 static void network_alloc_rx_buffers(struct net_device *dev)
 {
-    unsigned int i;
+    unsigned int i, id;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
     unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);
 
-    for ( i = np->net_ring->rx_req_prod; i != end; i = RX_RING_INC(i) )
+    for ( i = np->net_idx->rx_req_prod; i != end; i = RX_RING_INC(i) )
     {
         skb = dev_alloc_skb(RX_BUF_SIZE);
         if ( skb == NULL ) break;
         skb->dev = dev;
-        np->net_ring->rx_ring[i].req.id   = (unsigned long)skb;
+
+        id = GET_ID_FROM_FREELIST(np->rx_skbs);
+        np->rx_skbs[id] = skb;
+
+        np->net_ring->rx_ring[i].req.id   = (unsigned short)id;
         np->net_ring->rx_ring[i].req.addr =
             virt_to_machine(get_ppte(skb->head));
     }
 
-    np->net_ring->rx_req_prod = i;
+    np->net_idx->rx_req_prod = i;
 
-    np->net_ring->rx_event = RX_RING_INC(np->rx_resp_cons);
+    np->net_idx->rx_event = RX_RING_INC(np->rx_resp_cons);
 
     /*
      * We may have allocated buffers which have entries outstanding in
@@ -227,17 +247,17 @@ static void network_free_rx_buffers(struct net_device *dev)
     struct sk_buff *skb;
 
     for ( i  = np->rx_resp_cons;
-          i != np->net_ring->rx_req_prod;
+          i != np->net_idx->rx_req_prod;
          i  = RX_RING_INC(i) )
     {
-        skb = (struct sk_buff *)np->net_ring->rx_ring[i].req.id;
+        skb = np->rx_skbs[np->net_ring->rx_ring[i].req.id];
         dev_kfree_skb_any(skb);
     }
 }
 
 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-    unsigned int i;
+    unsigned int i, id;
     struct net_private *np = (struct net_private *)dev->priv;
 
     if ( np->tx_full )
@@ -246,7 +266,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
         netif_stop_queue(dev);
         return -ENOBUFS;
     }
-    i = np->net_ring->tx_req_prod;
+    i = np->net_idx->tx_req_prod;
 
     if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
     {
@@ -258,11 +278,14 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
         skb = new_skb;
     }
 
-    np->net_ring->tx_ring[i].req.id   = (unsigned long)skb;
+    id = GET_ID_FROM_FREELIST(np->tx_skbs);
+    np->tx_skbs[id] = skb;
+
+    np->net_ring->tx_ring[i].req.id   = (unsigned short)id;
     np->net_ring->tx_ring[i].req.addr =
         phys_to_machine(virt_to_phys(skb->data));
     np->net_ring->tx_ring[i].req.size = skb->len;
-    np->net_ring->tx_req_prod = TX_RING_INC(i);
+    np->net_idx->tx_req_prod = TX_RING_INC(i);
     atomic_inc(&np->tx_entries);
 
     np->stats.tx_bytes += skb->len;
@@ -294,11 +317,13 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
 
  again:
     for ( i  = np->rx_resp_cons;
-          i != np->net_ring->rx_resp_prod;
+          i != np->net_idx->rx_resp_prod;
           i  = RX_RING_INC(i) )
     {
         rx = &np->net_ring->rx_ring[i].resp;
-        skb = (struct sk_buff *)rx->id;
+
+        skb = np->rx_skbs[rx->id];
+        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
         if ( rx->status != RING_STATUS_OK )
         {
@@ -344,7 +369,7 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
 
     /* Deal with hypervisor racing our resetting of rx_event. */
     smp_mb();
-    if ( np->net_ring->rx_resp_prod != i ) goto again;
+    if ( np->net_idx->rx_resp_prod != i ) goto again;
 }
 
 
@@ -389,14 +414,21 @@ static struct net_device_stats *network_get_stats(struct net_device *dev)
 
 int __init init_module(void)
 {
-    int i, err;
+    int i, fixmap_idx=-1, err;
     struct net_device *dev;
     struct net_private *np;
 
     INIT_LIST_HEAD(&dev_list);
 
-    for ( i = 0; i < start_info.num_net_rings; i++ )
+    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
     {
+        if ( start_info.net_rings[i] == 0 )
+            continue;
+
+        /* We actually only support up to 4 vifs right now. */
+        if ( ++fixmap_idx == 4 )
+            break;
+
         dev = alloc_etherdev(sizeof(struct net_private));
         if ( dev == NULL )
         {
@@ -404,8 +436,11 @@ int __init init_module(void)
             goto fail;
         }
 
+        set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]);
+
         np = dev->priv;
-        np->net_ring = start_info.net_rings + i;
+        np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx);
+        np->net_idx  = &HYPERVISOR_shared_info->net_idx[i];
 
         SET_MODULE_OWNER(dev);
         dev->open            = network_open;
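Since ring ids shrank to 16 bits above, the driver can no longer smuggle an skbuff pointer through the id field; instead it parks pointers in tx_skbs[]/rx_skbs[] and threads a freelist through the unused slots, with slot 0 acting as the head. The standalone program below exercises the same two macros (GCC statement expressions assumed); with this layout the usable ids are 1..N-1.

/* Model of the in-array ID freelist from the driver; slot 0 is the head
 * of the free chain. Illustrative only; N is an arbitrary small size. */
#include <stdio.h>

#define N 8
static void *slots[N];             /* stands in for np->tx_skbs[] */

#define ADD_ID_TO_FREELIST(_list, _id)             \
    (_list)[(_id)] = (_list)[0];                   \
    (_list)[0]     = (void *)(unsigned long)(_id);
#define GET_ID_FROM_FREELIST(_list)                   \
    ({ unsigned long _id = (unsigned long)(_list)[0]; \
       (_list)[0] = (_list)[_id];                     \
       _id; })

int main(void)
{
    /* Initialise to a chain containing every entry, as network_open does. */
    for (unsigned long i = 0; i < N; i++)
        slots[i] = (void *)(i + 1);

    unsigned long a = GET_ID_FROM_FREELIST(slots);  /* returns 1 */
    unsigned long b = GET_ID_FROM_FREELIST(slots);  /* returns 2 */
    slots[a] = "skb A";                             /* park a pointer in slot a */
    printf("got ids %lu and %lu\n", a, b);

    ADD_ID_TO_FREELIST(slots, a);                   /* recycle slot a */
    printf("reused id: %lu\n", GET_ID_FROM_FREELIST(slots)); /* 1 again */
    return 0;
}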
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c
index eb24b1ccbf..0806d775f7 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c
@@ -236,10 +236,7 @@ void __init paging_init(void)
     vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
     fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd);
 
-    /*
-     * XXX We do this conversion early, so that all other page tables
-     * will automatically get this mapping.
-     */
+    /* Cheesy: this can probably be moved to the blkdev driver. */
     set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring);
 
 #ifdef CONFIG_HIGHMEM
diff --git a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h
index 0945783047..eee16cb240 100644
--- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h
+++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/fixmap.h
@@ -43,6 +43,10 @@ enum fixed_addresses {
     FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
     FIX_BLKRING_BASE,
+    FIX_NETRING0_BASE,
+    FIX_NETRING1_BASE,
+    FIX_NETRING2_BASE,
+    FIX_NETRING3_BASE,
     __end_of_permanent_fixed_addresses,
     __end_of_fixed_addresses
 };
@@ -70,27 +74,13 @@ extern void __set_fixmap (enum fixed_addresses idx,
 
 #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
 
-extern void __this_fixmap_does_not_exist(void);
-
 /*
  * 'index to address' translation. If anyone tries to use the idx
  * directly without tranlation, we catch the bug with a NULL-deference
  * kernel oops. Illegal ranges of incoming indices are caught too.
 */
-static inline unsigned long fix_to_virt(const unsigned int idx)
+static inline unsigned long fix_to_virt(unsigned int idx)
 {
-    /*
-     * this branch gets completely eliminated after inlining,
-     * except when someone tries to use fixaddr indices in an
-     * illegal way. (such as mixing up address types or using
-     * out-of-range indices).
-     *
-     * If it doesn't get removed, the linker will complain
-     * loudly with a reasonably clear error message..
-     */
-    if (idx >= __end_of_fixed_addresses)
-        __this_fixmap_does_not_exist();
-
     return __fix_to_virt(idx);
 }
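The four FIX_NETRING*_BASE slots work because each fixmap index maps one page just below FIXADDR_TOP, so fix_to_virt() is pure arithmetic on a compile-time constant. Below is a minimal model of that translation; the FIXADDR_TOP value is illustrative only, not the real architecture constant.

/* Model of __fix_to_virt(): fixed slots grow downwards, one page each. */
#include <stdio.h>

#define PAGE_SHIFT  12
#define FIXADDR_TOP 0xFFFFE000UL  /* hypothetical; the real value is arch-specific */

enum fixed_addresses {
    FIX_BLKRING_BASE,
    FIX_NETRING0_BASE,
    FIX_NETRING1_BASE,
    FIX_NETRING2_BASE,
    FIX_NETRING3_BASE,
    __end_of_fixed_addresses
};

#define __fix_to_virt(x) (FIXADDR_TOP - ((unsigned long)(x) << PAGE_SHIFT))

int main(void)
{
    for (int i = 0; i < 4; i++)
        printf("FIX_NETRING%d_BASE -> %#lx\n", i,
               __fix_to_virt(FIX_NETRING0_BASE + i));
    return 0;
}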