author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2003-07-30 20:38:09 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2003-07-30 20:38:09 +0000
commit     2a1d0b3f0233b00c23f0b3020511fe25d0b31607 (patch)
tree       bb9c2d396619247beb1ac07145e1a531bdf03156 /xen
parent     88886b384aa79acd3e4eac72218a5dd0a1073047 (diff)
download   xen-2a1d0b3f0233b00c23f0b3020511fe25d0b31607.tar.gz
           xen-2a1d0b3f0233b00c23f0b3020511fe25d0b31607.tar.bz2
           xen-2a1d0b3f0233b00c23f0b3020511fe25d0b31607.zip
bitkeeper revision 1.386 (3f282cb1_39eb4QUC1O0T4BEwSH9Zg)
network.c, dev.c:
Reduce hypercalls required for network transmission.
Diffstat (limited to 'xen')
-rw-r--r--  xen/net/dev.c | 358
1 file changed, 200 insertions(+), 158 deletions(-)
diff --git a/xen/net/dev.c b/xen/net/dev.c
index ec761a5710..b1a87ffb83 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -53,9 +53,11 @@
 static struct sk_buff_head rx_skb_queue[NR_CPUS] __cacheline_aligned;
 
-static void make_tx_response(net_vif_t *vif,
-                             unsigned short id,
-                             unsigned char st);
+static int get_tx_bufs(net_vif_t *vif);
+
+static void __make_tx_response(net_vif_t *vif,
+                               unsigned short id,
+                               unsigned char st);
 static void make_rx_response(net_vif_t *vif,
                              unsigned short id,
                              unsigned short size,
@@ -722,28 +724,7 @@
 }
 
-/* Destructor function for tx skbs. */
-static void tx_skb_release(struct sk_buff *skb)
-{
-    int i;
-    net_vif_t *vif = skb->src_vif;
-    unsigned long flags;
-
-    spin_lock_irqsave(&vif->domain->page_lock, flags);
-    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
-        put_page_tot(skb_shinfo(skb)->frags[i].page);
-    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
-
-    if ( skb->skb_type == SKB_NODATA )
-        kmem_cache_free(net_header_cachep, skb->head);
-
-    skb_shinfo(skb)->nr_frags = 0;
-
-    make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
-
-    put_vif(vif);
-}
-
+static void tx_skb_release(struct sk_buff *skb);
 
 static void net_tx_action(unsigned long unused)
 {
@@ -762,12 +743,16 @@
         vif = list_entry(ent, net_vif_t, list);
         get_vif(vif);
         remove_from_net_schedule_list(vif);
-        if ( vif->tx_cons == vif->tx_prod )
+
+        /* Check whether there are packets to be transmitted. */
+        if ( (vif->tx_cons == vif->tx_prod) && !get_tx_bufs(vif) )
         {
             put_vif(vif);
             continue;
         }
 
+        add_to_net_schedule_list_tail(vif);
+
         if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
         {
             printk("Out of memory in net_tx_action()!\n");
@@ -779,8 +764,6 @@
         /* Pick an entry from the transmit queue. */
         tx = &vif->tx_shadow_ring[vif->tx_cons];
         vif->tx_cons = TX_RING_INC(vif->tx_cons);
-        if ( vif->tx_cons != vif->tx_prod )
-            add_to_net_schedule_list_tail(vif);
 
         skb->destructor = tx_skb_release;
@@ -832,6 +815,37 @@
 }
 
+/* Destructor function for tx skbs. */
+static void tx_skb_release(struct sk_buff *skb)
+{
+    int i;
+    net_vif_t *vif = skb->src_vif;
+    unsigned long flags;
+
+    spin_lock_irqsave(&vif->domain->page_lock, flags);
+    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
+        put_page_tot(skb_shinfo(skb)->frags[i].page);
+    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
+
+    if ( skb->skb_type == SKB_NODATA )
+        kmem_cache_free(net_header_cachep, skb->head);
+
+    skb_shinfo(skb)->nr_frags = 0;
+
+    spin_lock_irqsave(&vif->tx_lock, flags);
+    __make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
+    spin_unlock_irqrestore(&vif->tx_lock, flags);
+
+    if ( (vif->tx_cons == vif->tx_prod) && get_tx_bufs(vif) )
+    {
+        add_to_net_schedule_list_tail(vif);
+        maybe_schedule_tx_action();
+    }
+
+    put_vif(vif);
+}
+
+
 /*
  * We need this ioctl for efficient implementation of the
  * if_indextoname() function required by the IPv6 API. Without
@@ -1788,6 +1802,159 @@ inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev)
 }
 
+static int get_tx_bufs(net_vif_t *vif)
+{
+    struct task_struct *p = vif->domain;
+    net_idx_t *shared_idxs = vif->shared_idxs;
+    net_ring_t *shared_rings = vif->shared_rings;
+    net_vif_t *target;
+    unsigned long buf_pfn;
+    struct pfn_info *buf_page;
+    u8 *g_data;
+    unsigned short protocol;
+    struct sk_buff *skb;
+    tx_req_entry_t tx;
+    int i, j, ret;
+    unsigned long flags;
+
+    if ( vif->tx_req_cons == shared_idxs->tx_req_prod )
+        return 0;
+
+    spin_lock_irqsave(&vif->tx_lock, flags);
+
+    j = vif->tx_prod;
+
+    /*
+     * Collect up new transmit buffers. We collect up to the guest OS's new
+     * producer index, but take care not to catch up with our own consumer
+     * index.
+     */
+ again:
+    for ( i = vif->tx_req_cons;
+          (i != shared_idxs->tx_req_prod) &&
+              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
+          i = TX_RING_INC(i) )
+    {
+        tx = shared_rings->tx_ring[i].req;
+        target = VIF_DROP;
+
+        if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
+        {
+            DPRINTK("Bad packet size: %d\n", tx.size);
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            continue;
+        }
+
+        /* No crossing a page boundary as the payload mustn't fragment. */
+        if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
+        {
+            DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
+                    tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            continue;
+        }
+
+        buf_pfn = tx.addr >> PAGE_SHIFT;
+        buf_page = frame_table + buf_pfn;
+        spin_lock(&p->page_lock);
+        if ( (buf_pfn >= max_page) ||
+             ((buf_page->flags & PG_domain_mask) != p->domain) )
+        {
+            DPRINTK("Bad page frame\n");
+            spin_unlock(&p->page_lock);
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            continue;
+        }
+
+        g_data = map_domain_mem(tx.addr);
+
+        protocol = __constant_htons(
+            init_tx_header(g_data, tx.size, the_dev));
+        if ( protocol == 0 )
+        {
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            goto tx_unmap_and_continue;
+        }
+
+        target = net_get_target_vif(g_data, tx.size, vif);
+
+        if ( VIF_LOCAL(target) )
+        {
+            /* Local delivery */
+            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
+            {
+                __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+                put_vif(target);
+                goto tx_unmap_and_continue;
+            }
+
+            skb->src_vif = vif;
+            skb->dst_vif = target;
+            skb->protocol = protocol;
+
+            /*
+             * We don't need a well-formed skb as netif_rx will fill these
+             * fields in as necessary. All we actually need is the right
+             * page offset in skb->data, and the right length in skb->len.
+             * Note that the correct address/length *excludes* link header.
+             */
+            skb->head = (u8 *)map_domain_mem(
+                ((skb->pf - frame_table) << PAGE_SHIFT));
+            skb->data = skb->head + 18;
+            memcpy(skb->data, g_data, tx.size);
+            skb->data += ETH_HLEN;
+            skb->len = tx.size - ETH_HLEN;
+            unmap_domain_mem(skb->head);
+
+            netif_rx(skb);
+
+            __make_tx_response(vif, tx.id, RING_STATUS_OK);
+        }
+        else if ( (target == VIF_PHYS) || IS_PRIV(p) )
+        {
+            vif->tx_shadow_ring[j].id = tx.id;
+            vif->tx_shadow_ring[j].size = tx.size;
+            vif->tx_shadow_ring[j].header =
+                kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
+            if ( vif->tx_shadow_ring[j].header == NULL )
+            {
+                __make_tx_response(vif, tx.id, RING_STATUS_OK);
+                goto tx_unmap_and_continue;
+            }
+
+            memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
+            vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
+            get_page_tot(buf_page);
+            j = TX_RING_INC(j);
+        }
+        else
+        {
+            __make_tx_response(vif, tx.id, RING_STATUS_DROPPED);
+        }
+
+    tx_unmap_and_continue:
+        unmap_domain_mem(g_data);
+        spin_unlock(&p->page_lock);
+    }
+
+    /*
+     * Needed as a final check for req_prod updates on another CPU.
+     * Also ensures that other CPUs see shadow ring updates.
+     */
+    smp_mb();
+
+    if ( (vif->tx_req_cons = i) != shared_idxs->tx_req_prod )
+        goto again;
+
+    if ( (ret = (vif->tx_prod != j)) )
+        vif->tx_prod = j;
+
+    spin_unlock_irqrestore(&vif->tx_lock, flags);
+
+    return ret;
+}
+
+
 /*
  * do_net_update:
  *
@@ -1801,15 +1968,10 @@ long do_net_update(void)
     net_vif_t *vif;
     net_idx_t *shared_idxs;
     unsigned int i, j, idx;
-    struct sk_buff *skb;
-    tx_req_entry_t tx;
     rx_req_entry_t rx;
-    unsigned long  pte_pfn, buf_pfn;
+    unsigned long pte_pfn, buf_pfn;
     struct pfn_info *pte_page, *buf_page;
     unsigned long *ptep;
-    net_vif_t *target;
-    u8 *g_data;
-    unsigned short protocol;
 
     perfc_incr(net_hypercalls);
@@ -1825,125 +1987,8 @@ long do_net_update(void)
      * PHASE 1 -- TRANSMIT RING
      */
 
-    /*
-     * Collect up new transmit buffers. We collect up to the guest OS's
-     * new producer index, but take care not to catch up with our own
-     * consumer index.
-     */
-    j = vif->tx_prod;
-    for ( i = vif->tx_req_cons;
-          (i != shared_idxs->tx_req_prod) &&
-              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
-          i = TX_RING_INC(i) )
+    if ( get_tx_bufs(vif) )
     {
-        tx = shared_rings->tx_ring[i].req;
-        target = VIF_DROP;
-
-        if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
-        {
-            DPRINTK("Bad packet size: %d\n", tx.size);
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            continue;
-        }
-
-        /* No crossing a page boundary as the payload mustn't fragment. */
-        if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
-        {
-            DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
-                    tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            continue;
-        }
-
-        buf_pfn = tx.addr >> PAGE_SHIFT;
-        buf_page = frame_table + buf_pfn;
-        spin_lock_irq(&current->page_lock);
-        if ( (buf_pfn >= max_page) ||
-             ((buf_page->flags & PG_domain_mask) != current->domain) )
-        {
-            DPRINTK("Bad page frame\n");
-            spin_unlock_irq(&current->page_lock);
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            continue;
-        }
-
-        g_data = map_domain_mem(tx.addr);
-
-        protocol = __constant_htons(
-            init_tx_header(g_data, tx.size, the_dev));
-        if ( protocol == 0 )
-        {
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            goto tx_unmap_and_continue;
-        }
-
-        target = net_get_target_vif(g_data, tx.size, vif);
-
-        if ( VIF_LOCAL(target) )
-        {
-            /* Local delivery */
-            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
-            {
-                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-                put_vif(target);
-                goto tx_unmap_and_continue;
-            }
-
-            skb->src_vif = vif;
-            skb->dst_vif = target;
-            skb->protocol = protocol;
-
-            /*
-             * We don't need a well-formed skb as netif_rx will fill these
-             * fields in as necessary. All we actually need is the right
-             * page offset in skb->data, and the right length in skb->len.
-             * Note that the correct address/length *excludes* link header.
-             */
-            skb->head = (u8 *)map_domain_mem(
-                ((skb->pf - frame_table) << PAGE_SHIFT));
-            skb->data = skb->head + 18;
-            memcpy(skb->data, g_data, tx.size);
-            skb->data += ETH_HLEN;
-            skb->len = tx.size - ETH_HLEN;
-            unmap_domain_mem(skb->head);
-
-            netif_rx(skb);
-
-            make_tx_response(vif, tx.id, RING_STATUS_OK);
-        }
-        else if ( (target == VIF_PHYS) || IS_PRIV(current) )
-        {
-            vif->tx_shadow_ring[j].id = tx.id;
-            vif->tx_shadow_ring[j].size = tx.size;
-            vif->tx_shadow_ring[j].header =
-                kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
-            if ( vif->tx_shadow_ring[j].header == NULL )
-            {
-                make_tx_response(vif, tx.id, RING_STATUS_OK);
-                goto tx_unmap_and_continue;
-            }
-
-            memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
-            vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
-            get_page_tot(buf_page);
-            j = TX_RING_INC(j);
-        }
-        else
-        {
-            make_tx_response(vif, tx.id, RING_STATUS_DROPPED);
-        }
-
-    tx_unmap_and_continue:
-        unmap_domain_mem(g_data);
-        spin_unlock_irq(&current->page_lock);
-    }
-
-    vif->tx_req_cons = i;
-
-    if ( vif->tx_prod != j )
-    {
-        smp_mb(); /* Let other CPUs see new descriptors first. */
-        vif->tx_prod = j;
         add_to_net_schedule_list_tail(vif);
         maybe_schedule_tx_action();
     }
@@ -2037,16 +2082,14 @@ long do_net_update(void)
 }
 
-static void make_tx_response(net_vif_t *vif,
-                             unsigned short id,
-                             unsigned char st)
+static void __make_tx_response(net_vif_t *vif,
+                               unsigned short id,
+                               unsigned char st)
 {
-    unsigned long flags;
     unsigned int pos;
     tx_resp_entry_t *resp;
 
     /* Place on the response ring for the relevant domain. */
-    spin_lock_irqsave(&vif->tx_lock, flags);
     pos = vif->tx_resp_prod;
     resp = &vif->shared_rings->tx_ring[pos].resp;
    resp->id = id;
@@ -2058,7 +2101,6 @@ static void make_tx_response(net_vif_t *vif,
         unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
         guest_event_notify(cpu_mask);
     }
-    spin_unlock_irqrestore(&vif->tx_lock, flags);
 }
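
The core pattern this patch relies on is that get_tx_bufs() drains newly posted transmit requests from the shared ring on the hypervisor's own initiative — both from the do_net_update hypercall path and from the tx skb destructor — so a guest that keeps its request ring topped up no longer needs a hypercall per batch. Below is a minimal, self-contained sketch of that drain-then-recheck loop; the names (drain_ring, process_entry, RING_SIZE) and the use of the GCC builtin __sync_synchronize() in place of Xen's smp_mb() are illustrative assumptions, not code from this commit.

/*
 * Sketch (not Xen code): consume ring entries up to the observed
 * producer index, then barrier and re-check the shared index so
 * requests posted meanwhile are picked up without another entry
 * into the hypervisor.
 */
#include <stdio.h>

#define RING_SIZE   8                      /* power of two, like TX_RING_SIZE */
#define RING_INC(i) (((i) + 1) & (RING_SIZE - 1))

static unsigned int shared_req_prod;       /* written by the "guest" side    */
static unsigned int req_cons;              /* private to the "hypervisor"    */

static void process_entry(unsigned int idx)
{
    printf("consumed ring slot %u\n", idx);
}

/* Returns nonzero if any new work was consumed (cf. get_tx_bufs()). */
static int drain_ring(void)
{
    unsigned int i;
    int did_work = 0;

 again:
    for ( i = req_cons; i != shared_req_prod; i = RING_INC(i) )
    {
        process_entry(i);
        did_work = 1;
    }

    /*
     * In the real code this is smp_mb(): publish our consumption and
     * re-read the producer index in case it moved while we worked.
     */
    __sync_synchronize();

    if ( (req_cons = i) != shared_req_prod )
        goto again;

    return did_work;
}

int main(void)
{
    shared_req_prod = 5;                   /* pretend the guest queued 5 reqs */
    printf("drained: %d\n", drain_ring());
    return 0;
}

Seen through this sketch, the structural change in the commit is that tx_skb_release() now performs the equivalent of drain_ring() after posting each response: transmit completions themselves pull in the guest's next batch of requests and reschedule the vif, which is where the saved hypercalls come from.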