diff options
Diffstat (limited to 'xenolinux-2.4.21-pre4-sparse/arch')
16 files changed, 673 insertions, 333 deletions
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in b/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in index a12a1ec5c0..7dd6b1c985 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/config.in @@ -125,3 +125,5 @@ if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then fi endmenu + +source lib/Config.in
\ No newline at end of file diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig b/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig index 0059207429..c9e4e1a4f7 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/defconfig @@ -113,7 +113,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=4096 -# CONFIG_BLK_DEV_INITRD is not set +CONFIG_BLK_DEV_INITRD=y CONFIG_XENOLINUX_BLOCK=y # @@ -286,8 +286,11 @@ CONFIG_JBD=y # CONFIG_FAT_FS is not set # CONFIG_CRAMFS is not set # CONFIG_TMPFS is not set -# CONFIG_RAMFS is not set -# CONFIG_ISO9660_FS is not set +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_ZISOFS_FS=y # CONFIG_MINIX_FS is not set # CONFIG_VXFS_FS is not set # CONFIG_NTFS_FS is not set @@ -299,6 +302,47 @@ CONFIG_EXT2_FS=y # CONFIG_UDF_FS is not set # CONFIG_UFS_FS is not set +# Skanky NLS stuff +CONFIG_NLS_DEFAULT="iso8559-1" +CONFIG_NLS_CODEPAGE_437=n +CONFIG_NLS_CODEPAGE_737=n +CONFIG_NLS_CODEPAGE_775=n +CONFIG_NLS_CODEPAGE_850=n +CONFIG_NLS_CODEPAGE_852=n +CONFIG_NLS_CODEPAGE_855=n +CONFIG_NLS_CODEPAGE_857=n +CONFIG_NLS_CODEPAGE_860=n +CONFIG_NLS_CODEPAGE_861=n +CONFIG_NLS_CODEPAGE_862=n +CONFIG_NLS_CODEPAGE_863=n +CONFIG_NLS_CODEPAGE_864=n +CONFIG_NLS_CODEPAGE_865=n +CONFIG_NLS_CODEPAGE_866=n +CONFIG_NLS_CODEPAGE_869=n +CONFIG_NLS_CODEPAGE_936=n +CONFIG_NLS_CODEPAGE_950=n +CONFIG_NLS_CODEPAGE_932=n +CONFIG_NLS_CODEPAGE_949=n +CONFIG_NLS_CODEPAGE_874=n +CONFIG_NLS_ISO8859_8=n +CONFIG_NLS_CODEPAGE_1250=n +CONFIG_NLS_CODEPAGE_1251=n +CONFIG_NLS_ISO8859_1=n +CONFIG_NLS_ISO8859_2=n +CONFIG_NLS_ISO8859_3=n +CONFIG_NLS_ISO8859_4=n +CONFIG_NLS_ISO8859_5=n +CONFIG_NLS_ISO8859_6=n +CONFIG_NLS_ISO8859_7=n +CONFIG_NLS_ISO8859_9=n +CONFIG_NLS_ISO8859_13=n +CONFIG_NLS_ISO8859_14=n +CONFIG_NLS_ISO8859_15=n +CONFIG_NLS_KOI8_R=n +CONFIG_NLS_KOI8_U=n +CONFIG_NLS_UTF8=n + + # # Network File Systems # @@ -400,6 +444,8 @@ CONFIG_KALLSYMS=y # CONFIG_JFS_FS is not set # CONFIG_QNX4FS_FS is not set - - - +# +# Library routines +# +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=n diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_ide.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_ide.c index 599f9f6c0a..f8ff9dd795 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_ide.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/block/xl_ide.c @@ -28,7 +28,6 @@ static struct block_device_operations xlide_block_fops = revalidate: xenolinux_block_revalidate, }; - int xlide_hwsect(int minor) { return xlide_hardsect_size[minor]; @@ -38,7 +37,9 @@ int xlide_hwsect(int minor) int xlide_init(xen_disk_info_t *xdi) { int i, result, units, minors, disk; + unsigned short minor; struct gendisk *gd; + char buf[64]; /* If we don't have any usable IDE devices we may as well bail now. */ units = 0; @@ -107,18 +108,43 @@ int xlide_init(xen_disk_info_t *xdi) /* Now register each disk in turn. */ for ( i = 0; i < xdi->count; i++ ) { - disk = xdi->disks[i].device & XENDEV_IDX_MASK; + disk = xdi->disks[i].device & XENDEV_IDX_MASK; + minor = disk << XLIDE_PARTN_SHIFT; + /* We can use the first 16 IDE devices. */ if ( !IS_IDE_XENDEV(xdi->disks[i].device) || (disk >= 16) ) continue; ((xl_disk_t *)gd->real_devices)[disk].capacity = xdi->disks[i].capacity; - register_disk(gd, - MKDEV(XLIDE_MAJOR, disk<<XLIDE_PARTN_SHIFT), - 1<<XLIDE_PARTN_SHIFT, - &xlide_block_fops, - xdi->disks[i].capacity); + + + switch (xdi->disks[i].type) { + case XD_TYPE_CDROM: + set_device_ro(MKDEV(XLIDE_MAJOR, minor), 1); + // fall through + + case XD_TYPE_FLOPPY: + case XD_TYPE_TAPE: + gd->flags[disk] = GENHD_FL_REMOVABLE; + printk(KERN_ALERT "Skipping partition check on %s /dev/%s\n", + xdi->disks[i].type == XD_TYPE_CDROM ? "cdrom" : + (xdi->disks[i].type == XD_TYPE_TAPE ? "tape" : "floppy"), + disk_name(gd, minor, buf)); + break; + + case XD_TYPE_DISK: + register_disk(gd, MKDEV(XLIDE_MAJOR, minor), 1<<XLIDE_PARTN_SHIFT, + &xlide_block_fops, xdi->disks[i].capacity); + break; + + default: + printk(KERN_ALERT "XenoLinux: unknown ide device type %d\n", + xdi->disks[i].type); + break; + } + + } printk(KERN_ALERT diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c index c36ab02e96..d3e4752eb3 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_core.c @@ -75,15 +75,11 @@ static int cmd_read_proc(char *page, char **start, off_t off, static ssize_t dom_vif_read(struct file * file, char * buff, size_t size, loff_t * off) { - char hyp_buf[128]; // Hypervisor is going to write its reply here. + char hyp_buf[128]; network_op_t op; static int finished = 0; - // This seems to be the only way to make the OS stop making read requests - // to the file. When we use the fileoperations version of read, offset - // seems to be ignored altogether. - - if (finished) + if ( finished ) { finished = 0; return 0; diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h index 22ebd7aba0..74c9b24de7 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/dom0_ops.h @@ -81,6 +81,8 @@ typedef struct domain_launch unsigned long virt_startinfo_addr; unsigned int num_vifs; char cmd_line[MAX_CMD_LEN]; + unsigned long virt_mod_addr; + unsigned long virt_mod_len; } dom_meminfo_t; diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c index 13fe25ec9c..852f6943e2 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/dom0/vfr.c @@ -26,7 +26,7 @@ u16 antous(const char *buff, int len); int anton(const char *buff, int len); static int vfr_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { strcpy(page, readbuf); *readbuf = '\0'; @@ -60,147 +60,163 @@ static int vfr_read_proc(char *page, char **start, off_t off, ((_x)=='\f') || ((_x)=='\r') || ((_x)=='\n') ) static int vfr_write_proc(struct file *file, const char *buffer, - u_long count, void *data) + u_long count, void *data) { - network_op_t op; - int ret, len; - int ts, te, tl; // token start, end, and length - int fs, fe, fl; // field. - - len = count; - ts = te = 0; - - memset(&op, 0, sizeof(network_op_t)); - - // get the command: - while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. - te = ts; - while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end - if ( te <= ts ) goto bad; - tl = te - ts; - - if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) - { - op.cmd = NETWORK_OP_ADDRULE; - } - else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) - { - op.cmd = NETWORK_OP_DELETERULE; - } - else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) - { - op.cmd = NETWORK_OP_GETRULELIST; - goto doneparsing; - } - - ts = te; - - // get the action - while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. - te = ts; - while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end - if ( te <= ts ) goto bad; - tl = te - ts; - - if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) - { - op.u.net_rule.action = NETWORK_ACTION_ACCEPT; - goto keyval; - } - if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) - { - op.u.net_rule.action = NETWORK_ACTION_COUNT; - goto keyval; - } - - // default case; - return (len); - + network_op_t op; + int ret, len; + int ts, te, tl; // token start, end, and length + int fs, fe, fl; // field. - // get the key=val pairs. - keyval: - while (count) - { - //get field - ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } - te = ts; - while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) - { te++; count--; } - if ( te <= ts ) - goto doneparsing; - tl = te - ts; - fs = ts; fe = te; fl = tl; // save the field markers. - // skip " = " (ignores extra equals.) - while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) - { te++; count--; } - ts = te; - while ( count && !isspace(buffer[te]) ) { te++; count--; } - tl = te - ts; + len = count; + ts = te = 0; - if ( (fl <= 0) || (tl <= 0) ) goto bad; + memset(&op, 0, sizeof(network_op_t)); - if (strncmp(&buffer[fs], "srcaddr", fl) == 0) - { - op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) - { - op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) - { - op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) - { - op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); - } - else if (strncmp(&buffer[fs], "srcport", fl) == 0) + // get the command: + while ( count && isspace(buffer[ts]) ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && !isspace(buffer[te]) ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ADD", tl) == 0 ) { - op.u.net_rule.src_port = antous(&buffer[ts], tl); + op.cmd = NETWORK_OP_ADDRULE; } - else if (strncmp(&buffer[fs], "dstport", fl) == 0) + else if ( strncmp(&buffer[ts], "DELETE", tl) == 0 ) { - op.u.net_rule.dst_port = antous(&buffer[ts], tl); + op.cmd = NETWORK_OP_DELETERULE; } - else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + else if ( strncmp(&buffer[ts], "PRINT", tl) == 0 ) { - op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + op.cmd = NETWORK_OP_GETRULELIST; + goto doneparsing; } - else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + + ts = te; + + // get the action + while ( count && (buffer[ts] == ' ') ) { ts++; count--; } // skip spaces. + te = ts; + while ( count && (buffer[te] != ' ') ) { te++; count--; } // command end + if ( te <= ts ) goto bad; + tl = te - ts; + + if ( strncmp(&buffer[ts], "ACCEPT", tl) == 0 ) { - op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + goto keyval; } - else if (strncmp(&buffer[fs], "srcint", fl) == 0) + if ( strncmp(&buffer[ts], "COUNT", tl) == 0 ) { - op.u.net_rule.src_interface = anton(&buffer[ts], tl); + op.u.net_rule.action = NETWORK_ACTION_COUNT; + goto keyval; } - else if (strncmp(&buffer[fs], "dstint", fl) == 0) + + // default case; + return (len); + + + // get the key=val pairs. + keyval: + while (count) { - op.u.net_rule.dst_interface = anton(&buffer[ts], tl); - } - else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) - { - if (strncmp(&buffer[ts], "any", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_ANY; - if (strncmp(&buffer[ts], "ip", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_IP; - if (strncmp(&buffer[ts], "tcp", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_TCP; - if (strncmp(&buffer[ts], "udp", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_UDP; - if (strncmp(&buffer[ts], "arp", tl) == 0) - op.u.net_rule.proto = NETWORK_PROTO_ARP; - + //get field + ts = te; while ( count && isspace(buffer[ts]) ) { ts++; count--; } + te = ts; + while ( count && !isspace(buffer[te]) && (buffer[te] != '=') ) + { te++; count--; } + if ( te <= ts ) + goto doneparsing; + tl = te - ts; + fs = ts; fe = te; fl = tl; // save the field markers. + // skip " = " (ignores extra equals.) + while ( count && (isspace(buffer[te]) || (buffer[te] == '=')) ) + { te++; count--; } + ts = te; + while ( count && !isspace(buffer[te]) ) { te++; count--; } + tl = te - ts; + + if ( (fl <= 0) || (tl <= 0) ) goto bad; + + /* NB. Prefix matches must go first! */ + if (strncmp(&buffer[fs], "src", fl) == 0) + { + op.u.net_rule.src_vif = VIF_ANY_INTERFACE; + } + else if (strncmp(&buffer[fs], "dst", fl) == 0) + { + op.u.net_rule.dst_vif = VIF_PHYSICAL_INTERFACE; + } + else if (strncmp(&buffer[fs], "srcaddr", fl) == 0) + { + op.u.net_rule.src_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddr", fl) == 0) + { + op.u.net_rule.dst_addr = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcaddrmask", fl) == 0) + { + op.u.net_rule.src_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstaddrmask", fl) == 0) + { + op.u.net_rule.dst_addr_mask = getipaddr(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcport", fl) == 0) + { + op.u.net_rule.src_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstport", fl) == 0) + { + op.u.net_rule.dst_port = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcportmask", fl) == 0) + { + op.u.net_rule.src_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstportmask", fl) == 0) + { + op.u.net_rule.dst_port_mask = antous(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "srcdom", fl) == 0) + { + op.u.net_rule.src_vif |= anton(&buffer[ts], tl)<<VIF_DOMAIN_SHIFT; + } + else if (strncmp(&buffer[fs], "srcidx", fl) == 0) + { + op.u.net_rule.src_vif |= anton(&buffer[ts], tl); + } + else if (strncmp(&buffer[fs], "dstdom", fl) == 0) + { + op.u.net_rule.dst_vif |= anton(&buffer[ts], tl)<<VIF_DOMAIN_SHIFT; + } + else if (strncmp(&buffer[fs], "dstidx", fl) == 0) + { + op.u.net_rule.dst_vif |= anton(&buffer[ts], tl); + } + else if ( (strncmp(&buffer[fs], "proto", fl) == 0)) + { + if (strncmp(&buffer[ts], "any", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ANY; + if (strncmp(&buffer[ts], "ip", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_IP; + if (strncmp(&buffer[ts], "tcp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_TCP; + if (strncmp(&buffer[ts], "udp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_UDP; + if (strncmp(&buffer[ts], "arp", tl) == 0) + op.u.net_rule.proto = NETWORK_PROTO_ARP; + } } - } doneparsing: - ret = HYPERVISOR_network_op(&op); - return(len); + ret = HYPERVISOR_network_op(&op); + return(len); bad: - return(len); + return(len); } @@ -256,51 +272,50 @@ int anton(const char *buff, int len) u16 antous(const char *buff, int len) { - u16 ret; - char c; + u16 ret; + char c; - ret = 0; + ret = 0; - while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) - { - ret *= 10; - ret += c - '0'; - buff++; len--; - } + while ( (len) && ((c = *buff) >= '0') && (c <= '9') ) + { + ret *= 10; + ret += c - '0'; + buff++; len--; + } - return ret; + return ret; } u32 getipaddr(const char *buff, unsigned int len) { - int i; - char c; - u32 ret, val; + char c; + u32 ret, val; - ret = 0; val = 0; + ret = 0; val = 0; - while ( len ) - { - if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + while ( len ) { - return(0); // malformed. + if (!((((c = *buff) >= '0') && ( c <= '9')) || ( c == '.' ) ) ) + { + return(0); // malformed. + } + + if ( c == '.' ) { + if (val > 255) return (0); //malformed. + ret = ret << 8; + ret += val; + val = 0; + len--; buff++; + continue; + } + val *= 10; + val += c - '0'; + buff++; len--; } + ret = ret << 8; + ret += val; - if ( c == '.' ) { - if (val > 255) return (0); //malformed. - ret = ret << 8; - ret += val; - val = 0; - len--; buff++; - continue; - } - val *= 10; - val += c - '0'; - buff++; len--; - } - ret = ret << 8; - ret += val; - - return (ret); + return (ret); } diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c index 01e81e0cf0..03f9939e4e 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c @@ -3,7 +3,7 @@ * * Virtual network driver for XenoLinux. * - * Copyright (c) 2002, K A Fraser + * Copyright (c) 2002-2003, K A Fraser */ #include <linux/config.h> @@ -47,72 +47,70 @@ static void cleanup_module(void); static struct list_head dev_list; -/* - * RX RING: RX_IDX <= rx_cons <= rx_prod - * TX RING: TX_IDX <= tx_cons <= tx_prod - * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor) - */ struct net_private { struct list_head list; struct net_device *dev; struct net_device_stats stats; - struct sk_buff **tx_skb_ring; - struct sk_buff **rx_skb_ring; atomic_t tx_entries; - unsigned int rx_idx, tx_idx, tx_full; + unsigned int rx_resp_cons, tx_resp_cons, tx_full; net_ring_t *net_ring; + net_idx_t *net_idx; spinlock_t tx_lock; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[TX_RING_SIZE]; + struct sk_buff *rx_skbs[RX_RING_SIZE]; }; +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + _id; }) + static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs) { struct net_device *dev = (struct net_device *)dev_id; struct net_private *np = dev->priv; - printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_idx = %d," - " tx_cons = %d, tx_prod = %d, tx_event = %d, state=%d\n", - np->tx_full, atomic_read(&np->tx_entries), np->tx_idx, - np->net_ring->tx_cons, np->net_ring->tx_prod, - np->net_ring->tx_event, + printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d," + " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n", + np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons, + np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, + np->net_idx->tx_event, test_bit(__LINK_STATE_XOFF, &dev->state)); + printk(KERN_ALERT "rx_resp_cons = %d," + " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n", + np->rx_resp_cons, np->net_idx->rx_req_prod, + np->net_idx->rx_resp_prod, np->net_idx->rx_event); } static int network_open(struct net_device *dev) { struct net_private *np = dev->priv; - int error = 0; - - np->rx_idx = np->tx_idx = np->tx_full = 0; + int i, error = 0; + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; memset(&np->stats, 0, sizeof(np->stats)); - spin_lock_init(&np->tx_lock); - atomic_set(&np->tx_entries, 0); + memset(np->net_ring, 0, sizeof(*np->net_ring)); + memset(np->net_idx, 0, sizeof(*np->net_idx)); - np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0; - np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0; - np->net_ring->tx_ring = NULL; - np->net_ring->rx_ring = NULL; - - np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *), - GFP_KERNEL); - np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *), - GFP_KERNEL); - np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t), - GFP_KERNEL); - np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t), - GFP_KERNEL); - if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) || - (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) ) - { - printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name); - error = -ENOBUFS; - goto fail; - } + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i < TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i < RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); network_alloc_rx_buffers(dev); @@ -154,10 +152,6 @@ static int network_open(struct net_device *dev) return 0; fail: - if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring); - if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring); - if ( np->rx_skb_ring ) kfree(np->rx_skb_ring); - if ( np->tx_skb_ring ) kfree(np->tx_skb_ring); kfree(np); return error; } @@ -169,28 +163,30 @@ static void network_tx_buf_gc(struct net_device *dev) struct net_private *np = dev->priv; struct sk_buff *skb; unsigned long flags; - unsigned int cons; + unsigned int prod; + tx_entry_t *tx_ring = np->net_ring->tx_ring; spin_lock_irqsave(&np->tx_lock, flags); do { - cons = np->net_ring->tx_cons; + prod = np->net_idx->tx_resp_prod; - for ( i = np->tx_idx; i != cons; i = TX_RING_INC(i) ) + for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) ) { - skb = np->tx_skb_ring[i]; + skb = np->tx_skbs[tx_ring[i].resp.id]; + ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id); dev_kfree_skb_any(skb); atomic_dec(&np->tx_entries); } - np->tx_idx = i; + np->tx_resp_cons = prod; /* Set a new event, then check for race with update of tx_cons. */ - np->net_ring->tx_event = - TX_RING_ADD(cons, (atomic_read(&np->tx_entries)>>1) + 1); + np->net_idx->tx_event = + TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1); smp_mb(); } - while ( cons != np->net_ring->tx_cons ); + while ( prod != np->net_idx->tx_resp_prod ); if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) ) { @@ -201,43 +197,39 @@ static void network_tx_buf_gc(struct net_device *dev) spin_unlock_irqrestore(&np->tx_lock, flags); } -inline unsigned long get_ppte(unsigned long addr) +inline pte_t *get_ppte(void *addr) { - unsigned long ppte; - pgd_t *pgd; pmd_t *pmd; pte_t *ptep; - pgd = pgd_offset_k(addr); - - if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); - - pmd = pmd_offset(pgd, addr); - if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); - - ptep = pte_offset(pmd, addr); - ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep)); - - return ppte; + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + return pte; } static void network_alloc_rx_buffers(struct net_device *dev) { - unsigned int i; + unsigned int i, id; struct net_private *np = dev->priv; struct sk_buff *skb; - unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); + unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES); - for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) + for ( i = np->net_idx->rx_req_prod; i != end; i = RX_RING_INC(i) ) { skb = dev_alloc_skb(RX_BUF_SIZE); if ( skb == NULL ) break; skb->dev = dev; - np->rx_skb_ring[i] = skb; - np->net_ring->rx_ring[i].addr = get_ppte((unsigned long)skb->head); - np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + np->rx_skbs[id] = skb; + + np->net_ring->rx_ring[i].req.id = (unsigned short)id; + np->net_ring->rx_ring[i].req.addr = + virt_to_machine(get_ppte(skb->head)); } - np->net_ring->rx_prod = i; + np->net_idx->rx_req_prod = i; - np->net_ring->rx_event = RX_RING_INC(np->rx_idx); + np->net_idx->rx_event = RX_RING_INC(np->rx_resp_cons); /* * We may have allocated buffers which have entries outstanding in @@ -254,16 +246,18 @@ static void network_free_rx_buffers(struct net_device *dev) struct net_private *np = dev->priv; struct sk_buff *skb; - for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) ) + for ( i = np->rx_resp_cons; + i != np->net_idx->rx_req_prod; + i = RX_RING_INC(i) ) { - skb = np->rx_skb_ring[i]; + skb = np->rx_skbs[np->net_ring->rx_ring[i].req.id]; dev_kfree_skb_any(skb); } } static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) { - unsigned int i; + unsigned int i, id; struct net_private *np = (struct net_private *)dev->priv; if ( np->tx_full ) @@ -272,7 +266,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); return -ENOBUFS; } - i = np->net_ring->tx_prod; + i = np->net_idx->tx_req_prod; if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE ) { @@ -284,11 +278,14 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) skb = new_skb; } - np->tx_skb_ring[i] = skb; - np->net_ring->tx_ring[i].addr = - (unsigned long)phys_to_machine(virt_to_phys(skb->data)); - np->net_ring->tx_ring[i].size = skb->len; - np->net_ring->tx_prod = TX_RING_INC(i); + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + np->net_ring->tx_ring[i].req.id = (unsigned short)id; + np->net_ring->tx_ring[i].req.addr = + phys_to_machine(virt_to_phys(skb->data)); + np->net_ring->tx_ring[i].req.size = skb->len; + np->net_idx->tx_req_prod = TX_RING_INC(i); atomic_inc(&np->tx_entries); np->stats.tx_bytes += skb->len; @@ -316,13 +313,17 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) struct net_device *dev = (struct net_device *)dev_id; struct net_private *np = dev->priv; struct sk_buff *skb; - rx_entry_t *rx; + rx_resp_entry_t *rx; again: - for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) + for ( i = np->rx_resp_cons; + i != np->net_idx->rx_resp_prod; + i = RX_RING_INC(i) ) { - rx = &np->net_ring->rx_ring[i]; - skb = np->rx_skb_ring[i]; + rx = &np->net_ring->rx_ring[i].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); if ( rx->status != RING_STATUS_OK ) { @@ -341,8 +342,7 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) skb_shinfo(skb)->frag_list = NULL; phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = - (*(unsigned long *)phys_to_virt(machine_to_phys(rx->addr)) - ) >> PAGE_SHIFT; + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; if ( rx->offset < 16 ) { @@ -353,23 +353,23 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) skb_reserve(skb, rx->offset - 16); - skb_put(skb, np->net_ring->rx_ring[i].size); + skb_put(skb, rx->size); skb->protocol = eth_type_trans(skb, dev); np->stats.rx_packets++; - np->stats.rx_bytes += np->net_ring->rx_ring[i].size; + np->stats.rx_bytes += rx->size; netif_rx(skb); dev->last_rx = jiffies; } - np->rx_idx = i; + np->rx_resp_cons = i; network_alloc_rx_buffers(dev); /* Deal with hypervisor racing our resetting of rx_event. */ smp_mb(); - if ( np->net_ring->rx_cons != i ) goto again; + if ( np->net_idx->rx_resp_prod != i ) goto again; } @@ -382,8 +382,6 @@ static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs) int network_close(struct net_device *dev) { - struct net_private *np = dev->priv; - netif_stop_queue(dev); free_irq(NET_RX_IRQ, dev); @@ -401,9 +399,6 @@ int network_close(struct net_device *dev) kfree(np->net_ring->tx_ring); #endif - kfree(np->rx_skb_ring); - kfree(np->tx_skb_ring); - MOD_DEC_USE_COUNT; return 0; @@ -419,14 +414,21 @@ static struct net_device_stats *network_get_stats(struct net_device *dev) int __init init_module(void) { - int i, err; + int i, fixmap_idx=-1, err; struct net_device *dev; struct net_private *np; INIT_LIST_HEAD(&dev_list); - for ( i = 0; i < start_info.num_net_rings; i++ ) + for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) { + if ( start_info.net_rings[i] == 0 ) + continue; + + /* We actually only support up to 4 vifs right now. */ + if ( ++fixmap_idx == 4 ) + break; + dev = alloc_etherdev(sizeof(struct net_private)); if ( dev == NULL ) { @@ -434,8 +436,11 @@ int __init init_module(void) goto fail; } + set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]); + np = dev->priv; - np->net_ring = start_info.net_rings + i; + np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx); + np->net_idx = &HYPERVISOR_shared_info->net_idx[i]; SET_MODULE_OWNER(dev); dev->open = network_open; diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S index 0a6a5374d1..0250f5e11b 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/entry.S @@ -133,6 +133,55 @@ ENOSYS = 38 movl $-8192, reg; \ andl %esp, reg +ENTRY(lcall7) + pushfl # We get a different stack layout with call + pushl %eax # gates, which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. + movl %eax,EFLAGS(%esp) # + andl $~(NT_MASK|TF_MASK|DF_MASK), %eax + pushl %eax + popfl + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x7 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + +ENTRY(lcall27) + pushfl # We get a different stack layout with call + pushl %eax # gates, which has to be cleaned up later.. + SAVE_ALL + movl EIP(%esp),%eax # due to call gates, this is eflags, not eip.. + movl CS(%esp),%edx # this is eip.. + movl EFLAGS(%esp),%ecx # and this is cs.. + movl %eax,EFLAGS(%esp) # + andl $~(NT_MASK|TF_MASK|DF_MASK), %eax + pushl %eax + popfl + movl %edx,EIP(%esp) # Now we move them to their "normal" places + movl %ecx,CS(%esp) # + movl %esp,%ebx + pushl %ebx + andl $-8192,%ebx # GET_CURRENT + movl exec_domain(%ebx),%edx # Get the execution domain + movl 4(%edx),%edx # Get the lcall7 handler for the domain + pushl $0x27 + call *%edx + addl $4, %esp + popl %eax + jmp ret_from_sys_call + + ENTRY(ret_from_fork) pushl %ebx call SYMBOL_NAME(schedule_tail) @@ -161,15 +210,14 @@ ENTRY(system_call) movl %eax,EAX(%esp) # save the return value ENTRY(ret_from_sys_call) movl SYMBOL_NAME(HYPERVISOR_shared_info),%esi - xorl %eax,%eax - movl %eax,4(%esi) # need_resched and signals atomic test + btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # make tests atomic ret_syscall_tests: cmpl $0,need_resched(%ebx) jne reschedule cmpl $0,sigpending(%ebx) je safesti # ensure need_resched updates are seen signal_return: - btsl %eax,4(%esi) # reenable event callbacks + btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks movl %esp,%eax xorl %edx,%edx call SYMBOL_NAME(do_signal) @@ -206,8 +254,8 @@ ret_from_exception: ALIGN reschedule: - btsl %eax,4(%esi) # reenable event callbacks - call SYMBOL_NAME(schedule) # test + btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks + call SYMBOL_NAME(schedule) # test jmp ret_from_sys_call ENTRY(divide_error) @@ -270,12 +318,12 @@ ENTRY(hypervisor_callback) movb CS(%esp),%cl test $2,%cl # slow return to ring 2 or 3 jne ret_syscall_tests -safesti:btsl %eax,4(%esi) # reenable event callbacks +safesti:btsl $EVENTS_MASTER_ENABLE_BIT,4(%esi) # reenable event callbacks scrit: /**** START OF CRITICAL REGION ****/ cmpl %eax,(%esi) jne 14f # process more events if necessary... RESTORE_ALL -14: btrl %eax,4(%esi) +14: btrl $EVENTS_MASTER_ENABLE_BIT,4(%esi) jmp 11b ecrit: /**** END OF CRITICAL REGION ****/ # [How we do the fixup]. We want to merge the current stack frame with the @@ -317,20 +365,26 @@ critical_fixup_table: .byte 0x20 # pop %es .byte 0x24,0x24,0x24 # add $4,%esp .byte 0x28 # iret - .byte 0x00,0x00,0x00,0x00 # btrl %eax,4(%esi) + .byte 0x00,0x00,0x00,0x00,0x00 # btrl $31,4(%esi) .byte 0x00,0x00 # jmp 11b # Hypervisor uses this for application faults while it executes. ENTRY(failsafe_callback) 1: pop %ds 2: pop %es -3: iret +3: pop %fs +4: pop %gs +5: iret .section .fixup,"ax"; \ -4: movl $0,(%esp); \ +6: movl $0,(%esp); \ jmp 1b; \ -5: movl $0,(%esp); \ +7: movl $0,(%esp); \ jmp 2b; \ -6: pushl %ss; \ +8: movl $0,(%esp); \ + jmp 3b; \ +9: movl $0,(%esp); \ + jmp 4b; \ +10: pushl %ss; \ popl %ds; \ pushl %ss; \ popl %es; \ @@ -339,9 +393,11 @@ ENTRY(failsafe_callback) .previous; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ + .long 1b,6b; \ + .long 2b,7b; \ + .long 3b,8b; \ + .long 4b,9b; \ + .long 5b,10b; \ .previous ENTRY(coprocessor_error) diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S index 86a82b13dc..a89fd8eda4 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/head.S @@ -57,11 +57,14 @@ ENTRY(stack_start) ENTRY(empty_zero_page) .org 0x2000 +ENTRY(default_ldt) + +.org 0x3000 ENTRY(cpu0_pte_quicklist) -.org 0x2400 +.org 0x3400 ENTRY(cpu0_pgd_quicklist) -.org 0x2800 +.org 0x3800 ENTRY(stext) ENTRY(_stext) diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c index 68670daa8c..3f414e9876 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c @@ -27,8 +27,8 @@ void do_hypervisor_callback(struct pt_regs *regs) do { /* Specialised local_irq_save(). */ - flags = shared->events_enable; - shared->events_enable = 0; + flags = test_and_clear_bit(EVENTS_MASTER_ENABLE_BIT, + &shared->events_mask); barrier(); events = xchg(&shared->events, 0); @@ -50,7 +50,7 @@ void do_hypervisor_callback(struct pt_regs *regs) : "eax", "ecx", "edx", "memory" ); /* Specialised local_irq_restore(). */ - shared->events_enable = flags; + if ( flags ) set_bit(EVENTS_MASTER_ENABLE_BIT, &shared->events_mask); barrier(); } while ( shared->events ); @@ -62,25 +62,25 @@ void do_hypervisor_callback(struct pt_regs *regs) * Define interface to generic handling in irq.c */ -static unsigned int startup_hypervisor_event(unsigned int irq) -{ - set_bit(irq, &event_mask); - return 0; -} - static void shutdown_hypervisor_event(unsigned int irq) { clear_bit(irq, &event_mask); + clear_bit(irq, &HYPERVISOR_shared_info->events_mask); } static void enable_hypervisor_event(unsigned int irq) { set_bit(irq, &event_mask); + set_bit(irq, &HYPERVISOR_shared_info->events_mask); + if ( test_bit(EVENTS_MASTER_ENABLE_BIT, + &HYPERVISOR_shared_info->events_mask) ) + do_hypervisor_callback(NULL); } static void disable_hypervisor_event(unsigned int irq) { clear_bit(irq, &event_mask); + clear_bit(irq, &HYPERVISOR_shared_info->events_mask); } static void ack_hypervisor_event(unsigned int irq) @@ -90,6 +90,13 @@ static void ack_hypervisor_event(unsigned int irq) printk("Unexpected hypervisor event %d\n", irq); atomic_inc(&irq_err_count); } + set_bit(irq, &HYPERVISOR_shared_info->events_mask); +} + +static unsigned int startup_hypervisor_event(unsigned int irq) +{ + enable_hypervisor_event(irq); + return 0; } static void end_hypervisor_event(unsigned int irq) diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c index 6c93943036..ca89b694bd 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/ldt.c @@ -9,18 +9,161 @@ #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/vmalloc.h> + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/ldt.h> +#include <asm/desc.h> /* - * XXX KAF (28/7/02): This stuff is only used for DOS emulation, and is - * the default way of finding current TCB in linuxthreads. Supporting - * table update svia the hypervisor is feasible, but a hassle: for now, - * recompiling linuxthreads is the most sensible option. - * - * Oh, this may become an issue depending on what JVM we use for - * running the xeno-daemon. + * read_ldt() is not really atomic - this is not a problem since + * synchronization of reads and writes done to the LDT has to be + * assured by user-space anyway. Writes are atomic, to protect + * the security checks done on new descriptors. */ +static int read_ldt(void * ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + struct mm_struct * mm = current->mm; + + err = 0; + if (!mm->context.segments) + goto out; + + size = LDT_ENTRIES*LDT_ENTRY_SIZE; + if (size > bytecount) + size = bytecount; + + err = size; + if (copy_to_user(ptr, mm->context.segments, size)) + err = -EFAULT; + out: + return err; +} + +static int read_default_ldt(void * ptr, unsigned long bytecount) +{ + int err; + unsigned long size; + void *address; + + err = 0; + address = &default_ldt[0]; + size = sizeof(struct desc_struct); + if (size > bytecount) + size = bytecount; + + err = size; + if (copy_to_user(ptr, address, size)) + err = -EFAULT; + + return err; +} + +static int write_ldt(void * ptr, unsigned long bytecount, int oldmode) +{ + struct mm_struct * mm = current->mm; + __u32 entry_1, entry_2, *lp; + unsigned long phys_lp; + int error; + struct modify_ldt_ldt_s ldt_info; + + error = -EINVAL; + if (bytecount != sizeof(ldt_info)) + goto out; + error = -EFAULT; + if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) + goto out; + + error = -EINVAL; + if (ldt_info.entry_number >= LDT_ENTRIES) + goto out; + if (ldt_info.contents == 3) { + if (oldmode) + goto out; + if (ldt_info.seg_not_present == 0) + goto out; + } + + down_write(&mm->mmap_sem); + if (!mm->context.segments) { + void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); + error = -ENOMEM; + if (!segments) + goto out_unlock; + memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE); + make_pages_readonly(segments, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE); + wmb(); + mm->context.segments = segments; + mm->context.cpuvalid = 1UL << smp_processor_id(); + load_LDT(mm); + flush_page_update_queue(); + } + + lp = (__u32 *)((ldt_info.entry_number<<3) + (char *)mm->context.segments); + phys_lp = arbitrary_virt_to_phys(lp); + + /* Allow LDTs to be cleared by the user. */ + if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { + if (oldmode || + (ldt_info.contents == 0 && + ldt_info.read_exec_only == 1 && + ldt_info.seg_32bit == 0 && + ldt_info.limit_in_pages == 0 && + ldt_info.seg_not_present == 1 && + ldt_info.useable == 0 )) { + entry_1 = 0; + entry_2 = 0; + goto install; + } + } + + entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) | + (ldt_info.limit & 0x0ffff); + entry_2 = (ldt_info.base_addr & 0xff000000) | + ((ldt_info.base_addr & 0x00ff0000) >> 16) | + (ldt_info.limit & 0xf0000) | + ((ldt_info.read_exec_only ^ 1) << 9) | + (ldt_info.contents << 10) | + ((ldt_info.seg_not_present ^ 1) << 15) | + (ldt_info.seg_32bit << 22) | + (ldt_info.limit_in_pages << 23) | + 0x7000; + if (!oldmode) + entry_2 |= (ldt_info.useable << 20); + + /* Install the new entry ... */ + install: + HYPERVISOR_update_descriptor(phys_lp, entry_1, entry_2); + error = 0; + + out_unlock: + up_write(&mm->mmap_sem); + out: + return error; +} asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) { - return -ENOSYS; + int ret = -ENOSYS; + + switch (func) { + case 0: + ret = read_ldt(ptr, bytecount); + break; + case 1: + ret = write_ldt(ptr, bytecount, 1); + break; + case 2: + ret = read_default_ldt(ptr, bytecount); + break; + case 0x11: + ret = write_ldt(ptr, bytecount, 0); + break; + } + return ret; } diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c index 32ce1a66ab..d4db667474 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c @@ -140,6 +140,8 @@ void release_segments(struct mm_struct *mm) if (ldt) { mm->context.segments = NULL; clear_LDT(); + make_pages_writeable(ldt, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE); + flush_page_update_queue(); vfree(ldt); } } @@ -225,10 +227,15 @@ void copy_segments(struct task_struct *p, struct mm_struct *new_mm) * Completely new LDT, we initialize it from the parent: */ ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE); - if (!ldt) + if ( ldt == NULL ) + { printk(KERN_WARNING "ldt allocation failed\n"); + } else + { memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE); + make_pages_readonly(ldt, (LDT_ENTRIES*LDT_ENTRY_SIZE)/PAGE_SIZE); + } } new_mm->context.segments = ldt; new_mm->context.cpuvalid = ~0UL; /* valid on all CPU's - they can't have stale data */ @@ -335,6 +342,10 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; + __cli(); + + MULTICALL_flush_page_update_queue(); + /* * This is basically 'unlazy_fpu', except that we queue a multicall to * indicate FPU task switch, rather than synchronously trapping to Xen. @@ -356,14 +367,7 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */ execute_multicall_list(); - sti(); /* matches 'cli' in switch_mm() */ - - /* - * Save away %fs and %gs. No need to save %es and %ds, as - * those are always kernel segments while inside the kernel. - */ - asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); - asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); + __sti(); /* * Restore %fs and %gs. diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c index 00c68a836f..b3fa27fb11 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c @@ -153,10 +153,9 @@ void __init setup_arch(char **cmdline_p) extern unsigned long cpu0_pte_quicklist[]; extern unsigned long cpu0_pgd_quicklist[]; - HYPERVISOR_shared_info->event_address = - (unsigned long)hypervisor_callback; - HYPERVISOR_shared_info->failsafe_address = - (unsigned long)failsafe_callback; + HYPERVISOR_set_callbacks( + __KERNEL_CS, (unsigned long)hypervisor_callback, + __KERNEL_CS, (unsigned long)failsafe_callback); boot_cpu_data.pgd_quick = cpu0_pgd_quicklist; boot_cpu_data.pte_quick = cpu0_pte_quicklist; @@ -969,6 +968,9 @@ void __init cpu_init (void) HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); + load_LDT(&init_mm); + flush_page_update_queue(); + /* Force FPU initialization. */ current->flags &= ~PF_USEDFPU; current->used_math = 0; diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c index da7cd7413e..c218067d2a 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/traps.c @@ -42,6 +42,8 @@ #include <linux/module.h> asmlinkage int system_call(void); +asmlinkage void lcall7(void); +asmlinkage void lcall27(void); asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -362,7 +364,7 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code) /* If this is a kernel mode trap, save the user PC on entry to * the kernel, that's what the debugger can make sense of. */ - info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : + info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : (void *)regs->eip; force_sig_info(SIGTRAP, &info, tsk); @@ -530,6 +532,26 @@ asmlinkage void math_state_restore(struct pt_regs regs) current->flags |= PF_USEDFPU; /* So we fnsave on switch_to() */ } + +#define _set_gate(gate_addr,type,dpl,addr) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ + "movw %4,%%dx\n\t" \ + "movl %%eax,%0\n\t" \ + "movl %%edx,%1" \ + :"=m" (*((long *) (gate_addr))), \ + "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ + :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ + "3" ((char *) (addr)),"2" (__KERNEL_CS << 16)); \ +} while (0) + +static void __init set_call_gate(void *a, void *addr) +{ + _set_gate(a,12,3,addr); +} + + static trap_info_t trap_table[] = { { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, { 1, 0, __KERNEL_CS, (unsigned long)debug }, @@ -561,5 +583,15 @@ void __init trap_init(void) { HYPERVISOR_set_trap_table(trap_table); HYPERVISOR_set_fast_trap(SYSCALL_VECTOR); + + /* + * The default LDT is a single-entry callgate to lcall7 for iBCS and a + * callgate to lcall27 for Solaris/x86 binaries. + */ + clear_page(&default_ldt[0]); + set_call_gate(&default_ldt[0],lcall7); + set_call_gate(&default_ldt[4],lcall27); + __make_page_readonly(&default_ldt[0]); + cpu_init(); } diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c index 93554c3420..d67ad51dc5 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c @@ -81,12 +81,6 @@ static void DEBUG_disallow_pt_read(unsigned long pa) /* - * This is the current pagetable base pointer, which is updated - * on context switch. - */ -unsigned long pt_baseptr; - -/* * MULTICALL_flush_page_update_queue: * This is a version of the flush which queues as part of a multicall. */ @@ -232,3 +226,13 @@ void queue_pte_unpin(unsigned long ptr) increment_index(); spin_unlock_irqrestore(&update_lock, flags); } + +void queue_set_ldt(unsigned long ptr, unsigned long len) +{ + unsigned long flags; + spin_lock_irqsave(&update_lock, flags); + update_queue[idx].ptr = PGREQ_EXTENDED_COMMAND | ptr; + update_queue[idx].val = PGEXT_SET_LDT | (len << PGEXT_CMD_SHIFT); + increment_index(); + spin_unlock_irqrestore(&update_lock, flags); +} diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c index eb24b1ccbf..0806d775f7 100644 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/init.c @@ -236,10 +236,7 @@ void __init paging_init(void) vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; fixrange_init(vaddr, HYPERVISOR_VIRT_START, init_mm.pgd); - /* - * XXX We do this conversion early, so that all other page tables - * will automatically get this mapping. - */ + /* Cheesy: this can probably be moved to the blkdev driver. */ set_fixmap(FIX_BLKRING_BASE, start_info.blk_ring); #ifdef CONFIG_HIGHMEM |