From 3f2546b2ef55b661fd8dd69682b38992225e86f6 Mon Sep 17 00:00:00 2001 From: fishsoupisgood Date: Mon, 29 Apr 2019 01:17:54 +0100 Subject: Initial import of qemu-2.4.1 --- slirp/COPYRIGHT | 61 ++ slirp/Makefile.objs | 3 + slirp/arp_table.c | 89 +++ slirp/bootp.c | 332 +++++++++++ slirp/bootp.h | 126 +++++ slirp/cksum.c | 139 +++++ slirp/debug.h | 34 ++ slirp/dnssearch.c | 314 +++++++++++ slirp/if.c | 237 ++++++++ slirp/if.h | 23 + slirp/ip.h | 249 +++++++++ slirp/ip_icmp.c | 450 +++++++++++++++ slirp/ip_icmp.h | 165 ++++++ slirp/ip_input.c | 668 ++++++++++++++++++++++ slirp/ip_output.c | 172 ++++++ slirp/libslirp.h | 43 ++ slirp/main.h | 50 ++ slirp/mbuf.c | 241 ++++++++ slirp/mbuf.h | 118 ++++ slirp/misc.c | 319 +++++++++++ slirp/misc.h | 53 ++ slirp/sbuf.c | 187 +++++++ slirp/sbuf.h | 30 + slirp/slirp.c | 1207 ++++++++++++++++++++++++++++++++++++++++ slirp/slirp.h | 361 ++++++++++++ slirp/slirp_config.h | 185 +++++++ slirp/socket.c | 720 ++++++++++++++++++++++++ slirp/socket.h | 97 ++++ slirp/tcp.h | 176 ++++++ slirp/tcp_input.c | 1496 ++++++++++++++++++++++++++++++++++++++++++++++++++ slirp/tcp_output.c | 493 +++++++++++++++++ slirp/tcp_subr.c | 926 +++++++++++++++++++++++++++++++ slirp/tcp_timer.c | 292 ++++++++++ slirp/tcp_timer.h | 127 +++++ slirp/tcp_var.h | 161 ++++++ slirp/tcpip.h | 77 +++ slirp/tftp.c | 442 +++++++++++++++ slirp/tftp.h | 49 ++ slirp/udp.c | 394 +++++++++++++ slirp/udp.h | 87 +++ 40 files changed, 11393 insertions(+) create mode 100644 slirp/COPYRIGHT create mode 100644 slirp/Makefile.objs create mode 100644 slirp/arp_table.c create mode 100644 slirp/bootp.c create mode 100644 slirp/bootp.h create mode 100644 slirp/cksum.c create mode 100644 slirp/debug.h create mode 100644 slirp/dnssearch.c create mode 100644 slirp/if.c create mode 100644 slirp/if.h create mode 100644 slirp/ip.h create mode 100644 slirp/ip_icmp.c create mode 100644 slirp/ip_icmp.h create mode 100644 slirp/ip_input.c create mode 100644 slirp/ip_output.c create mode 100644 slirp/libslirp.h create mode 100644 slirp/main.h create mode 100644 slirp/mbuf.c create mode 100644 slirp/mbuf.h create mode 100644 slirp/misc.c create mode 100644 slirp/misc.h create mode 100644 slirp/sbuf.c create mode 100644 slirp/sbuf.h create mode 100644 slirp/slirp.c create mode 100644 slirp/slirp.h create mode 100644 slirp/slirp_config.h create mode 100644 slirp/socket.c create mode 100644 slirp/socket.h create mode 100644 slirp/tcp.h create mode 100644 slirp/tcp_input.c create mode 100644 slirp/tcp_output.c create mode 100644 slirp/tcp_subr.c create mode 100644 slirp/tcp_timer.c create mode 100644 slirp/tcp_timer.h create mode 100644 slirp/tcp_var.h create mode 100644 slirp/tcpip.h create mode 100644 slirp/tftp.c create mode 100644 slirp/tftp.h create mode 100644 slirp/udp.c create mode 100644 slirp/udp.h (limited to 'slirp') diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT new file mode 100644 index 00000000..1bc83d49 --- /dev/null +++ b/slirp/COPYRIGHT @@ -0,0 +1,61 @@ +Slirp was written by Danny Gasparovski. +Copyright (c), 1995,1996 All Rights Reserved. + +Slirp is maintained by Kelly Price + +Slirp is free software; "free" as in you don't have to pay for it, and you +are free to do whatever you want with it. I do not accept any donations, +monetary or otherwise, for Slirp. Instead, I would ask you to pass this +potential donation to your favorite charity. In fact, I encourage +*everyone* who finds Slirp useful to make a small donation to their +favorite charity (for example, GreenPeace). This is not a requirement, but +a suggestion from someone who highly values the service they provide. + +The copyright terms and conditions: + +---BEGIN--- + + Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---END--- + +This basically means you can do anything you want with the software, except +1) call it your own, and 2) claim warranty on it. There is no warranty for +this software. None. Nada. If you lose a million dollars while using +Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. + +If these conditions cannot be met due to legal restrictions (E.g. where it +is against the law to give out Software without warranty), you must cease +using the software and delete all copies you have. + +Slirp uses code that is copyrighted by the following people/organizations: + +Juha Pirkola. +Gregory M. Christy. +The Regents of the University of California. +Carnegie Mellon University. +The Australian National University. +RSA Data Security, Inc. + +Please read the top of each source file for the details on the various +copyrights. diff --git a/slirp/Makefile.objs b/slirp/Makefile.objs new file mode 100644 index 00000000..2daa9dc5 --- /dev/null +++ b/slirp/Makefile.objs @@ -0,0 +1,3 @@ +common-obj-y = cksum.o if.o ip_icmp.o ip_input.o ip_output.o dnssearch.o +common-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o +common-obj-y += tcp_subr.o tcp_timer.o udp.o bootp.o tftp.o arp_table.o diff --git a/slirp/arp_table.c b/slirp/arp_table.c new file mode 100644 index 00000000..bcaeb448 --- /dev/null +++ b/slirp/arp_table.c @@ -0,0 +1,89 @@ +/* + * ARP table + * + * Copyright (c) 2011 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "slirp.h" + +void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]) +{ + const uint32_t broadcast_addr = + ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; + ArpTable *arptbl = &slirp->arp_table; + int i; + + DEBUG_CALL("arp_table_add"); + DEBUG_ARG("ip = 0x%x", ip_addr); + DEBUG_ARGS((dfd, " hw addr = %02x:%02x:%02x:%02x:%02x:%02x\n", + ethaddr[0], ethaddr[1], ethaddr[2], + ethaddr[3], ethaddr[4], ethaddr[5])); + + if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { + /* Do not register broadcast addresses */ + return; + } + + /* Search for an entry */ + for (i = 0; i < ARP_TABLE_SIZE; i++) { + if (arptbl->table[i].ar_sip == ip_addr) { + /* Update the entry */ + memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); + return; + } + } + + /* No entry found, create a new one */ + arptbl->table[arptbl->next_victim].ar_sip = ip_addr; + memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); + arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; +} + +bool arp_table_search(Slirp *slirp, uint32_t ip_addr, + uint8_t out_ethaddr[ETH_ALEN]) +{ + const uint32_t broadcast_addr = + ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; + ArpTable *arptbl = &slirp->arp_table; + int i; + + DEBUG_CALL("arp_table_search"); + DEBUG_ARG("ip = 0x%x", ip_addr); + + /* If broadcast address */ + if (ip_addr == 0xffffffff || ip_addr == broadcast_addr) { + /* return Ethernet broadcast address */ + memset(out_ethaddr, 0xff, ETH_ALEN); + return 1; + } + + for (i = 0; i < ARP_TABLE_SIZE; i++) { + if (arptbl->table[i].ar_sip == ip_addr) { + memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); + DEBUG_ARGS((dfd, " found hw addr = %02x:%02x:%02x:%02x:%02x:%02x\n", + out_ethaddr[0], out_ethaddr[1], out_ethaddr[2], + out_ethaddr[3], out_ethaddr[4], out_ethaddr[5])); + return 1; + } + } + + return 0; +} diff --git a/slirp/bootp.c b/slirp/bootp.c new file mode 100644 index 00000000..b7db9fa3 --- /dev/null +++ b/slirp/bootp.c @@ -0,0 +1,332 @@ +/* + * QEMU BOOTP/DHCP server + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include + +/* XXX: only DHCP is supported */ + +#define LEASE_TIME (24 * 3600) + +static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; + +#ifdef DEBUG +#define DPRINTF(fmt, ...) \ +do if (slirp_debug & DBG_CALL) { fprintf(dfd, fmt, ## __VA_ARGS__); fflush(dfd); } while (0) +#else +#define DPRINTF(fmt, ...) do{}while(0) +#endif + +static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, + const uint8_t *macaddr) +{ + BOOTPClient *bc; + int i; + + for(i = 0; i < NB_BOOTP_CLIENTS; i++) { + bc = &slirp->bootp_clients[i]; + if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) + goto found; + } + return NULL; + found: + bc = &slirp->bootp_clients[i]; + bc->allocated = 1; + paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); + return bc; +} + +static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, + const uint8_t *macaddr) +{ + uint32_t req_addr = ntohl(paddr->s_addr); + uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); + BOOTPClient *bc; + + if (req_addr >= dhcp_addr && + req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { + bc = &slirp->bootp_clients[req_addr - dhcp_addr]; + if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { + bc->allocated = 1; + return bc; + } + } + return NULL; +} + +static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, + const uint8_t *macaddr) +{ + BOOTPClient *bc; + int i; + + for(i = 0; i < NB_BOOTP_CLIENTS; i++) { + if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) + goto found; + } + return NULL; + found: + bc = &slirp->bootp_clients[i]; + bc->allocated = 1; + paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); + return bc; +} + +static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, + struct in_addr *preq_addr) +{ + const uint8_t *p, *p_end; + int len, tag; + + *pmsg_type = 0; + preq_addr->s_addr = htonl(0L); + + p = bp->bp_vend; + p_end = p + DHCP_OPT_LEN; + if (memcmp(p, rfc1533_cookie, 4) != 0) + return; + p += 4; + while (p < p_end) { + tag = p[0]; + if (tag == RFC1533_PAD) { + p++; + } else if (tag == RFC1533_END) { + break; + } else { + p++; + if (p >= p_end) + break; + len = *p++; + DPRINTF("dhcp: tag=%d len=%d\n", tag, len); + + switch(tag) { + case RFC2132_MSG_TYPE: + if (len >= 1) + *pmsg_type = p[0]; + break; + case RFC2132_REQ_ADDR: + if (len >= 4) { + memcpy(&(preq_addr->s_addr), p, 4); + } + break; + default: + break; + } + p += len; + } + } + if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && + bp->bp_ciaddr.s_addr) { + memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); + } +} + +static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) +{ + BOOTPClient *bc = NULL; + struct mbuf *m; + struct bootp_t *rbp; + struct sockaddr_in saddr, daddr; + struct in_addr preq_addr; + int dhcp_msg_type, val; + uint8_t *q; + uint8_t client_ethaddr[ETH_ALEN]; + + /* extract exact DHCP msg type */ + dhcp_decode(bp, &dhcp_msg_type, &preq_addr); + DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); + if (preq_addr.s_addr != htonl(0L)) + DPRINTF(" req_addr=%08x\n", ntohl(preq_addr.s_addr)); + else + DPRINTF("\n"); + + if (dhcp_msg_type == 0) + dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ + + if (dhcp_msg_type != DHCPDISCOVER && + dhcp_msg_type != DHCPREQUEST) + return; + + /* Get client's hardware address from bootp request */ + memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); + + m = m_get(slirp); + if (!m) { + return; + } + m->m_data += IF_MAXLINKHDR; + rbp = (struct bootp_t *)m->m_data; + m->m_data += sizeof(struct udpiphdr); + memset(rbp, 0, sizeof(struct bootp_t)); + + if (dhcp_msg_type == DHCPDISCOVER) { + if (preq_addr.s_addr != htonl(0L)) { + bc = request_addr(slirp, &preq_addr, client_ethaddr); + if (bc) { + daddr.sin_addr = preq_addr; + } + } + if (!bc) { + new_addr: + bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); + if (!bc) { + DPRINTF("no address left\n"); + return; + } + } + memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); + } else if (preq_addr.s_addr != htonl(0L)) { + bc = request_addr(slirp, &preq_addr, client_ethaddr); + if (bc) { + daddr.sin_addr = preq_addr; + memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); + } else { + /* DHCPNAKs should be sent to broadcast */ + daddr.sin_addr.s_addr = 0xffffffff; + } + } else { + bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); + if (!bc) { + /* if never assigned, behaves as if it was already + assigned (windows fix because it remembers its address) */ + goto new_addr; + } + } + + /* Update ARP table for this IP address */ + arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); + + saddr.sin_addr = slirp->vhost_addr; + saddr.sin_port = htons(BOOTP_SERVER); + + daddr.sin_port = htons(BOOTP_CLIENT); + + rbp->bp_op = BOOTP_REPLY; + rbp->bp_xid = bp->bp_xid; + rbp->bp_htype = 1; + rbp->bp_hlen = 6; + memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); + + rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ + rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ + + q = rbp->bp_vend; + memcpy(q, rfc1533_cookie, 4); + q += 4; + + if (bc) { + DPRINTF("%s addr=%08x\n", + (dhcp_msg_type == DHCPDISCOVER) ? "offered" : "ack'ed", + ntohl(daddr.sin_addr.s_addr)); + + if (dhcp_msg_type == DHCPDISCOVER) { + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPOFFER; + } else /* DHCPREQUEST */ { + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPACK; + } + + if (slirp->bootp_filename) + snprintf((char *)rbp->bp_file, sizeof(rbp->bp_file), "%s", + slirp->bootp_filename); + + *q++ = RFC2132_SRV_ID; + *q++ = 4; + memcpy(q, &saddr.sin_addr, 4); + q += 4; + + *q++ = RFC1533_NETMASK; + *q++ = 4; + memcpy(q, &slirp->vnetwork_mask, 4); + q += 4; + + if (!slirp->restricted) { + *q++ = RFC1533_GATEWAY; + *q++ = 4; + memcpy(q, &saddr.sin_addr, 4); + q += 4; + + *q++ = RFC1533_DNS; + *q++ = 4; + memcpy(q, &slirp->vnameserver_addr, 4); + q += 4; + } + + *q++ = RFC2132_LEASE_TIME; + *q++ = 4; + val = htonl(LEASE_TIME); + memcpy(q, &val, 4); + q += 4; + + if (*slirp->client_hostname) { + val = strlen(slirp->client_hostname); + *q++ = RFC1533_HOSTNAME; + *q++ = val; + memcpy(q, slirp->client_hostname, val); + q += val; + } + + if (slirp->vdnssearch) { + size_t spaceleft = sizeof(rbp->bp_vend) - (q - rbp->bp_vend); + val = slirp->vdnssearch_len; + if (val + 1 > spaceleft) { + g_warning("DHCP packet size exceeded, " + "omitting domain-search option."); + } else { + memcpy(q, slirp->vdnssearch, val); + q += val; + } + } + } else { + static const char nak_msg[] = "requested address not available"; + + DPRINTF("nak'ed addr=%08x\n", ntohl(preq_addr.s_addr)); + + *q++ = RFC2132_MSG_TYPE; + *q++ = 1; + *q++ = DHCPNAK; + + *q++ = RFC2132_MESSAGE; + *q++ = sizeof(nak_msg) - 1; + memcpy(q, nak_msg, sizeof(nak_msg) - 1); + q += sizeof(nak_msg) - 1; + } + *q = RFC1533_END; + + daddr.sin_addr.s_addr = 0xffffffffu; + + m->m_len = sizeof(struct bootp_t) - + sizeof(struct ip) - sizeof(struct udphdr); + udp_output2(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); +} + +void bootp_input(struct mbuf *m) +{ + struct bootp_t *bp = mtod(m, struct bootp_t *); + + if (bp->bp_op == BOOTP_REQUEST) { + bootp_reply(m->slirp, bp); + } +} diff --git a/slirp/bootp.h b/slirp/bootp.h new file mode 100644 index 00000000..ec3b6870 --- /dev/null +++ b/slirp/bootp.h @@ -0,0 +1,126 @@ +/* bootp/dhcp defines */ +#ifndef SLIRP_BOOTP_H +#define SLIRP_BOOTP_H 1 + +#define BOOTP_SERVER 67 +#define BOOTP_CLIENT 68 + +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +#define RFC1533_COOKIE 99, 130, 83, 99 +#define RFC1533_PAD 0 +#define RFC1533_NETMASK 1 +#define RFC1533_TIMEOFFSET 2 +#define RFC1533_GATEWAY 3 +#define RFC1533_TIMESERVER 4 +#define RFC1533_IEN116NS 5 +#define RFC1533_DNS 6 +#define RFC1533_LOGSERVER 7 +#define RFC1533_COOKIESERVER 8 +#define RFC1533_LPRSERVER 9 +#define RFC1533_IMPRESSSERVER 10 +#define RFC1533_RESOURCESERVER 11 +#define RFC1533_HOSTNAME 12 +#define RFC1533_BOOTFILESIZE 13 +#define RFC1533_MERITDUMPFILE 14 +#define RFC1533_DOMAINNAME 15 +#define RFC1533_SWAPSERVER 16 +#define RFC1533_ROOTPATH 17 +#define RFC1533_EXTENSIONPATH 18 +#define RFC1533_IPFORWARDING 19 +#define RFC1533_IPSOURCEROUTING 20 +#define RFC1533_IPPOLICYFILTER 21 +#define RFC1533_IPMAXREASSEMBLY 22 +#define RFC1533_IPTTL 23 +#define RFC1533_IPMTU 24 +#define RFC1533_IPMTUPLATEAU 25 +#define RFC1533_INTMTU 26 +#define RFC1533_INTLOCALSUBNETS 27 +#define RFC1533_INTBROADCAST 28 +#define RFC1533_INTICMPDISCOVER 29 +#define RFC1533_INTICMPRESPOND 30 +#define RFC1533_INTROUTEDISCOVER 31 +#define RFC1533_INTROUTESOLICIT 32 +#define RFC1533_INTSTATICROUTES 33 +#define RFC1533_LLTRAILERENCAP 34 +#define RFC1533_LLARPCACHETMO 35 +#define RFC1533_LLETHERNETENCAP 36 +#define RFC1533_TCPTTL 37 +#define RFC1533_TCPKEEPALIVETMO 38 +#define RFC1533_TCPKEEPALIVEGB 39 +#define RFC1533_NISDOMAIN 40 +#define RFC1533_NISSERVER 41 +#define RFC1533_NTPSERVER 42 +#define RFC1533_VENDOR 43 +#define RFC1533_NBNS 44 +#define RFC1533_NBDD 45 +#define RFC1533_NBNT 46 +#define RFC1533_NBSCOPE 47 +#define RFC1533_XFS 48 +#define RFC1533_XDM 49 + +#define RFC2132_REQ_ADDR 50 +#define RFC2132_LEASE_TIME 51 +#define RFC2132_MSG_TYPE 53 +#define RFC2132_SRV_ID 54 +#define RFC2132_PARAM_LIST 55 +#define RFC2132_MESSAGE 56 +#define RFC2132_MAX_SIZE 57 +#define RFC2132_RENEWAL_TIME 58 +#define RFC2132_REBIND_TIME 59 + +#define DHCPDISCOVER 1 +#define DHCPOFFER 2 +#define DHCPREQUEST 3 +#define DHCPACK 5 +#define DHCPNAK 6 + +#define RFC1533_VENDOR_MAJOR 0 +#define RFC1533_VENDOR_MINOR 0 + +#define RFC1533_VENDOR_MAGIC 128 +#define RFC1533_VENDOR_ADDPARM 129 +#define RFC1533_VENDOR_ETHDEV 130 +#define RFC1533_VENDOR_HOWTO 132 +#define RFC1533_VENDOR_MNUOPTS 160 +#define RFC1533_VENDOR_SELECTION 176 +#define RFC1533_VENDOR_MOTD 184 +#define RFC1533_VENDOR_NUMOFMOTD 8 +#define RFC1533_VENDOR_IMG 192 +#define RFC1533_VENDOR_NUMOFIMG 16 + +#define RFC1533_END 255 +#define BOOTP_VENDOR_LEN 64 +#define DHCP_OPT_LEN 312 + +struct bootp_t { + struct ip ip; + struct udphdr udp; + uint8_t bp_op; + uint8_t bp_htype; + uint8_t bp_hlen; + uint8_t bp_hops; + uint32_t bp_xid; + uint16_t bp_secs; + uint16_t unused; + struct in_addr bp_ciaddr; + struct in_addr bp_yiaddr; + struct in_addr bp_siaddr; + struct in_addr bp_giaddr; + uint8_t bp_hwaddr[16]; + uint8_t bp_sname[64]; + uint8_t bp_file[128]; + uint8_t bp_vend[DHCP_OPT_LEN]; +}; + +typedef struct { + uint16_t allocated; + uint8_t macaddr[6]; +} BOOTPClient; + +#define NB_BOOTP_CLIENTS 16 + +void bootp_input(struct mbuf *m); + +#endif diff --git a/slirp/cksum.c b/slirp/cksum.c new file mode 100644 index 00000000..63286600 --- /dev/null +++ b/slirp/cksum.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp + */ + +#include + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + * + * XXX Since we will never span more than 1 mbuf, we can optimise this + */ + +#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; \ + (void)ADDCARRY(sum);} + +int cksum(struct mbuf *m, int len) +{ + register uint16_t *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union { + uint8_t c[2]; + uint16_t s; + } s_util; + union { + uint16_t s[2]; + uint32_t l; + } l_util; + + if (m->m_len == 0) + goto cont; + w = mtod(m, uint16_t *); + + mlen = m->m_len; + + if (len < mlen) + mlen = len; +#ifdef DEBUG + len -= mlen; +#endif + /* + * Force to even boundary. + */ + if ((1 & (uintptr_t)w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(uint8_t *)w; + w = (uint16_t *)((int8_t *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + goto cont; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + + if (byte_swapped) { + REDUCE; + sum <<= 8; + if (mlen == -1) { + s_util.c[1] = *(uint8_t *)w; + sum += s_util.s; + mlen = 0; + } else + + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(uint8_t *)w; + +cont: +#ifdef DEBUG + if (len) { + DEBUG_ERROR((dfd, "cksum: out of data\n")); + DEBUG_ERROR((dfd, " len = %d\n", len)); + } +#endif + if (mlen == -1) { + /* The last mbuf has odd # of bytes. Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} diff --git a/slirp/debug.h b/slirp/debug.h new file mode 100644 index 00000000..6cfa61ed --- /dev/null +++ b/slirp/debug.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +//#define DEBUG 1 + +#ifdef DEBUG + +#define DBG_CALL 0x1 +#define DBG_MISC 0x2 +#define DBG_ERROR 0x4 + +#define dfd stderr + +extern int slirp_debug; + +#define DEBUG_CALL(x) if (slirp_debug & DBG_CALL) { fprintf(dfd, "%s...\n", x); fflush(dfd); } +#define DEBUG_ARG(x, y) if (slirp_debug & DBG_CALL) { fputc(' ', dfd); fprintf(dfd, x, y); fputc('\n', dfd); fflush(dfd); } +#define DEBUG_ARGS(x) if (slirp_debug & DBG_CALL) { fprintf x ; fflush(dfd); } +#define DEBUG_MISC(x) if (slirp_debug & DBG_MISC) { fprintf x ; fflush(dfd); } +#define DEBUG_ERROR(x) if (slirp_debug & DBG_ERROR) {fprintf x ; fflush(dfd); } + +#else + +#define DEBUG_CALL(x) +#define DEBUG_ARG(x, y) +#define DEBUG_ARGS(x) +#define DEBUG_MISC(x) +#define DEBUG_ERROR(x) + +#endif diff --git a/slirp/dnssearch.c b/slirp/dnssearch.c new file mode 100644 index 00000000..4c9064ec --- /dev/null +++ b/slirp/dnssearch.c @@ -0,0 +1,314 @@ +/* + * Domain search option for DHCP (RFC 3397) + * + * Copyright (c) 2012 Klaus Stengel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "slirp.h" + +static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; +static const uint8_t MAX_OPT_LEN = 255; +static const uint8_t OPT_HEADER_LEN = 2; +static const uint8_t REFERENCE_LEN = 2; + +struct compact_domain; + +typedef struct compact_domain { + struct compact_domain *self; + struct compact_domain *refdom; + uint8_t *labels; + size_t len; + size_t common_octets; +} CompactDomain; + +static size_t +domain_suffix_diffoff(const CompactDomain *a, const CompactDomain *b) +{ + size_t la = a->len, lb = b->len; + uint8_t *da = a->labels + la, *db = b->labels + lb; + size_t i, lm = (la < lb) ? la : lb; + + for (i = 0; i < lm; i++) { + da--; db--; + if (*da != *db) { + break; + } + } + return i; +} + +static int domain_suffix_ord(const void *cva, const void *cvb) +{ + const CompactDomain *a = cva, *b = cvb; + size_t la = a->len, lb = b->len; + size_t doff = domain_suffix_diffoff(a, b); + uint8_t ca = a->labels[la - doff]; + uint8_t cb = b->labels[lb - doff]; + + if (ca < cb) { + return -1; + } + if (ca > cb) { + return 1; + } + if (la < lb) { + return -1; + } + if (la > lb) { + return 1; + } + return 0; +} + +static size_t domain_common_label(CompactDomain *a, CompactDomain *b) +{ + size_t res, doff = domain_suffix_diffoff(a, b); + uint8_t *first_eq_pos = a->labels + (a->len - doff); + uint8_t *label = a->labels; + + while (*label && label < first_eq_pos) { + label += *label + 1; + } + res = a->len - (label - a->labels); + /* only report if it can help to reduce the packet size */ + return (res > REFERENCE_LEN) ? res : 0; +} + +static void domain_fixup_order(CompactDomain *cd, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + CompactDomain *cur = cd + i, *next = cd[i].self; + + while (!cur->common_octets) { + CompactDomain *tmp = next->self; /* backup target value */ + + next->self = cur; + cur->common_octets++; + + cur = next; + next = tmp; + } + } +} + +static void domain_mklabels(CompactDomain *cd, const char *input) +{ + uint8_t *len_marker = cd->labels; + uint8_t *output = len_marker; /* pre-incremented */ + const char *in = input; + char cur_chr; + size_t len = 0; + + if (cd->len == 0) { + goto fail; + } + cd->len++; + + do { + cur_chr = *in++; + if (cur_chr == '.' || cur_chr == '\0') { + len = output - len_marker; + if ((len == 0 && cur_chr == '.') || len >= 64) { + goto fail; + } + *len_marker = len; + + output++; + len_marker = output; + } else { + output++; + *output = cur_chr; + } + } while (cur_chr != '\0'); + + /* ensure proper zero-termination */ + if (len != 0) { + *len_marker = 0; + cd->len++; + } + return; + +fail: + g_warning("failed to parse domain name '%s'\n", input); + cd->len = 0; +} + +static void +domain_mkxrefs(CompactDomain *doms, CompactDomain *last, size_t depth) +{ + CompactDomain *i = doms, *target = doms; + + do { + if (i->labels < target->labels) { + target = i; + } + } while (i++ != last); + + for (i = doms; i != last; i++) { + CompactDomain *group_last; + size_t next_depth; + + if (i->common_octets == depth) { + continue; + } + + next_depth = -1; + for (group_last = i; group_last != last; group_last++) { + size_t co = group_last->common_octets; + if (co <= depth) { + break; + } + if (co < next_depth) { + next_depth = co; + } + } + domain_mkxrefs(i, group_last, next_depth); + + i = group_last; + if (i == last) { + break; + } + } + + if (depth == 0) { + return; + } + + i = doms; + do { + if (i != target && i->refdom == NULL) { + i->refdom = target; + i->common_octets = depth; + } + } while (i++ != last); +} + +static size_t domain_compactify(CompactDomain *domains, size_t n) +{ + uint8_t *start = domains->self->labels, *outptr = start; + size_t i; + + for (i = 0; i < n; i++) { + CompactDomain *cd = domains[i].self; + CompactDomain *rd = cd->refdom; + + if (rd != NULL) { + size_t moff = (rd->labels - start) + + (rd->len - cd->common_octets); + if (moff < 0x3FFFu) { + cd->len -= cd->common_octets - 2; + cd->labels[cd->len - 1] = moff & 0xFFu; + cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); + } + } + + if (cd->labels != outptr) { + memmove(outptr, cd->labels, cd->len); + cd->labels = outptr; + } + outptr += cd->len; + } + return outptr - start; +} + +int translate_dnssearch(Slirp *s, const char **names) +{ + size_t blocks, bsrc_start, bsrc_end, bdst_start; + size_t i, num_domains, memreq = 0; + uint8_t *result = NULL, *outptr; + CompactDomain *domains = NULL; + const char **nameptr = names; + + while (*nameptr != NULL) { + nameptr++; + } + + num_domains = nameptr - names; + if (num_domains == 0) { + return -2; + } + + domains = g_malloc(num_domains * sizeof(*domains)); + + for (i = 0; i < num_domains; i++) { + size_t nlen = strlen(names[i]); + memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ + domains[i].self = domains + i; + domains[i].len = nlen; + domains[i].common_octets = 0; + domains[i].refdom = NULL; + } + + /* reserve extra 2 header bytes for each 255 bytes of output */ + memreq += ((memreq + MAX_OPT_LEN - 1) / MAX_OPT_LEN) * OPT_HEADER_LEN; + result = g_malloc(memreq * sizeof(*result)); + + outptr = result; + for (i = 0; i < num_domains; i++) { + domains[i].labels = outptr; + domain_mklabels(domains + i, names[i]); + outptr += domains[i].len; + } + + if (outptr == result) { + g_free(domains); + g_free(result); + return -1; + } + + qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); + domain_fixup_order(domains, num_domains); + + for (i = 1; i < num_domains; i++) { + size_t cl = domain_common_label(domains + i - 1, domains + i); + domains[i - 1].common_octets = cl; + } + + domain_mkxrefs(domains, domains + num_domains - 1, 0); + memreq = domain_compactify(domains, num_domains); + + blocks = (memreq + MAX_OPT_LEN - 1) / MAX_OPT_LEN; + bsrc_end = memreq; + bsrc_start = (blocks - 1) * MAX_OPT_LEN; + bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; + memreq += blocks * OPT_HEADER_LEN; + + while (blocks--) { + size_t len = bsrc_end - bsrc_start; + memmove(result + bdst_start, result + bsrc_start, len); + result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; + result[bdst_start - 1] = len; + bsrc_end = bsrc_start; + bsrc_start -= MAX_OPT_LEN; + bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; + } + + g_free(domains); + s->vdnssearch = result; + s->vdnssearch_len = memreq; + return 0; +} diff --git a/slirp/if.c b/slirp/if.c new file mode 100644 index 00000000..fb7acf87 --- /dev/null +++ b/slirp/if.c @@ -0,0 +1,237 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include +#include "qemu/timer.h" + +static void +ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) +{ + ifm->ifs_next = ifmhead->ifs_next; + ifmhead->ifs_next = ifm; + ifm->ifs_prev = ifmhead; + ifm->ifs_next->ifs_prev = ifm; +} + +static void +ifs_remque(struct mbuf *ifm) +{ + ifm->ifs_prev->ifs_next = ifm->ifs_next; + ifm->ifs_next->ifs_prev = ifm->ifs_prev; +} + +void +if_init(Slirp *slirp) +{ + slirp->if_fastq.ifq_next = slirp->if_fastq.ifq_prev = &slirp->if_fastq; + slirp->if_batchq.ifq_next = slirp->if_batchq.ifq_prev = &slirp->if_batchq; + slirp->next_m = &slirp->if_batchq; +} + +/* + * if_output: Queue packet into an output queue. + * There are 2 output queue's, if_fastq and if_batchq. + * Each output queue is a doubly linked list of double linked lists + * of mbufs, each list belonging to one "session" (socket). This + * way, we can output packets fairly by sending one packet from each + * session, instead of all the packets from one session, then all packets + * from the next session, etc. Packets on the if_fastq get absolute + * priority, but if one session hogs the link, it gets "downgraded" + * to the batchq until it runs out of packets, then it'll return + * to the fastq (eg. if the user does an ls -alR in a telnet session, + * it'll temporarily get downgraded to the batchq) + */ +void +if_output(struct socket *so, struct mbuf *ifm) +{ + Slirp *slirp = ifm->slirp; + struct mbuf *ifq; + int on_fastq = 1; + + DEBUG_CALL("if_output"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("ifm = %lx", (long)ifm); + + /* + * First remove the mbuf from m_usedlist, + * since we're gonna use m_next and m_prev ourselves + * XXX Shouldn't need this, gotta change dtom() etc. + */ + if (ifm->m_flags & M_USEDLIST) { + remque(ifm); + ifm->m_flags &= ~M_USEDLIST; + } + + /* + * See if there's already a batchq list for this session. + * This can include an interactive session, which should go on fastq, + * but gets too greedy... hence it'll be downgraded from fastq to batchq. + * We mustn't put this packet back on the fastq (or we'll send it out of order) + * XXX add cache here? + */ + for (ifq = slirp->if_batchq.ifq_prev; ifq != &slirp->if_batchq; + ifq = ifq->ifq_prev) { + if (so == ifq->ifq_so) { + /* A match! */ + ifm->ifq_so = so; + ifs_insque(ifm, ifq->ifs_prev); + goto diddit; + } + } + + /* No match, check which queue to put it on */ + if (so && (so->so_iptos & IPTOS_LOWDELAY)) { + ifq = slirp->if_fastq.ifq_prev; + on_fastq = 1; + /* + * Check if this packet is a part of the last + * packet's session + */ + if (ifq->ifq_so == so) { + ifm->ifq_so = so; + ifs_insque(ifm, ifq->ifs_prev); + goto diddit; + } + } else { + ifq = slirp->if_batchq.ifq_prev; + /* Set next_m if the queue was empty so far */ + if (slirp->next_m == &slirp->if_batchq) { + slirp->next_m = ifm; + } + } + + /* Create a new doubly linked list for this session */ + ifm->ifq_so = so; + ifs_init(ifm); + insque(ifm, ifq); + +diddit: + if (so) { + /* Update *_queued */ + so->so_queued++; + so->so_nqueued++; + /* + * Check if the interactive session should be downgraded to + * the batchq. A session is downgraded if it has queued 6 + * packets without pausing, and at least 3 of those packets + * have been sent over the link + * (XXX These are arbitrary numbers, probably not optimal..) + */ + if (on_fastq && ((so->so_nqueued >= 6) && + (so->so_nqueued - so->so_queued) >= 3)) { + + /* Remove from current queue... */ + remque(ifm->ifs_next); + + /* ...And insert in the new. That'll teach ya! */ + insque(ifm->ifs_next, &slirp->if_batchq); + } + } + +#ifndef FULL_BOLT + /* + * This prevents us from malloc()ing too many mbufs + */ + if_start(ifm->slirp); +#endif +} + +/* + * Send a packet + * We choose a packet based on its position in the output queues; + * If there are packets on the fastq, they are sent FIFO, before + * everything else. Otherwise we choose the first packet from the + * batchq and send it. the next packet chosen will be from the session + * after this one, then the session after that one, and so on.. So, + * for example, if there are 3 ftp session's fighting for bandwidth, + * one packet will be sent from the first session, then one packet + * from the second session, then one packet from the third, then back + * to the first, etc. etc. + */ +void if_start(Slirp *slirp) +{ + uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + bool from_batchq, next_from_batchq; + struct mbuf *ifm, *ifm_next, *ifqt; + + DEBUG_CALL("if_start"); + + if (slirp->if_start_busy) { + return; + } + slirp->if_start_busy = true; + + if (slirp->if_fastq.ifq_next != &slirp->if_fastq) { + ifm_next = slirp->if_fastq.ifq_next; + next_from_batchq = false; + } else if (slirp->next_m != &slirp->if_batchq) { + /* Nothing on fastq, pick up from batchq via next_m */ + ifm_next = slirp->next_m; + next_from_batchq = true; + } else { + ifm_next = NULL; + } + + while (ifm_next) { + ifm = ifm_next; + from_batchq = next_from_batchq; + + ifm_next = ifm->ifq_next; + if (ifm_next == &slirp->if_fastq) { + /* No more packets in fastq, switch to batchq */ + ifm_next = slirp->next_m; + next_from_batchq = true; + } + if (ifm_next == &slirp->if_batchq) { + /* end of batchq */ + ifm_next = NULL; + } + + /* Try to send packet unless it already expired */ + if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { + /* Packet is delayed due to pending ARP resolution */ + continue; + } + + if (ifm == slirp->next_m) { + /* Set which packet to send on next iteration */ + slirp->next_m = ifm->ifq_next; + } + + /* Remove it from the queue */ + ifqt = ifm->ifq_prev; + remque(ifm); + + /* If there are more packets for this session, re-queue them */ + if (ifm->ifs_next != ifm) { + struct mbuf *next = ifm->ifs_next; + + insque(next, ifqt); + ifs_remque(ifm); + + if (!from_batchq) { + /* Next packet in fastq is from the same session */ + ifm_next = next; + next_from_batchq = false; + } else if (slirp->next_m == &slirp->if_batchq) { + /* Set next_m and ifm_next if the session packet is now the + * only one on batchq */ + slirp->next_m = ifm_next = next; + } + } + + /* Update so_queued */ + if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { + /* If there's no more queued, reset nqueued */ + ifm->ifq_so->so_nqueued = 0; + } + + m_free(ifm); + } + + slirp->if_start_busy = false; +} diff --git a/slirp/if.h b/slirp/if.h new file mode 100644 index 00000000..33270239 --- /dev/null +++ b/slirp/if.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _IF_H_ +#define _IF_H_ + +#define IF_COMPRESS 0x01 /* We want compression */ +#define IF_NOCOMPRESS 0x02 /* Do not do compression */ +#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ +#define IF_NOCIDCOMP 0x08 /* CID compression */ + +#define IF_MTU 1500 +#define IF_MRU 1500 +#define IF_COMP IF_AUTOCOMP /* Flags for compression */ + +/* 2 for alignment, 14 for ethernet, 40 for TCP/IP */ +#define IF_MAXLINKHDR (2 + 14 + 40) + +#endif diff --git a/slirp/ip.h b/slirp/ip.h new file mode 100644 index 00000000..e2ee5e30 --- /dev/null +++ b/slirp/ip.h @@ -0,0 +1,249 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip.h 8.1 (Berkeley) 6/10/93 + * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp + */ + +#ifndef _IP_H_ +#define _IP_H_ + +#ifdef HOST_WORDS_BIGENDIAN +# undef NTOHL +# undef NTOHS +# undef HTONL +# undef HTONS +# define NTOHL(d) +# define NTOHS(d) +# define HTONL(d) +# define HTONS(d) +#else +# ifndef NTOHL +# define NTOHL(d) ((d) = ntohl((d))) +# endif +# ifndef NTOHS +# define NTOHS(d) ((d) = ntohs((uint16_t)(d))) +# endif +# ifndef HTONL +# define HTONL(d) ((d) = htonl((d))) +# endif +# ifndef HTONS +# define HTONS(d) ((d) = htons((uint16_t)(d))) +# endif +#endif + +typedef uint32_t n_long; /* long as received from the net */ + +/* + * Definitions for internet protocol version 4. + * Per RFC 791, September 1981. + */ +#define IPVERSION 4 + +/* + * Structure of an internet header, naked of options. + */ +struct ip { +#ifdef HOST_WORDS_BIGENDIAN + uint8_t ip_v:4, /* version */ + ip_hl:4; /* header length */ +#else + uint8_t ip_hl:4, /* header length */ + ip_v:4; /* version */ +#endif + uint8_t ip_tos; /* type of service */ + uint16_t ip_len; /* total length */ + uint16_t ip_id; /* identification */ + uint16_t ip_off; /* fragment offset field */ +#define IP_DF 0x4000 /* don't fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + uint8_t ip_ttl; /* time to live */ + uint8_t ip_p; /* protocol */ + uint16_t ip_sum; /* checksum */ + struct in_addr ip_src,ip_dst; /* source and dest address */ +} QEMU_PACKED; + +#define IP_MAXPACKET 65535 /* maximum packet size */ + +/* + * Definitions for IP type of service (ip_tos) + */ +#define IPTOS_LOWDELAY 0x10 +#define IPTOS_THROUGHPUT 0x08 +#define IPTOS_RELIABILITY 0x04 + +/* + * Definitions for options. + */ +#define IPOPT_COPIED(o) ((o)&0x80) +#define IPOPT_CLASS(o) ((o)&0x60) +#define IPOPT_NUMBER(o) ((o)&0x1f) + +#define IPOPT_CONTROL 0x00 +#define IPOPT_RESERVED1 0x20 +#define IPOPT_DEBMEAS 0x40 +#define IPOPT_RESERVED2 0x60 + +#define IPOPT_EOL 0 /* end of option list */ +#define IPOPT_NOP 1 /* no operation */ + +#define IPOPT_RR 7 /* record packet route */ +#define IPOPT_TS 68 /* timestamp */ +#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ +#define IPOPT_LSRR 131 /* loose source route */ +#define IPOPT_SATID 136 /* satnet id */ +#define IPOPT_SSRR 137 /* strict source route */ + +/* + * Offsets to fields in options other than EOL and NOP. + */ +#define IPOPT_OPTVAL 0 /* option ID */ +#define IPOPT_OLEN 1 /* option length */ +#define IPOPT_OFFSET 2 /* offset within option */ +#define IPOPT_MINOFF 4 /* min value of above */ + +/* + * Time stamp option structure. + */ +struct ip_timestamp { + uint8_t ipt_code; /* IPOPT_TS */ + uint8_t ipt_len; /* size of structure (variable) */ + uint8_t ipt_ptr; /* index of current entry */ +#ifdef HOST_WORDS_BIGENDIAN + uint8_t ipt_oflw:4, /* overflow counter */ + ipt_flg:4; /* flags, see below */ +#else + uint8_t ipt_flg:4, /* flags, see below */ + ipt_oflw:4; /* overflow counter */ +#endif + union ipt_timestamp { + n_long ipt_time[1]; + struct ipt_ta { + struct in_addr ipt_addr; + n_long ipt_time; + } ipt_ta[1]; + } ipt_timestamp; +} QEMU_PACKED; + +/* flag bits for ipt_flg */ +#define IPOPT_TS_TSONLY 0 /* timestamps only */ +#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ +#define IPOPT_TS_PRESPEC 3 /* specified modules only */ + +/* bits for security (not byte swapped) */ +#define IPOPT_SECUR_UNCLASS 0x0000 +#define IPOPT_SECUR_CONFID 0xf135 +#define IPOPT_SECUR_EFTO 0x789a +#define IPOPT_SECUR_MMMM 0xbc4d +#define IPOPT_SECUR_RESTR 0xaf13 +#define IPOPT_SECUR_SECRET 0xd788 +#define IPOPT_SECUR_TOPSECRET 0x6bc5 + +/* + * Internet implementation parameters. + */ +#define MAXTTL 255 /* maximum time to live (seconds) */ +#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ +#define IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define IPTTLDEC 1 /* subtracted when forwarding */ + +#define IP_MSS 576 /* default maximum segment size */ + +#if SIZEOF_CHAR_P == 4 +struct mbuf_ptr { + struct mbuf *mptr; + uint32_t dummy; +} QEMU_PACKED; +#else +struct mbuf_ptr { + struct mbuf *mptr; +} QEMU_PACKED; +#endif +struct qlink { + void *next, *prev; +}; + +/* + * Overlay for ip header used by other protocols (tcp, udp). + */ +struct ipovly { + struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ + uint8_t ih_x1; /* (unused) */ + uint8_t ih_pr; /* protocol */ + uint16_t ih_len; /* protocol length */ + struct in_addr ih_src; /* source internet address */ + struct in_addr ih_dst; /* destination internet address */ +} QEMU_PACKED; + +/* + * Ip reassembly queue structure. Each fragment + * being reassembled is attached to one of these structures. + * They are timed out after ipq_ttl drops to 0, and may also + * be reclaimed if memory becomes tight. + * size 28 bytes + */ +struct ipq { + struct qlink frag_link; /* to ip headers of fragments */ + struct qlink ip_link; /* to other reass headers */ + uint8_t ipq_ttl; /* time for reass q to live */ + uint8_t ipq_p; /* protocol of this fragment */ + uint16_t ipq_id; /* sequence id for reassembly */ + struct in_addr ipq_src,ipq_dst; +} QEMU_PACKED; + +/* + * Ip header, when holding a fragment. + * + * Note: ipf_link must be at same offset as frag_link above + */ +struct ipasfrag { + struct qlink ipf_link; + struct ip ipf_ip; +} QEMU_PACKED; + +#define ipf_off ipf_ip.ip_off +#define ipf_tos ipf_ip.ip_tos +#define ipf_len ipf_ip.ip_len +#define ipf_next ipf_link.next +#define ipf_prev ipf_link.prev + +/* + * Structure stored in mbuf in inpcb.ip_options + * and passed to ip_output when ip options are in use. + * The actual length of the options (including ipopt_dst) + * is in m_len. + */ +#define MAX_IPOPTLEN 40 + +struct ipoption { + struct in_addr ipopt_dst; /* first-hop dst if source routed */ + int8_t ipopt_list[MAX_IPOPTLEN]; /* options proper */ +} QEMU_PACKED; + +#endif diff --git a/slirp/ip_icmp.c b/slirp/ip_icmp.c new file mode 100644 index 00000000..9f1cb08a --- /dev/null +++ b/slirp/ip_icmp.c @@ -0,0 +1,450 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 + * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp + */ + +#include "slirp.h" +#include "ip_icmp.h" + +/* The message sent when emulating PING */ +/* Be nice and tell them it's just a pseudo-ping packet */ +static const char icmp_ping_msg[] = "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST packets.\n"; + +/* list of actions for icmp_error() on RX of an icmp message */ +static const int icmp_flush[19] = { +/* ECHO REPLY (0) */ 0, + 1, + 1, +/* DEST UNREACH (3) */ 1, +/* SOURCE QUENCH (4)*/ 1, +/* REDIRECT (5) */ 1, + 1, + 1, +/* ECHO (8) */ 0, +/* ROUTERADVERT (9) */ 1, +/* ROUTERSOLICIT (10) */ 1, +/* TIME EXCEEDED (11) */ 1, +/* PARAMETER PROBLEM (12) */ 1, +/* TIMESTAMP (13) */ 0, +/* TIMESTAMP REPLY (14) */ 0, +/* INFO (15) */ 0, +/* INFO REPLY (16) */ 0, +/* ADDR MASK (17) */ 0, +/* ADDR MASK REPLY (18) */ 0 +}; + +void icmp_init(Slirp *slirp) +{ + slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; + slirp->icmp_last_so = &slirp->icmp; +} + +void icmp_cleanup(Slirp *slirp) +{ + while (slirp->icmp.so_next != &slirp->icmp) { + icmp_detach(slirp->icmp.so_next); + } +} + +static int icmp_send(struct socket *so, struct mbuf *m, int hlen) +{ + struct ip *ip = mtod(m, struct ip *); + struct sockaddr_in addr; + + so->s = qemu_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (so->s == -1) { + return -1; + } + + so->so_m = m; + so->so_faddr = ip->ip_dst; + so->so_laddr = ip->ip_src; + so->so_iptos = ip->ip_tos; + so->so_type = IPPROTO_ICMP; + so->so_state = SS_ISFCONNECTED; + so->so_expire = curtime + SO_EXPIRE; + + addr.sin_family = AF_INET; + addr.sin_addr = so->so_faddr; + + insque(so, &so->slirp->icmp); + + if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, + (struct sockaddr *)&addr, sizeof(addr)) == -1) { + DEBUG_MISC((dfd, "icmp_input icmp sendto tx errno = %d-%s\n", + errno, strerror(errno))); + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); + icmp_detach(so); + } + + return 0; +} + +void icmp_detach(struct socket *so) +{ + closesocket(so->s); + sofree(so); +} + +/* + * Process a received ICMP message. + */ +void +icmp_input(struct mbuf *m, int hlen) +{ + register struct icmp *icp; + register struct ip *ip=mtod(m, struct ip *); + int icmplen=ip->ip_len; + Slirp *slirp = m->slirp; + + DEBUG_CALL("icmp_input"); + DEBUG_ARG("m = %lx", (long )m); + DEBUG_ARG("m_len = %d", m->m_len); + + /* + * Locate icmp structure in mbuf, and check + * that its not corrupted and of at least minimum length. + */ + if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ + freeit: + m_free(m); + goto end_error; + } + + m->m_len -= hlen; + m->m_data += hlen; + icp = mtod(m, struct icmp *); + if (cksum(m, icmplen)) { + goto freeit; + } + m->m_len += hlen; + m->m_data -= hlen; + + DEBUG_ARG("icmp_type = %d", icp->icmp_type); + switch (icp->icmp_type) { + case ICMP_ECHO: + ip->ip_len += hlen; /* since ip_input subtracts this */ + if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { + icmp_reflect(m); + } else if (slirp->restricted) { + goto freeit; + } else { + struct socket *so; + struct sockaddr_in addr; + if ((so = socreate(slirp)) == NULL) goto freeit; + if (icmp_send(so, m, hlen) == 0) { + return; + } + if(udp_attach(so) == -1) { + DEBUG_MISC((dfd,"icmp_input udp_attach errno = %d-%s\n", + errno,strerror(errno))); + sofree(so); + m_free(m); + goto end_error; + } + so->so_m = m; + so->so_faddr = ip->ip_dst; + so->so_fport = htons(7); + so->so_laddr = ip->ip_src; + so->so_lport = htons(9); + so->so_iptos = ip->ip_tos; + so->so_type = IPPROTO_ICMP; + so->so_state = SS_ISFCONNECTED; + + /* Send the packet */ + addr.sin_family = AF_INET; + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + /* It's an alias */ + if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { + if (get_dns_addr(&addr.sin_addr) < 0) + addr.sin_addr = loopback_addr; + } else { + addr.sin_addr = loopback_addr; + } + } else { + addr.sin_addr = so->so_faddr; + } + addr.sin_port = so->so_fport; + if(sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, + (struct sockaddr *)&addr, sizeof(addr)) == -1) { + DEBUG_MISC((dfd,"icmp_input udp sendto tx errno = %d-%s\n", + errno,strerror(errno))); + icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); + udp_detach(so); + } + } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ + break; + case ICMP_UNREACH: + /* XXX? report error? close socket? */ + case ICMP_TIMXCEED: + case ICMP_PARAMPROB: + case ICMP_SOURCEQUENCH: + case ICMP_TSTAMP: + case ICMP_MASKREQ: + case ICMP_REDIRECT: + m_free(m); + break; + + default: + m_free(m); + } /* swith */ + +end_error: + /* m is m_free()'d xor put in a socket xor or given to ip_send */ + return; +} + + +/* + * Send an ICMP message in response to a situation + * + * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). + * MUST NOT change this header information. + * MUST NOT reply to a multicast/broadcast IP address. + * MUST NOT reply to a multicast/broadcast MAC address. + * MUST reply to only the first fragment. + */ +/* + * Send ICMP_UNREACH back to the source regarding msrc. + * mbuf *msrc is used as a template, but is NOT m_free()'d. + * It is reported as the bad ip packet. The header should + * be fully correct and in host byte order. + * ICMP fragmentation is illegal. All machines must accept 576 bytes in one + * packet. The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 + */ + +#define ICMP_MAXDATALEN (IP_MSS-28) +void +icmp_error(struct mbuf *msrc, u_char type, u_char code, int minsize, + const char *message) +{ + unsigned hlen, shlen, s_ip_len; + register struct ip *ip; + register struct icmp *icp; + register struct mbuf *m; + + DEBUG_CALL("icmp_error"); + DEBUG_ARG("msrc = %lx", (long )msrc); + DEBUG_ARG("msrc_len = %d", msrc->m_len); + + if(type!=ICMP_UNREACH && type!=ICMP_TIMXCEED) goto end_error; + + /* check msrc */ + if(!msrc) goto end_error; + ip = mtod(msrc, struct ip *); +#ifdef DEBUG + { char bufa[20], bufb[20]; + strcpy(bufa, inet_ntoa(ip->ip_src)); + strcpy(bufb, inet_ntoa(ip->ip_dst)); + DEBUG_MISC((dfd, " %.16s to %.16s\n", bufa, bufb)); + } +#endif + if(ip->ip_off & IP_OFFMASK) goto end_error; /* Only reply to fragment 0 */ + + /* Do not reply to source-only IPs */ + if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { + goto end_error; + } + + shlen=ip->ip_hl << 2; + s_ip_len=ip->ip_len; + if(ip->ip_p == IPPROTO_ICMP) { + icp = (struct icmp *)((char *)ip + shlen); + /* + * Assume any unknown ICMP type is an error. This isn't + * specified by the RFC, but think about it.. + */ + if(icp->icmp_type>18 || icmp_flush[icp->icmp_type]) goto end_error; + } + + /* make a copy */ + m = m_get(msrc->slirp); + if (!m) { + goto end_error; + } + + { int new_m_size; + new_m_size=sizeof(struct ip )+ICMP_MINLEN+msrc->m_len+ICMP_MAXDATALEN; + if(new_m_size>m->m_size) m_inc(m, new_m_size); + } + memcpy(m->m_data, msrc->m_data, msrc->m_len); + m->m_len = msrc->m_len; /* copy msrc to m */ + + /* make the header of the reply packet */ + ip = mtod(m, struct ip *); + hlen= sizeof(struct ip ); /* no options in reply */ + + /* fill in icmp */ + m->m_data += hlen; + m->m_len -= hlen; + + icp = mtod(m, struct icmp *); + + if(minsize) s_ip_len=shlen+ICMP_MINLEN; /* return header+8b only */ + else if(s_ip_len>ICMP_MAXDATALEN) /* maximum size */ + s_ip_len=ICMP_MAXDATALEN; + + m->m_len=ICMP_MINLEN+s_ip_len; /* 8 bytes ICMP header */ + + /* min. size = 8+sizeof(struct ip)+8 */ + + icp->icmp_type = type; + icp->icmp_code = code; + icp->icmp_id = 0; + icp->icmp_seq = 0; + + memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ + HTONS(icp->icmp_ip.ip_len); + HTONS(icp->icmp_ip.ip_id); + HTONS(icp->icmp_ip.ip_off); + +#ifdef DEBUG + if(message) { /* DEBUG : append message to ICMP packet */ + int message_len; + char *cpnt; + message_len=strlen(message); + if(message_len>ICMP_MAXDATALEN) message_len=ICMP_MAXDATALEN; + cpnt=(char *)m->m_data+m->m_len; + memcpy(cpnt, message, message_len); + m->m_len+=message_len; + } +#endif + + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, m->m_len); + + m->m_data -= hlen; + m->m_len += hlen; + + /* fill in ip */ + ip->ip_hl = hlen >> 2; + ip->ip_len = m->m_len; + + ip->ip_tos=((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ + + ip->ip_ttl = MAXTTL; + ip->ip_p = IPPROTO_ICMP; + ip->ip_dst = ip->ip_src; /* ip addresses */ + ip->ip_src = m->slirp->vhost_addr; + + (void ) ip_output((struct socket *)NULL, m); + +end_error: + return; +} +#undef ICMP_MAXDATALEN + +/* + * Reflect the ip packet back to the source + */ +void +icmp_reflect(struct mbuf *m) +{ + register struct ip *ip = mtod(m, struct ip *); + int hlen = ip->ip_hl << 2; + int optlen = hlen - sizeof(struct ip ); + register struct icmp *icp; + + /* + * Send an icmp packet back to the ip level, + * after supplying a checksum. + */ + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + + icp->icmp_type = ICMP_ECHOREPLY; + icp->icmp_cksum = 0; + icp->icmp_cksum = cksum(m, ip->ip_len - hlen); + + m->m_data -= hlen; + m->m_len += hlen; + + /* fill in ip */ + if (optlen > 0) { + /* + * Strip out original options by copying rest of first + * mbuf's data back, and adjust the IP length. + */ + memmove((caddr_t)(ip + 1), (caddr_t)ip + hlen, + (unsigned )(m->m_len - hlen)); + hlen -= optlen; + ip->ip_hl = hlen >> 2; + ip->ip_len -= optlen; + m->m_len -= optlen; + } + + ip->ip_ttl = MAXTTL; + { /* swap */ + struct in_addr icmp_dst; + icmp_dst = ip->ip_dst; + ip->ip_dst = ip->ip_src; + ip->ip_src = icmp_dst; + } + + (void ) ip_output((struct socket *)NULL, m); +} + +void icmp_receive(struct socket *so) +{ + struct mbuf *m = so->so_m; + struct ip *ip = mtod(m, struct ip *); + int hlen = ip->ip_hl << 2; + u_char error_code; + struct icmp *icp; + int id, len; + + m->m_data += hlen; + m->m_len -= hlen; + icp = mtod(m, struct icmp *); + + id = icp->icmp_id; + len = qemu_recv(so->s, icp, m->m_len, 0); + icp->icmp_id = id; + + m->m_data -= hlen; + m->m_len += hlen; + + if (len == -1 || len == 0) { + if (errno == ENETUNREACH) { + error_code = ICMP_UNREACH_NET; + } else { + error_code = ICMP_UNREACH_HOST; + } + DEBUG_MISC((dfd, " udp icmp rx errno = %d-%s\n", errno, + strerror(errno))); + icmp_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); + } else { + icmp_reflect(so->so_m); + so->so_m = NULL; /* Don't m_free() it again! */ + } + icmp_detach(so); +} diff --git a/slirp/ip_icmp.h b/slirp/ip_icmp.h new file mode 100644 index 00000000..be4426b8 --- /dev/null +++ b/slirp/ip_icmp.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 + * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp + */ + +#ifndef _NETINET_IP_ICMP_H_ +#define _NETINET_IP_ICMP_H_ + +/* + * Interface Control Message Protocol Definitions. + * Per RFC 792, September 1981. + */ + +typedef uint32_t n_time; + +/* + * Structure of an icmp header. + */ +struct icmp { + u_char icmp_type; /* type of message, see below */ + u_char icmp_code; /* type sub code */ + u_short icmp_cksum; /* ones complement cksum of struct */ + union { + u_char ih_pptr; /* ICMP_PARAMPROB */ + struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ + struct ih_idseq { + u_short icd_id; + u_short icd_seq; + } ih_idseq; + int ih_void; + + /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ + struct ih_pmtu { + u_short ipm_void; + u_short ipm_nextmtu; + } ih_pmtu; + } icmp_hun; +#define icmp_pptr icmp_hun.ih_pptr +#define icmp_gwaddr icmp_hun.ih_gwaddr +#define icmp_id icmp_hun.ih_idseq.icd_id +#define icmp_seq icmp_hun.ih_idseq.icd_seq +#define icmp_void icmp_hun.ih_void +#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void +#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu + union { + struct id_ts { + n_time its_otime; + n_time its_rtime; + n_time its_ttime; + } id_ts; + struct id_ip { + struct ip idi_ip; + /* options and then 64 bits of data */ + } id_ip; + uint32_t id_mask; + char id_data[1]; + } icmp_dun; +#define icmp_otime icmp_dun.id_ts.its_otime +#define icmp_rtime icmp_dun.id_ts.its_rtime +#define icmp_ttime icmp_dun.id_ts.its_ttime +#define icmp_ip icmp_dun.id_ip.idi_ip +#define icmp_mask icmp_dun.id_mask +#define icmp_data icmp_dun.id_data +}; + +/* + * Lower bounds on packet lengths for various types. + * For the error advice packets must first ensure that the + * packet is large enough to contain the returned ip header. + * Only then can we do the check to see if 64 bits of packet + * data have been returned, since we need to check the returned + * ip header length. + */ +#define ICMP_MINLEN 8 /* abs minimum */ +#define ICMP_TSLEN (8 + 3 * sizeof (n_time)) /* timestamp */ +#define ICMP_MASKLEN 12 /* address mask */ +#define ICMP_ADVLENMIN (8 + sizeof (struct ip) + 8) /* min */ +#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) + /* N.B.: must separately check that ip_hl >= 5 */ + +/* + * Definition of type and code field values. + */ +#define ICMP_ECHOREPLY 0 /* echo reply */ +#define ICMP_UNREACH 3 /* dest unreachable, codes: */ +#define ICMP_UNREACH_NET 0 /* bad net */ +#define ICMP_UNREACH_HOST 1 /* bad host */ +#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ +#define ICMP_UNREACH_PORT 3 /* bad port */ +#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ +#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ +#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ +#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ +#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ +#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ +#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ +#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ +#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ +#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ +#define ICMP_REDIRECT 5 /* shorter route, codes: */ +#define ICMP_REDIRECT_NET 0 /* for network */ +#define ICMP_REDIRECT_HOST 1 /* for host */ +#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ +#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ +#define ICMP_ECHO 8 /* echo service */ +#define ICMP_ROUTERADVERT 9 /* router advertisement */ +#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ +#define ICMP_TIMXCEED 11 /* time exceeded, code: */ +#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ +#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ +#define ICMP_PARAMPROB 12 /* ip header bad */ +#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */ +#define ICMP_TSTAMP 13 /* timestamp request */ +#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ +#define ICMP_IREQ 15 /* information request */ +#define ICMP_IREQREPLY 16 /* information reply */ +#define ICMP_MASKREQ 17 /* address mask request */ +#define ICMP_MASKREPLY 18 /* address mask reply */ + +#define ICMP_MAXTYPE 18 + +#define ICMP_INFOTYPE(type) \ + ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ + (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ + (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ + (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ + (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) + +void icmp_init(Slirp *slirp); +void icmp_cleanup(Slirp *slirp); +void icmp_input(struct mbuf *, int); +void icmp_error(struct mbuf *msrc, u_char type, u_char code, int minsize, + const char *message); +void icmp_reflect(struct mbuf *); +void icmp_receive(struct socket *so); +void icmp_detach(struct socket *so); + +#endif diff --git a/slirp/ip_input.c b/slirp/ip_input.c new file mode 100644 index 00000000..880bdfd3 --- /dev/null +++ b/slirp/ip_input.c @@ -0,0 +1,668 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 + * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include +#include +#include "ip_icmp.h" + +static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); +static void ip_freef(Slirp *slirp, struct ipq *fp); +static void ip_enq(register struct ipasfrag *p, + register struct ipasfrag *prev); +static void ip_deq(register struct ipasfrag *p); + +/* + * IP initialization: fill in IP protocol switch table. + * All protocols not implemented in kernel go to raw IP protocol handler. + */ +void +ip_init(Slirp *slirp) +{ + slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; + udp_init(slirp); + tcp_init(slirp); + icmp_init(slirp); +} + +void ip_cleanup(Slirp *slirp) +{ + udp_cleanup(slirp); + tcp_cleanup(slirp); + icmp_cleanup(slirp); +} + +/* + * Ip input routine. Checksum and byte swap header. If fragmented + * try to reassemble. Process options. Pass to next level. + */ +void +ip_input(struct mbuf *m) +{ + Slirp *slirp = m->slirp; + register struct ip *ip; + int hlen; + + DEBUG_CALL("ip_input"); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("m_len = %d", m->m_len); + + if (m->m_len < sizeof (struct ip)) { + return; + } + + ip = mtod(m, struct ip *); + + if (ip->ip_v != IPVERSION) { + goto bad; + } + + hlen = ip->ip_hl << 2; + if (hlenm->m_len) {/* min header length */ + goto bad; /* or packet too short */ + } + + /* keep ip header intact for ICMP reply + * ip->ip_sum = cksum(m, hlen); + * if (ip->ip_sum) { + */ + if(cksum(m,hlen)) { + goto bad; + } + + /* + * Convert fields to host representation. + */ + NTOHS(ip->ip_len); + if (ip->ip_len < hlen) { + goto bad; + } + NTOHS(ip->ip_id); + NTOHS(ip->ip_off); + + /* + * Check that the amount of data in the buffers + * is as at least much as the IP header would have us expect. + * Trim mbufs if longer than we expect. + * Drop packet if shorter than we expect. + */ + if (m->m_len < ip->ip_len) { + goto bad; + } + + /* Should drop packet if mbuf too long? hmmm... */ + if (m->m_len > ip->ip_len) + m_adj(m, ip->ip_len - m->m_len); + + /* check ip_ttl for a correct ICMP reply */ + if(ip->ip_ttl==0) { + icmp_error(m, ICMP_TIMXCEED,ICMP_TIMXCEED_INTRANS, 0,"ttl"); + goto bad; + } + + /* + * If offset or IP_MF are set, must reassemble. + * Otherwise, nothing need be done. + * (We could look in the reassembly queue to see + * if the packet was previously fragmented, + * but it's not worth the time; just let them time out.) + * + * XXX This should fail, don't fragment yet + */ + if (ip->ip_off &~ IP_DF) { + register struct ipq *fp; + struct qlink *l; + /* + * Look for queue of fragments + * of this datagram. + */ + for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; + l = l->next) { + fp = container_of(l, struct ipq, ip_link); + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && + ip->ip_p == fp->ipq_p) + goto found; + } + fp = NULL; + found: + + /* + * Adjust ip_len to not reflect header, + * set ip_mff if more fragments are expected, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + if (ip->ip_off & IP_MF) + ip->ip_tos |= 1; + else + ip->ip_tos &= ~1; + + ip->ip_off <<= 3; + + /* + * If datagram marked as having more fragments + * or if this is not the first fragment, + * attempt reassembly; if it succeeds, proceed. + */ + if (ip->ip_tos & 1 || ip->ip_off) { + ip = ip_reass(slirp, ip, fp); + if (ip == NULL) + return; + m = dtom(slirp, ip); + } else + if (fp) + ip_freef(slirp, fp); + + } else + ip->ip_len -= hlen; + + /* + * Switch out to protocol's input routine. + */ + switch (ip->ip_p) { + case IPPROTO_TCP: + tcp_input(m, hlen, (struct socket *)NULL); + break; + case IPPROTO_UDP: + udp_input(m, hlen); + break; + case IPPROTO_ICMP: + icmp_input(m, hlen); + break; + default: + m_free(m); + } + return; +bad: + m_free(m); +} + +#define iptofrag(P) ((struct ipasfrag *)(((char*)(P)) - sizeof(struct qlink))) +#define fragtoip(P) ((struct ip*)(((char*)(P)) + sizeof(struct qlink))) +/* + * Take incoming datagram fragment and try to + * reassemble it into whole datagram. If a chain for + * reassembly of this datagram already exists, then it + * is given as fp; otherwise have to make a chain. + */ +static struct ip * +ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) +{ + register struct mbuf *m = dtom(slirp, ip); + register struct ipasfrag *q; + int hlen = ip->ip_hl << 2; + int i, next; + + DEBUG_CALL("ip_reass"); + DEBUG_ARG("ip = %lx", (long)ip); + DEBUG_ARG("fp = %lx", (long)fp); + DEBUG_ARG("m = %lx", (long)m); + + /* + * Presence of header sizes in mbufs + * would confuse code below. + * Fragment m_data is concatenated. + */ + m->m_data += hlen; + m->m_len -= hlen; + + /* + * If first fragment to arrive, create a reassembly queue. + */ + if (fp == NULL) { + struct mbuf *t = m_get(slirp); + + if (t == NULL) { + goto dropfrag; + } + fp = mtod(t, struct ipq *); + insque(&fp->ip_link, &slirp->ipq.ip_link); + fp->ipq_ttl = IPFRAGTTL; + fp->ipq_p = ip->ip_p; + fp->ipq_id = ip->ip_id; + fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; + fp->ipq_src = ip->ip_src; + fp->ipq_dst = ip->ip_dst; + q = (struct ipasfrag *)fp; + goto insert; + } + + /* + * Find a segment which begins after this one does. + */ + for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; + q = q->ipf_next) + if (q->ipf_off > ip->ip_off) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (q->ipf_prev != &fp->frag_link) { + struct ipasfrag *pq = q->ipf_prev; + i = pq->ipf_off + pq->ipf_len - ip->ip_off; + if (i > 0) { + if (i >= ip->ip_len) + goto dropfrag; + m_adj(dtom(slirp, ip), i); + ip->ip_off += i; + ip->ip_len -= i; + } + } + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (q != (struct ipasfrag*)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; + q->ipf_off += i; + m_adj(dtom(slirp, q), i); + break; + } + q = q->ipf_next; + m_free(dtom(slirp, q->ipf_prev)); + ip_deq(q->ipf_prev); + } + +insert: + /* + * Stick new segment in its place; + * check for complete reassembly. + */ + ip_enq(iptofrag(ip), q->ipf_prev); + next = 0; + for (q = fp->frag_link.next; q != (struct ipasfrag*)&fp->frag_link; + q = q->ipf_next) { + if (q->ipf_off != next) + return NULL; + next += q->ipf_len; + } + if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) + return NULL; + + /* + * Reassembly is complete; concatenate fragments. + */ + q = fp->frag_link.next; + m = dtom(slirp, q); + + q = (struct ipasfrag *) q->ipf_next; + while (q != (struct ipasfrag*)&fp->frag_link) { + struct mbuf *t = dtom(slirp, q); + q = (struct ipasfrag *) q->ipf_next; + m_cat(m, t); + } + + /* + * Create header for new ip packet by + * modifying header of first packet; + * dequeue and discard fragment reassembly header. + * Make header visible. + */ + q = fp->frag_link.next; + + /* + * If the fragments concatenated to an mbuf that's + * bigger than the total size of the fragment, then and + * m_ext buffer was alloced. But fp->ipq_next points to + * the old buffer (in the mbuf), so we must point ip + * into the new buffer. + */ + if (m->m_flags & M_EXT) { + int delta = (char *)q - m->m_dat; + q = (struct ipasfrag *)(m->m_ext + delta); + } + + ip = fragtoip(q); + ip->ip_len = next; + ip->ip_tos &= ~1; + ip->ip_src = fp->ipq_src; + ip->ip_dst = fp->ipq_dst; + remque(&fp->ip_link); + (void) m_free(dtom(slirp, fp)); + m->m_len += (ip->ip_hl << 2); + m->m_data -= (ip->ip_hl << 2); + + return ip; + +dropfrag: + m_free(m); + return NULL; +} + +/* + * Free a fragment reassembly header and all + * associated datagrams. + */ +static void +ip_freef(Slirp *slirp, struct ipq *fp) +{ + register struct ipasfrag *q, *p; + + for (q = fp->frag_link.next; q != (struct ipasfrag*)&fp->frag_link; q = p) { + p = q->ipf_next; + ip_deq(q); + m_free(dtom(slirp, q)); + } + remque(&fp->ip_link); + (void) m_free(dtom(slirp, fp)); +} + +/* + * Put an ip fragment on a reassembly chain. + * Like insque, but pointers in middle of structure. + */ +static void +ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) +{ + DEBUG_CALL("ip_enq"); + DEBUG_ARG("prev = %lx", (long)prev); + p->ipf_prev = prev; + p->ipf_next = prev->ipf_next; + ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; + prev->ipf_next = p; +} + +/* + * To ip_enq as remque is to insque. + */ +static void +ip_deq(register struct ipasfrag *p) +{ + ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; + ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; +} + +/* + * IP timer processing; + * if a timer expires on a reassembly + * queue, discard it. + */ +void +ip_slowtimo(Slirp *slirp) +{ + struct qlink *l; + + DEBUG_CALL("ip_slowtimo"); + + l = slirp->ipq.ip_link.next; + + if (l == NULL) + return; + + while (l != &slirp->ipq.ip_link) { + struct ipq *fp = container_of(l, struct ipq, ip_link); + l = l->next; + if (--fp->ipq_ttl == 0) { + ip_freef(slirp, fp); + } + } +} + +/* + * Do option processing on a datagram, + * possibly discarding it if bad options are encountered, + * or forwarding it if source-routed. + * Returns 1 if packet has been forwarded/freed, + * 0 if the packet should be processed further. + */ + +#ifdef notdef + +int +ip_dooptions(m) + struct mbuf *m; +{ + register struct ip *ip = mtod(m, struct ip *); + register u_char *cp; + register struct ip_timestamp *ipt; + register struct in_ifaddr *ia; + int opt, optlen, cnt, off, code, type, forward = 0; + struct in_addr *sin, dst; +typedef uint32_t n_time; + n_time ntime; + + dst = ip->ip_dst; + cp = (u_char *)(ip + 1); + cnt = (ip->ip_hl << 2) - sizeof (struct ip); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[IPOPT_OPTVAL]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + optlen = 1; + else { + optlen = cp[IPOPT_OLEN]; + if (optlen <= 0 || optlen > cnt) { + code = &cp[IPOPT_OLEN] - (u_char *)ip; + goto bad; + } + } + switch (opt) { + + default: + break; + + /* + * Source routing with record. + * Find interface with current destination address. + * If none on this machine then drop if strictly routed, + * or do nothing if loosely routed. + * Record interface address and bring up next address + * component. If strictly routed make sure next + * address is on directly accessible net. + */ + case IPOPT_LSRR: + case IPOPT_SSRR: + if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { + code = &cp[IPOPT_OFFSET] - (u_char *)ip; + goto bad; + } + ipaddr.sin_addr = ip->ip_dst; + ia = (struct in_ifaddr *) + ifa_ifwithaddr((struct sockaddr *)&ipaddr); + if (ia == 0) { + if (opt == IPOPT_SSRR) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; + goto bad; + } + /* + * Loose routing, and not at next destination + * yet; nothing to do except forward. + */ + break; + } + off--; /* 0 origin */ + if (off > optlen - sizeof(struct in_addr)) { + /* + * End of source route. Should be for us. + */ + save_rte(cp, ip->ip_src); + break; + } + /* + * locate outgoing interface + */ + bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr, + sizeof(ipaddr.sin_addr)); + if (opt == IPOPT_SSRR) { +#define INA struct in_ifaddr * +#define SA struct sockaddr * + if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0) + ia = (INA)ifa_ifwithnet((SA)&ipaddr); + } else + ia = ip_rtaddr(ipaddr.sin_addr); + if (ia == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_SRCFAIL; + goto bad; + } + ip->ip_dst = ipaddr.sin_addr; + bcopy((caddr_t)&(IA_SIN(ia)->sin_addr), + (caddr_t)(cp + off), sizeof(struct in_addr)); + cp[IPOPT_OFFSET] += sizeof(struct in_addr); + /* + * Let ip_intr's mcast routing check handle mcast pkts + */ + forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr)); + break; + + case IPOPT_RR: + if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) { + code = &cp[IPOPT_OFFSET] - (u_char *)ip; + goto bad; + } + /* + * If no space remains, ignore. + */ + off--; /* 0 origin */ + if (off > optlen - sizeof(struct in_addr)) + break; + bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr, + sizeof(ipaddr.sin_addr)); + /* + * locate outgoing interface; if we're the destination, + * use the incoming interface (should be same). + */ + if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 && + (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_HOST; + goto bad; + } + bcopy((caddr_t)&(IA_SIN(ia)->sin_addr), + (caddr_t)(cp + off), sizeof(struct in_addr)); + cp[IPOPT_OFFSET] += sizeof(struct in_addr); + break; + + case IPOPT_TS: + code = cp - (u_char *)ip; + ipt = (struct ip_timestamp *)cp; + if (ipt->ipt_len < 5) + goto bad; + if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) { + if (++ipt->ipt_oflw == 0) + goto bad; + break; + } + sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1); + switch (ipt->ipt_flg) { + + case IPOPT_TS_TSONLY: + break; + + case IPOPT_TS_TSANDADDR: + if (ipt->ipt_ptr + sizeof(n_time) + + sizeof(struct in_addr) > ipt->ipt_len) + goto bad; + ipaddr.sin_addr = dst; + ia = (INA)ifaof_ i f p foraddr((SA)&ipaddr, + m->m_pkthdr.rcvif); + if (ia == 0) + continue; + bcopy((caddr_t)&IA_SIN(ia)->sin_addr, + (caddr_t)sin, sizeof(struct in_addr)); + ipt->ipt_ptr += sizeof(struct in_addr); + break; + + case IPOPT_TS_PRESPEC: + if (ipt->ipt_ptr + sizeof(n_time) + + sizeof(struct in_addr) > ipt->ipt_len) + goto bad; + bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr, + sizeof(struct in_addr)); + if (ifa_ifwithaddr((SA)&ipaddr) == 0) + continue; + ipt->ipt_ptr += sizeof(struct in_addr); + break; + + default: + goto bad; + } + ntime = iptime(); + bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1, + sizeof(n_time)); + ipt->ipt_ptr += sizeof(n_time); + } + } + if (forward) { + ip_forward(m, 1); + return (1); + } + return (0); +bad: + icmp_error(m, type, code, 0, 0); + + return (1); +} + +#endif /* notdef */ + +/* + * Strip out IP options, at higher + * level protocol in the kernel. + * Second argument is buffer to which options + * will be moved, and return value is their length. + * (XXX) should be deleted; last arg currently ignored. + */ +void +ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) +{ + register int i; + struct ip *ip = mtod(m, struct ip *); + register caddr_t opts; + int olen; + + olen = (ip->ip_hl<<2) - sizeof (struct ip); + opts = (caddr_t)(ip + 1); + i = m->m_len - (sizeof (struct ip) + olen); + memcpy(opts, opts + olen, (unsigned)i); + m->m_len -= olen; + + ip->ip_hl = sizeof(struct ip) >> 2; +} diff --git a/slirp/ip_output.c b/slirp/ip_output.c new file mode 100644 index 00000000..c82830fe --- /dev/null +++ b/slirp/ip_output.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 + * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp + */ + +/* + * Changes and additions relating to SLiRP are + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include + +/* Number of packets queued before we start sending + * (to prevent allocing too many mbufs) */ +#define IF_THRESH 10 + +/* + * IP output. The packet in mbuf chain m contains a skeletal IP + * header (with len, off, ttl, proto, tos, src, dst). + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + */ +int +ip_output(struct socket *so, struct mbuf *m0) +{ + Slirp *slirp = m0->slirp; + register struct ip *ip; + register struct mbuf *m = m0; + register int hlen = sizeof(struct ip ); + int len, off, error = 0; + + DEBUG_CALL("ip_output"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m0 = %lx", (long)m0); + + ip = mtod(m, struct ip *); + /* + * Fill in IP header. + */ + ip->ip_v = IPVERSION; + ip->ip_off &= IP_DF; + ip->ip_id = htons(slirp->ip_id++); + ip->ip_hl = hlen >> 2; + + /* + * If small enough for interface, can just send directly. + */ + if ((uint16_t)ip->ip_len <= IF_MTU) { + ip->ip_len = htons((uint16_t)ip->ip_len); + ip->ip_off = htons((uint16_t)ip->ip_off); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); + + if_output(so, m); + goto done; + } + + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ + if (ip->ip_off & IP_DF) { + error = -1; + goto bad; + } + + len = (IF_MTU - hlen) &~ 7; /* ip databytes per packet */ + if (len < 8) { + error = -1; + goto bad; + } + + { + int mhlen, firstlen = len; + struct mbuf **mnext = &m->m_nextpkt; + + /* + * Loop through length of segment after first fragment, + * make new header and copy data of each part and link onto chain. + */ + m0 = m; + mhlen = sizeof (struct ip); + for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { + register struct ip *mhip; + m = m_get(slirp); + if (m == NULL) { + error = -1; + goto sendorfree; + } + m->m_data += IF_MAXLINKHDR; + mhip = mtod(m, struct ip *); + *mhip = *ip; + + m->m_len = mhlen; + mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); + if (ip->ip_off & IP_MF) + mhip->ip_off |= IP_MF; + if (off + len >= (uint16_t)ip->ip_len) + len = (uint16_t)ip->ip_len - off; + else + mhip->ip_off |= IP_MF; + mhip->ip_len = htons((uint16_t)(len + mhlen)); + + if (m_copy(m, m0, off, len) < 0) { + error = -1; + goto sendorfree; + } + + mhip->ip_off = htons((uint16_t)mhip->ip_off); + mhip->ip_sum = 0; + mhip->ip_sum = cksum(m, mhlen); + *mnext = m; + mnext = &m->m_nextpkt; + } + /* + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). + */ + m = m0; + m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); + ip->ip_len = htons((uint16_t)m->m_len); + ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); + ip->ip_sum = 0; + ip->ip_sum = cksum(m, hlen); +sendorfree: + for (m = m0; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = NULL; + if (error == 0) + if_output(so, m); + else + m_free(m); + } + } + +done: + return (error); + +bad: + m_free(m0); + goto done; +} diff --git a/slirp/libslirp.h b/slirp/libslirp.h new file mode 100644 index 00000000..5bdcbd50 --- /dev/null +++ b/slirp/libslirp.h @@ -0,0 +1,43 @@ +#ifndef _LIBSLIRP_H +#define _LIBSLIRP_H + +#include "qemu-common.h" + +struct Slirp; +typedef struct Slirp Slirp; + +int get_dns_addr(struct in_addr *pdns_addr); + +Slirp *slirp_init(int restricted, struct in_addr vnetwork, + struct in_addr vnetmask, struct in_addr vhost, + const char *vhostname, const char *tftp_path, + const char *bootfile, struct in_addr vdhcp_start, + struct in_addr vnameserver, const char **vdnssearch, + void *opaque); +void slirp_cleanup(Slirp *slirp); + +void slirp_pollfds_fill(GArray *pollfds, uint32_t *timeout); + +void slirp_pollfds_poll(GArray *pollfds, int select_error); + +void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); + +/* you must provide the following functions: */ +void slirp_output(void *opaque, const uint8_t *pkt, int pkt_len); + +int slirp_add_hostfwd(Slirp *slirp, int is_udp, + struct in_addr host_addr, int host_port, + struct in_addr guest_addr, int guest_port); +int slirp_remove_hostfwd(Slirp *slirp, int is_udp, + struct in_addr host_addr, int host_port); +int slirp_add_exec(Slirp *slirp, int do_pty, const void *args, + struct in_addr *guest_addr, int guest_port); + +void slirp_connection_info(Slirp *slirp, Monitor *mon); + +void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port, const uint8_t *buf, int size); +size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port); + +#endif diff --git a/slirp/main.h b/slirp/main.h new file mode 100644 index 00000000..f2e58cfe --- /dev/null +++ b/slirp/main.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ +#ifndef SLIRP_MAIN_H +#define SLIRP_MAIN_H 1 + +#ifdef HAVE_SYS_SELECT_H +#include +#endif + +#define TOWRITEMAX 512 + +extern int slirp_socket; +extern int slirp_socket_unit; +extern int slirp_socket_port; +extern uint32_t slirp_socket_addr; +extern char *slirp_socket_passwd; +extern int ctty_closed; + +/* + * Get the difference in 2 times from updtim() + * Allow for wraparound times, "just in case" + * x is the greater of the 2 (current time) and y is + * what it's being compared against. + */ +#define TIME_DIFF(x,y) (x)-(y) < 0 ? ~0-(y)+(x) : (x)-(y) + +extern char *slirp_tty; +extern char *exec_shell; +extern u_int curtime; +extern struct in_addr loopback_addr; +extern unsigned long loopback_mask; +extern char *username; +extern char *socket_path; +extern int towrite_max; +extern int ppp_exit; +extern int tcp_keepintvl; + +#define PROTO_SLIP 0x1 +#ifdef USE_PPP +#define PROTO_PPP 0x2 +#endif + +int if_encap(Slirp *slirp, struct mbuf *ifm); +ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags); + +#endif diff --git a/slirp/mbuf.c b/slirp/mbuf.c new file mode 100644 index 00000000..4fefb043 --- /dev/null +++ b/slirp/mbuf.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 1995 Danny Gasparovski + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +/* + * mbuf's in SLiRP are much simpler than the real mbufs in + * FreeBSD. They are fixed size, determined by the MTU, + * so that one whole packet can fit. Mbuf's cannot be + * chained together. If there's more data than the mbuf + * could hold, an external malloced buffer is pointed to + * by m_ext (and the data pointers) and M_EXT is set in + * the flags + */ + +#include + +#define MBUF_THRESH 30 + +/* + * Find a nice value for msize + * XXX if_maxlinkhdr already in mtu + */ +#define SLIRP_MSIZE (IF_MTU + IF_MAXLINKHDR + offsetof(struct mbuf, m_dat) + 6) + +void +m_init(Slirp *slirp) +{ + slirp->m_freelist.m_next = slirp->m_freelist.m_prev = &slirp->m_freelist; + slirp->m_usedlist.m_next = slirp->m_usedlist.m_prev = &slirp->m_usedlist; +} + +void m_cleanup(Slirp *slirp) +{ + struct mbuf *m, *next; + + m = slirp->m_usedlist.m_next; + while (m != &slirp->m_usedlist) { + next = m->m_next; + if (m->m_flags & M_EXT) { + free(m->m_ext); + } + free(m); + m = next; + } + m = slirp->m_freelist.m_next; + while (m != &slirp->m_freelist) { + next = m->m_next; + free(m); + m = next; + } +} + +/* + * Get an mbuf from the free list, if there are none + * malloc one + * + * Because fragmentation can occur if we alloc new mbufs and + * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, + * which tells m_free to actually free() it + */ +struct mbuf * +m_get(Slirp *slirp) +{ + register struct mbuf *m; + int flags = 0; + + DEBUG_CALL("m_get"); + + if (slirp->m_freelist.m_next == &slirp->m_freelist) { + m = (struct mbuf *)malloc(SLIRP_MSIZE); + if (m == NULL) goto end_error; + slirp->mbuf_alloced++; + if (slirp->mbuf_alloced > MBUF_THRESH) + flags = M_DOFREE; + m->slirp = slirp; + } else { + m = slirp->m_freelist.m_next; + remque(m); + } + + /* Insert it in the used list */ + insque(m,&slirp->m_usedlist); + m->m_flags = (flags | M_USEDLIST); + + /* Initialise it */ + m->m_size = SLIRP_MSIZE - offsetof(struct mbuf, m_dat); + m->m_data = m->m_dat; + m->m_len = 0; + m->m_nextpkt = NULL; + m->m_prevpkt = NULL; + m->arp_requested = false; + m->expiration_date = (uint64_t)-1; +end_error: + DEBUG_ARG("m = %lx", (long )m); + return m; +} + +void +m_free(struct mbuf *m) +{ + + DEBUG_CALL("m_free"); + DEBUG_ARG("m = %lx", (long )m); + + if(m) { + /* Remove from m_usedlist */ + if (m->m_flags & M_USEDLIST) + remque(m); + + /* If it's M_EXT, free() it */ + if (m->m_flags & M_EXT) + free(m->m_ext); + + /* + * Either free() it or put it on the free list + */ + if (m->m_flags & M_DOFREE) { + m->slirp->mbuf_alloced--; + free(m); + } else if ((m->m_flags & M_FREELIST) == 0) { + insque(m,&m->slirp->m_freelist); + m->m_flags = M_FREELIST; /* Clobber other flags */ + } + } /* if(m) */ +} + +/* + * Copy data from one mbuf to the end of + * the other.. if result is too big for one mbuf, malloc() + * an M_EXT data segment + */ +void +m_cat(struct mbuf *m, struct mbuf *n) +{ + /* + * If there's no room, realloc + */ + if (M_FREEROOM(m) < n->m_len) + m_inc(m,m->m_size+MINCSIZE); + + memcpy(m->m_data+m->m_len, n->m_data, n->m_len); + m->m_len += n->m_len; + + m_free(n); +} + + +/* make m size bytes large */ +void +m_inc(struct mbuf *m, int size) +{ + int datasize; + + /* some compiles throw up on gotos. This one we can fake. */ + if(m->m_size>size) return; + + if (m->m_flags & M_EXT) { + datasize = m->m_data - m->m_ext; + m->m_ext = (char *)realloc(m->m_ext,size); + m->m_data = m->m_ext + datasize; + } else { + char *dat; + datasize = m->m_data - m->m_dat; + dat = (char *)malloc(size); + memcpy(dat, m->m_dat, m->m_size); + + m->m_ext = dat; + m->m_data = m->m_ext + datasize; + m->m_flags |= M_EXT; + } + + m->m_size = size; + +} + + + +void +m_adj(struct mbuf *m, int len) +{ + if (m == NULL) + return; + if (len >= 0) { + /* Trim from head */ + m->m_data += len; + m->m_len -= len; + } else { + /* Trim from tail */ + len = -len; + m->m_len -= len; + } +} + + +/* + * Copy len bytes from m, starting off bytes into n + */ +int +m_copy(struct mbuf *n, struct mbuf *m, int off, int len) +{ + if (len > M_FREEROOM(n)) + return -1; + + memcpy((n->m_data + n->m_len), (m->m_data + off), len); + n->m_len += len; + return 0; +} + + +/* + * Given a pointer into an mbuf, return the mbuf + * XXX This is a kludge, I should eliminate the need for it + * Fortunately, it's not used often + */ +struct mbuf * +dtom(Slirp *slirp, void *dat) +{ + struct mbuf *m; + + DEBUG_CALL("dtom"); + DEBUG_ARG("dat = %lx", (long )dat); + + /* bug corrected for M_EXT buffers */ + for (m = slirp->m_usedlist.m_next; m != &slirp->m_usedlist; + m = m->m_next) { + if (m->m_flags & M_EXT) { + if( (char *)dat>=m->m_ext && (char *)dat<(m->m_ext + m->m_size) ) + return m; + } else { + if( (char *)dat >= m->m_dat && (char *)dat<(m->m_dat + m->m_size) ) + return m; + } + } + + DEBUG_ERROR((dfd, "dtom failed")); + + return (struct mbuf *)0; +} diff --git a/slirp/mbuf.h b/slirp/mbuf.h new file mode 100644 index 00000000..b144f1ce --- /dev/null +++ b/slirp/mbuf.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 + * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp + */ + +#ifndef _MBUF_H_ +#define _MBUF_H_ + +#define MINCSIZE 4096 /* Amount to increase mbuf if too small */ + +/* + * Macros for type conversion + * mtod(m,t) - convert mbuf pointer to data pointer of correct type + */ +#define mtod(m,t) ((t)(m)->m_data) + +/* XXX About mbufs for slirp: + * Only one mbuf is ever used in a chain, for each "cell" of data. + * m_nextpkt points to the next packet, if fragmented. + * If the data is too large, the M_EXT is used, and a larger block + * is alloced. Therefore, m_free[m] must check for M_EXT and if set + * free the m_ext. This is inefficient memory-wise, but who cares. + */ + +/* + * How much room is in the mbuf, from m_data to the end of the mbuf + */ +#define M_ROOM(m) ((m->m_flags & M_EXT)? \ + (((m)->m_ext + (m)->m_size) - (m)->m_data) \ + : \ + (((m)->m_dat + (m)->m_size) - (m)->m_data)) + +/* + * How much free room there is + */ +#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) +#define M_TRAILINGSPACE M_FREEROOM + +struct mbuf { + /* XXX should union some of these! */ + /* header at beginning of each mbuf: */ + struct mbuf *m_next; /* Linked list of mbufs */ + struct mbuf *m_prev; + struct mbuf *m_nextpkt; /* Next packet in queue/record */ + struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ + int m_flags; /* Misc flags */ + + int m_size; /* Size of data */ + struct socket *m_so; + + caddr_t m_data; /* Location of data */ + int m_len; /* Amount of data in this mbuf */ + + Slirp *slirp; + bool arp_requested; + uint64_t expiration_date; + /* start of dynamic buffer area, must be last element */ + union { + char m_dat[1]; /* ANSI don't like 0 sized arrays */ + char *m_ext; + }; +}; + +#define ifq_prev m_prev +#define ifq_next m_next +#define ifs_prev m_prevpkt +#define ifs_next m_nextpkt +#define ifq_so m_so + +#define M_EXT 0x01 /* m_ext points to more (malloced) data */ +#define M_FREELIST 0x02 /* mbuf is on free list */ +#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ +#define M_DOFREE 0x08 /* when m_free is called on the mbuf, free() + * it rather than putting it on the free list */ + +void m_init(Slirp *); +void m_cleanup(Slirp *slirp); +struct mbuf * m_get(Slirp *); +void m_free(struct mbuf *); +void m_cat(register struct mbuf *, register struct mbuf *); +void m_inc(struct mbuf *, int); +void m_adj(struct mbuf *, int); +int m_copy(struct mbuf *, struct mbuf *, int, int); +struct mbuf * dtom(Slirp *, void *); + +static inline void ifs_init(struct mbuf *ifm) +{ + ifm->ifs_next = ifm->ifs_prev = ifm; +} + +#endif diff --git a/slirp/misc.c b/slirp/misc.c new file mode 100644 index 00000000..578e8b2c --- /dev/null +++ b/slirp/misc.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include +#include + +#include "monitor/monitor.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" + +#ifdef DEBUG +int slirp_debug = DBG_CALL|DBG_MISC|DBG_ERROR; +#endif + +struct quehead { + struct quehead *qh_link; + struct quehead *qh_rlink; +}; + +inline void +insque(void *a, void *b) +{ + register struct quehead *element = (struct quehead *) a; + register struct quehead *head = (struct quehead *) b; + element->qh_link = head->qh_link; + head->qh_link = (struct quehead *)element; + element->qh_rlink = (struct quehead *)head; + ((struct quehead *)(element->qh_link))->qh_rlink + = (struct quehead *)element; +} + +inline void +remque(void *a) +{ + register struct quehead *element = (struct quehead *) a; + ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; + ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; + element->qh_rlink = NULL; +} + +int add_exec(struct ex_list **ex_ptr, int do_pty, char *exec, + struct in_addr addr, int port) +{ + struct ex_list *tmp_ptr; + + /* First, check if the port is "bound" */ + for (tmp_ptr = *ex_ptr; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { + if (port == tmp_ptr->ex_fport && + addr.s_addr == tmp_ptr->ex_addr.s_addr) + return -1; + } + + tmp_ptr = *ex_ptr; + *ex_ptr = g_new(struct ex_list, 1); + (*ex_ptr)->ex_fport = port; + (*ex_ptr)->ex_addr = addr; + (*ex_ptr)->ex_pty = do_pty; + (*ex_ptr)->ex_exec = (do_pty == 3) ? exec : g_strdup(exec); + (*ex_ptr)->ex_next = tmp_ptr; + return 0; +} + +#ifndef HAVE_STRERROR + +/* + * For systems with no strerror + */ + +extern int sys_nerr; +extern char *sys_errlist[]; + +char * +strerror(error) + int error; +{ + if (error < sys_nerr) + return sys_errlist[error]; + else + return "Unknown error."; +} + +#endif + + +#ifdef _WIN32 + +int +fork_exec(struct socket *so, const char *ex, int do_pty) +{ + /* not implemented */ + return 0; +} + +#else + +/* + * XXX This is ugly + * We create and bind a socket, then fork off to another + * process, which connects to this socket, after which we + * exec the wanted program. If something (strange) happens, + * the accept() call could block us forever. + * + * do_pty = 0 Fork/exec inetd style + * do_pty = 1 Fork/exec using slirp.telnetd + * do_ptr = 2 Fork/exec using pty + */ +int +fork_exec(struct socket *so, const char *ex, int do_pty) +{ + int s; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + int opt; + const char *argv[256]; + /* don't want to clobber the original */ + char *bptr; + const char *curarg; + int c, i, ret; + pid_t pid; + + DEBUG_CALL("fork_exec"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("ex = %lx", (long)ex); + DEBUG_ARG("do_pty = %lx", (long)do_pty); + + if (do_pty == 2) { + return 0; + } else { + addr.sin_family = AF_INET; + addr.sin_port = 0; + addr.sin_addr.s_addr = INADDR_ANY; + + if ((s = qemu_socket(AF_INET, SOCK_STREAM, 0)) < 0 || + bind(s, (struct sockaddr *)&addr, addrlen) < 0 || + listen(s, 1) < 0) { + error_report("Error: inet socket: %s", strerror(errno)); + closesocket(s); + + return 0; + } + } + + pid = fork(); + switch(pid) { + case -1: + error_report("Error: fork failed: %s", strerror(errno)); + close(s); + return 0; + + case 0: + setsid(); + + /* Set the DISPLAY */ + getsockname(s, (struct sockaddr *)&addr, &addrlen); + close(s); + /* + * Connect to the socket + * XXX If any of these fail, we're in trouble! + */ + s = qemu_socket(AF_INET, SOCK_STREAM, 0); + addr.sin_addr = loopback_addr; + do { + ret = connect(s, (struct sockaddr *)&addr, addrlen); + } while (ret < 0 && errno == EINTR); + + dup2(s, 0); + dup2(s, 1); + dup2(s, 2); + for (s = getdtablesize() - 1; s >= 3; s--) + close(s); + + i = 0; + bptr = g_strdup(ex); /* No need to free() this */ + if (do_pty == 1) { + /* Setup "slirp.telnetd -x" */ + argv[i++] = "slirp.telnetd"; + argv[i++] = "-x"; + argv[i++] = bptr; + } else + do { + /* Change the string into argv[] */ + curarg = bptr; + while (*bptr != ' ' && *bptr != (char)0) + bptr++; + c = *bptr; + *bptr++ = (char)0; + argv[i++] = g_strdup(curarg); + } while (c); + + argv[i] = NULL; + execvp(argv[0], (char **)argv); + + /* Ooops, failed, let's tell the user why */ + fprintf(stderr, "Error: execvp of %s failed: %s\n", + argv[0], strerror(errno)); + close(0); close(1); close(2); /* XXX */ + exit(1); + + default: + qemu_add_child_watch(pid); + /* + * XXX this could block us... + * XXX Should set a timer here, and if accept() doesn't + * return after X seconds, declare it a failure + * The only reason this will block forever is if socket() + * of connect() fail in the child process + */ + do { + so->s = accept(s, (struct sockaddr *)&addr, &addrlen); + } while (so->s < 0 && errno == EINTR); + closesocket(s); + socket_set_fast_reuse(so->s); + opt = 1; + qemu_setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); + qemu_set_nonblock(so->s); + + /* Append the telnet options now */ + if (so->so_m != NULL && do_pty == 1) { + sbappend(so, so->so_m); + so->so_m = NULL; + } + + return 1; + } +} +#endif + +void slirp_connection_info(Slirp *slirp, Monitor *mon) +{ + const char * const tcpstates[] = { + [TCPS_CLOSED] = "CLOSED", + [TCPS_LISTEN] = "LISTEN", + [TCPS_SYN_SENT] = "SYN_SENT", + [TCPS_SYN_RECEIVED] = "SYN_RCVD", + [TCPS_ESTABLISHED] = "ESTABLISHED", + [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", + [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", + [TCPS_CLOSING] = "CLOSING", + [TCPS_LAST_ACK] = "LAST_ACK", + [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", + [TCPS_TIME_WAIT] = "TIME_WAIT", + }; + struct in_addr dst_addr; + struct sockaddr_in src; + socklen_t src_len; + uint16_t dst_port; + struct socket *so; + const char *state; + char buf[20]; + + monitor_printf(mon, " Protocol[State] FD Source Address Port " + "Dest. Address Port RecvQ SendQ\n"); + + for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { + if (so->so_state & SS_HOSTFWD) { + state = "HOST_FORWARD"; + } else if (so->so_tcpcb) { + state = tcpstates[so->so_tcpcb->t_state]; + } else { + state = "NONE"; + } + if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { + src_len = sizeof(src); + getsockname(so->s, (struct sockaddr *)&src, &src_len); + dst_addr = so->so_laddr; + dst_port = so->so_lport; + } else { + src.sin_addr = so->so_laddr; + src.sin_port = so->so_lport; + dst_addr = so->so_faddr; + dst_port = so->so_fport; + } + snprintf(buf, sizeof(buf), " TCP[%s]", state); + monitor_printf(mon, "%-19s %3d %15s %5d ", buf, so->s, + src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*", + ntohs(src.sin_port)); + monitor_printf(mon, "%15s %5d %5d %5d\n", + inet_ntoa(dst_addr), ntohs(dst_port), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } + + for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { + if (so->so_state & SS_HOSTFWD) { + snprintf(buf, sizeof(buf), " UDP[HOST_FORWARD]"); + src_len = sizeof(src); + getsockname(so->s, (struct sockaddr *)&src, &src_len); + dst_addr = so->so_laddr; + dst_port = so->so_lport; + } else { + snprintf(buf, sizeof(buf), " UDP[%d sec]", + (so->so_expire - curtime) / 1000); + src.sin_addr = so->so_laddr; + src.sin_port = so->so_lport; + dst_addr = so->so_faddr; + dst_port = so->so_fport; + } + monitor_printf(mon, "%-19s %3d %15s %5d ", buf, so->s, + src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*", + ntohs(src.sin_port)); + monitor_printf(mon, "%15s %5d %5d %5d\n", + inet_ntoa(dst_addr), ntohs(dst_port), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } + + for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { + snprintf(buf, sizeof(buf), " ICMP[%d sec]", + (so->so_expire - curtime) / 1000); + src.sin_addr = so->so_laddr; + dst_addr = so->so_faddr; + monitor_printf(mon, "%-19s %3d %15s - ", buf, so->s, + src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : "*"); + monitor_printf(mon, "%15s - %5d %5d\n", inet_ntoa(dst_addr), + so->so_rcv.sb_cc, so->so_snd.sb_cc); + } +} diff --git a/slirp/misc.h b/slirp/misc.h new file mode 100644 index 00000000..41a32583 --- /dev/null +++ b/slirp/misc.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _MISC_H_ +#define _MISC_H_ + +struct ex_list { + int ex_pty; /* Do we want a pty? */ + struct in_addr ex_addr; /* Server address */ + int ex_fport; /* Port to telnet to */ + const char *ex_exec; /* Command line of what to exec */ + struct ex_list *ex_next; +}; + +#define EMU_NONE 0x0 + +/* TCP emulations */ +#define EMU_CTL 0x1 +#define EMU_FTP 0x2 +#define EMU_KSH 0x3 +#define EMU_IRC 0x4 +#define EMU_REALAUDIO 0x5 +#define EMU_RLOGIN 0x6 +#define EMU_IDENT 0x7 +#define EMU_RSH 0x8 + +#define EMU_NOCONNECT 0x10 /* Don't connect */ + +struct tos_t { + uint16_t lport; + uint16_t fport; + uint8_t tos; + uint8_t emu; +}; + +struct emu_t { + uint16_t lport; + uint16_t fport; + uint8_t tos; + uint8_t emu; + struct emu_t *next; +}; + +void slirp_insque(void *, void *); +void slirp_remque(void *); +int add_exec(struct ex_list **, int, char *, struct in_addr, int); +int fork_exec(struct socket *so, const char *ex, int do_pty); + +#endif diff --git a/slirp/sbuf.c b/slirp/sbuf.c new file mode 100644 index 00000000..08ec2b4f --- /dev/null +++ b/slirp/sbuf.c @@ -0,0 +1,187 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include +#include + +static void sbappendsb(struct sbuf *sb, struct mbuf *m); + +void +sbfree(struct sbuf *sb) +{ + free(sb->sb_data); +} + +void +sbdrop(struct sbuf *sb, int num) +{ + int limit = sb->sb_datalen / 2; + + /* + * We can only drop how much we have + * This should never succeed + */ + if(num > sb->sb_cc) + num = sb->sb_cc; + sb->sb_cc -= num; + sb->sb_rptr += num; + if(sb->sb_rptr >= sb->sb_data + sb->sb_datalen) + sb->sb_rptr -= sb->sb_datalen; + + if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { + qemu_notify_event(); + } +} + +void +sbreserve(struct sbuf *sb, int size) +{ + if (sb->sb_data) { + /* Already alloced, realloc if necessary */ + if (sb->sb_datalen != size) { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)realloc(sb->sb_data, size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } + } else { + sb->sb_wptr = sb->sb_rptr = sb->sb_data = (char *)malloc(size); + sb->sb_cc = 0; + if (sb->sb_wptr) + sb->sb_datalen = size; + else + sb->sb_datalen = 0; + } +} + +/* + * Try and write() to the socket, whatever doesn't get written + * append to the buffer... for a host with a fast net connection, + * this prevents an unnecessary copy of the data + * (the socket is non-blocking, so we won't hang) + */ +void +sbappend(struct socket *so, struct mbuf *m) +{ + int ret = 0; + + DEBUG_CALL("sbappend"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("m->m_len = %d", m->m_len); + + /* Shouldn't happen, but... e.g. foreign host closes connection */ + if (m->m_len <= 0) { + m_free(m); + return; + } + + /* + * If there is urgent data, call sosendoob + * if not all was sent, sowrite will take care of the rest + * (The rest of this function is just an optimisation) + */ + if (so->so_urgc) { + sbappendsb(&so->so_rcv, m); + m_free(m); + sosendoob(so); + return; + } + + /* + * We only write if there's nothing in the buffer, + * ottherwise it'll arrive out of order, and hence corrupt + */ + if (!so->so_rcv.sb_cc) + ret = slirp_send(so, m->m_data, m->m_len, 0); + + if (ret <= 0) { + /* + * Nothing was written + * It's possible that the socket has closed, but + * we don't need to check because if it has closed, + * it will be detected in the normal way by soread() + */ + sbappendsb(&so->so_rcv, m); + } else if (ret != m->m_len) { + /* + * Something was written, but not everything.. + * sbappendsb the rest + */ + m->m_len -= ret; + m->m_data += ret; + sbappendsb(&so->so_rcv, m); + } /* else */ + /* Whatever happened, we free the mbuf */ + m_free(m); +} + +/* + * Copy the data from m into sb + * The caller is responsible to make sure there's enough room + */ +static void +sbappendsb(struct sbuf *sb, struct mbuf *m) +{ + int len, n, nn; + + len = m->m_len; + + if (sb->sb_wptr < sb->sb_rptr) { + n = sb->sb_rptr - sb->sb_wptr; + if (n > len) n = len; + memcpy(sb->sb_wptr, m->m_data, n); + } else { + /* Do the right edge first */ + n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; + if (n > len) n = len; + memcpy(sb->sb_wptr, m->m_data, n); + len -= n; + if (len) { + /* Now the left edge */ + nn = sb->sb_rptr - sb->sb_data; + if (nn > len) nn = len; + memcpy(sb->sb_data,m->m_data+n,nn); + n += nn; + } + } + + sb->sb_cc += n; + sb->sb_wptr += n; + if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) + sb->sb_wptr -= sb->sb_datalen; +} + +/* + * Copy data from sbuf to a normal, straight buffer + * Don't update the sbuf rptr, this will be + * done in sbdrop when the data is acked + */ +void +sbcopy(struct sbuf *sb, int off, int len, char *to) +{ + char *from; + + from = sb->sb_rptr + off; + if (from >= sb->sb_data + sb->sb_datalen) + from -= sb->sb_datalen; + + if (from < sb->sb_wptr) { + if (len > sb->sb_cc) len = sb->sb_cc; + memcpy(to,from,len); + } else { + /* re-use off */ + off = (sb->sb_data + sb->sb_datalen) - from; + if (off > len) off = len; + memcpy(to,from,off); + len -= off; + if (len) + memcpy(to+off,sb->sb_data,len); + } +} diff --git a/slirp/sbuf.h b/slirp/sbuf.h new file mode 100644 index 00000000..4f22e7c3 --- /dev/null +++ b/slirp/sbuf.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _SBUF_H_ +#define _SBUF_H_ + +#define sbflush(sb) sbdrop((sb),(sb)->sb_cc) +#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) + +struct sbuf { + u_int sb_cc; /* actual chars in buffer */ + u_int sb_datalen; /* Length of data */ + char *sb_wptr; /* write pointer. points to where the next + * bytes should be written in the sbuf */ + char *sb_rptr; /* read pointer. points to where the next + * byte should be read from the sbuf */ + char *sb_data; /* Actual data */ +}; + +void sbfree(struct sbuf *); +void sbdrop(struct sbuf *, int); +void sbreserve(struct sbuf *, int); +void sbappend(struct socket *, struct mbuf *); +void sbcopy(struct sbuf *, int, int, char *); + +#endif diff --git a/slirp/slirp.c b/slirp/slirp.c new file mode 100644 index 00000000..35f819af --- /dev/null +++ b/slirp/slirp.c @@ -0,0 +1,1207 @@ +/* + * libslirp glue + * + * Copyright (c) 2004-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu-common.h" +#include "qemu/timer.h" +#include "sysemu/char.h" +#include "slirp.h" +#include "hw/hw.h" + +/* host loopback address */ +struct in_addr loopback_addr; +/* host loopback network mask */ +unsigned long loopback_mask; + +/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ +static const uint8_t special_ethaddr[ETH_ALEN] = { + 0x52, 0x55, 0x00, 0x00, 0x00, 0x00 +}; + +u_int curtime; + +static QTAILQ_HEAD(slirp_instances, Slirp) slirp_instances = + QTAILQ_HEAD_INITIALIZER(slirp_instances); + +static struct in_addr dns_addr; +static u_int dns_addr_time; + +#define TIMEOUT_FAST 2 /* milliseconds */ +#define TIMEOUT_SLOW 499 /* milliseconds */ +/* for the aging of certain requests like DNS */ +#define TIMEOUT_DEFAULT 1000 /* milliseconds */ + +#ifdef _WIN32 + +int get_dns_addr(struct in_addr *pdns_addr) +{ + FIXED_INFO *FixedInfo=NULL; + ULONG BufLen; + DWORD ret; + IP_ADDR_STRING *pIPAddr; + struct in_addr tmp_addr; + + if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { + *pdns_addr = dns_addr; + return 0; + } + + FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); + BufLen = sizeof(FIXED_INFO); + + if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { + if (FixedInfo) { + GlobalFree(FixedInfo); + FixedInfo = NULL; + } + FixedInfo = GlobalAlloc(GPTR, BufLen); + } + + if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { + printf("GetNetworkParams failed. ret = %08x\n", (u_int)ret ); + if (FixedInfo) { + GlobalFree(FixedInfo); + FixedInfo = NULL; + } + return -1; + } + + pIPAddr = &(FixedInfo->DnsServerList); + inet_aton(pIPAddr->IpAddress.String, &tmp_addr); + *pdns_addr = tmp_addr; + dns_addr = tmp_addr; + dns_addr_time = curtime; + if (FixedInfo) { + GlobalFree(FixedInfo); + FixedInfo = NULL; + } + return 0; +} + +static void winsock_cleanup(void) +{ + WSACleanup(); +} + +#else + +static struct stat dns_addr_stat; + +int get_dns_addr(struct in_addr *pdns_addr) +{ + char buff[512]; + char buff2[257]; + FILE *f; + int found = 0; + struct in_addr tmp_addr; + + if (dns_addr.s_addr != 0) { + struct stat old_stat; + if ((curtime - dns_addr_time) < TIMEOUT_DEFAULT) { + *pdns_addr = dns_addr; + return 0; + } + old_stat = dns_addr_stat; + if (stat("/etc/resolv.conf", &dns_addr_stat) != 0) + return -1; + if ((dns_addr_stat.st_dev == old_stat.st_dev) + && (dns_addr_stat.st_ino == old_stat.st_ino) + && (dns_addr_stat.st_size == old_stat.st_size) + && (dns_addr_stat.st_mtime == old_stat.st_mtime)) { + *pdns_addr = dns_addr; + return 0; + } + } + + f = fopen("/etc/resolv.conf", "r"); + if (!f) + return -1; + +#ifdef DEBUG + fprintf(stderr, "IP address of your DNS(s): "); +#endif + while (fgets(buff, 512, f) != NULL) { + if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { + if (!inet_aton(buff2, &tmp_addr)) + continue; + /* If it's the first one, set it to dns_addr */ + if (!found) { + *pdns_addr = tmp_addr; + dns_addr = tmp_addr; + dns_addr_time = curtime; + } +#ifdef DEBUG + else + fprintf(stderr, ", "); +#endif + if (++found > 3) { +#ifdef DEBUG + fprintf(stderr, "(more)"); +#endif + break; + } +#ifdef DEBUG + else + fprintf(stderr, "%s", inet_ntoa(tmp_addr)); +#endif + } + } + fclose(f); + if (!found) + return -1; + return 0; +} + +#endif + +static void slirp_init_once(void) +{ + static int initialized; +#ifdef _WIN32 + WSADATA Data; +#endif + + if (initialized) { + return; + } + initialized = 1; + +#ifdef _WIN32 + WSAStartup(MAKEWORD(2,0), &Data); + atexit(winsock_cleanup); +#endif + + loopback_addr.s_addr = htonl(INADDR_LOOPBACK); + loopback_mask = htonl(IN_CLASSA_NET); +} + +static void slirp_state_save(QEMUFile *f, void *opaque); +static int slirp_state_load(QEMUFile *f, void *opaque, int version_id); + +Slirp *slirp_init(int restricted, struct in_addr vnetwork, + struct in_addr vnetmask, struct in_addr vhost, + const char *vhostname, const char *tftp_path, + const char *bootfile, struct in_addr vdhcp_start, + struct in_addr vnameserver, const char **vdnssearch, + void *opaque) +{ + Slirp *slirp = g_malloc0(sizeof(Slirp)); + + slirp_init_once(); + + slirp->restricted = restricted; + + if_init(slirp); + ip_init(slirp); + + /* Initialise mbufs *after* setting the MTU */ + m_init(slirp); + + slirp->vnetwork_addr = vnetwork; + slirp->vnetwork_mask = vnetmask; + slirp->vhost_addr = vhost; + if (vhostname) { + pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), + vhostname); + } + slirp->tftp_prefix = g_strdup(tftp_path); + slirp->bootp_filename = g_strdup(bootfile); + slirp->vdhcp_startaddr = vdhcp_start; + slirp->vnameserver_addr = vnameserver; + + if (vdnssearch) { + translate_dnssearch(slirp, vdnssearch); + } + + slirp->opaque = opaque; + + register_savevm(NULL, "slirp", 0, 3, + slirp_state_save, slirp_state_load, slirp); + + QTAILQ_INSERT_TAIL(&slirp_instances, slirp, entry); + + return slirp; +} + +void slirp_cleanup(Slirp *slirp) +{ + QTAILQ_REMOVE(&slirp_instances, slirp, entry); + + unregister_savevm(NULL, "slirp", slirp); + + ip_cleanup(slirp); + m_cleanup(slirp); + + g_free(slirp->vdnssearch); + g_free(slirp->tftp_prefix); + g_free(slirp->bootp_filename); + g_free(slirp); +} + +#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) +#define CONN_CANFRCV(so) (((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED) + +static void slirp_update_timeout(uint32_t *timeout) +{ + Slirp *slirp; + uint32_t t; + + if (*timeout <= TIMEOUT_FAST) { + return; + } + + t = MIN(1000, *timeout); + + /* If we have tcp timeout with slirp, then we will fill @timeout with + * more precise value. + */ + QTAILQ_FOREACH(slirp, &slirp_instances, entry) { + if (slirp->time_fasttimo) { + *timeout = TIMEOUT_FAST; + return; + } + if (slirp->do_slowtimo) { + t = MIN(TIMEOUT_SLOW, t); + } + } + *timeout = t; +} + +void slirp_pollfds_fill(GArray *pollfds, uint32_t *timeout) +{ + Slirp *slirp; + struct socket *so, *so_next; + + if (QTAILQ_EMPTY(&slirp_instances)) { + return; + } + + /* + * First, TCP sockets + */ + + QTAILQ_FOREACH(slirp, &slirp_instances, entry) { + /* + * *_slowtimo needs calling if there are IP fragments + * in the fragment queue, or there are TCP connections active + */ + slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || + (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); + + for (so = slirp->tcb.so_next; so != &slirp->tcb; + so = so_next) { + int events = 0; + + so_next = so->so_next; + + so->pollfds_idx = -1; + + /* + * See if we need a tcp_fasttimo + */ + if (slirp->time_fasttimo == 0 && + so->so_tcpcb->t_flags & TF_DELACK) { + slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ + } + + /* + * NOFDREF can include still connecting to local-host, + * newly socreated() sockets etc. Don't want to select these. + */ + if (so->so_state & SS_NOFDREF || so->s == -1) { + continue; + } + + /* + * Set for reading sockets which are accepting + */ + if (so->so_state & SS_FACCEPTCONN) { + GPollFD pfd = { + .fd = so->s, + .events = G_IO_IN | G_IO_HUP | G_IO_ERR, + }; + so->pollfds_idx = pollfds->len; + g_array_append_val(pollfds, pfd); + continue; + } + + /* + * Set for writing sockets which are connecting + */ + if (so->so_state & SS_ISFCONNECTING) { + GPollFD pfd = { + .fd = so->s, + .events = G_IO_OUT | G_IO_ERR, + }; + so->pollfds_idx = pollfds->len; + g_array_append_val(pollfds, pfd); + continue; + } + + /* + * Set for writing if we are connected, can send more, and + * we have something to send + */ + if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { + events |= G_IO_OUT | G_IO_ERR; + } + + /* + * Set for reading (and urgent data) if we are connected, can + * receive more, and we have room for it XXX /2 ? + */ + if (CONN_CANFRCV(so) && + (so->so_snd.sb_cc < (so->so_snd.sb_datalen/2))) { + events |= G_IO_IN | G_IO_HUP | G_IO_ERR | G_IO_PRI; + } + + if (events) { + GPollFD pfd = { + .fd = so->s, + .events = events, + }; + so->pollfds_idx = pollfds->len; + g_array_append_val(pollfds, pfd); + } + } + + /* + * UDP sockets + */ + for (so = slirp->udb.so_next; so != &slirp->udb; + so = so_next) { + so_next = so->so_next; + + so->pollfds_idx = -1; + + /* + * See if it's timed out + */ + if (so->so_expire) { + if (so->so_expire <= curtime) { + udp_detach(so); + continue; + } else { + slirp->do_slowtimo = true; /* Let socket expire */ + } + } + + /* + * When UDP packets are received from over the + * link, they're sendto()'d straight away, so + * no need for setting for writing + * Limit the number of packets queued by this session + * to 4. Note that even though we try and limit this + * to 4 packets, the session could have more queued + * if the packets needed to be fragmented + * (XXX <= 4 ?) + */ + if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { + GPollFD pfd = { + .fd = so->s, + .events = G_IO_IN | G_IO_HUP | G_IO_ERR, + }; + so->pollfds_idx = pollfds->len; + g_array_append_val(pollfds, pfd); + } + } + + /* + * ICMP sockets + */ + for (so = slirp->icmp.so_next; so != &slirp->icmp; + so = so_next) { + so_next = so->so_next; + + so->pollfds_idx = -1; + + /* + * See if it's timed out + */ + if (so->so_expire) { + if (so->so_expire <= curtime) { + icmp_detach(so); + continue; + } else { + slirp->do_slowtimo = true; /* Let socket expire */ + } + } + + if (so->so_state & SS_ISFCONNECTED) { + GPollFD pfd = { + .fd = so->s, + .events = G_IO_IN | G_IO_HUP | G_IO_ERR, + }; + so->pollfds_idx = pollfds->len; + g_array_append_val(pollfds, pfd); + } + } + } + slirp_update_timeout(timeout); +} + +void slirp_pollfds_poll(GArray *pollfds, int select_error) +{ + Slirp *slirp; + struct socket *so, *so_next; + int ret; + + if (QTAILQ_EMPTY(&slirp_instances)) { + return; + } + + curtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + + QTAILQ_FOREACH(slirp, &slirp_instances, entry) { + /* + * See if anything has timed out + */ + if (slirp->time_fasttimo && + ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { + tcp_fasttimo(slirp); + slirp->time_fasttimo = 0; + } + if (slirp->do_slowtimo && + ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { + ip_slowtimo(slirp); + tcp_slowtimo(slirp); + slirp->last_slowtimo = curtime; + } + + /* + * Check sockets + */ + if (!select_error) { + /* + * Check TCP sockets + */ + for (so = slirp->tcb.so_next; so != &slirp->tcb; + so = so_next) { + int revents; + + so_next = so->so_next; + + revents = 0; + if (so->pollfds_idx != -1) { + revents = g_array_index(pollfds, GPollFD, + so->pollfds_idx).revents; + } + + if (so->so_state & SS_NOFDREF || so->s == -1) { + continue; + } + + /* + * Check for URG data + * This will soread as well, so no need to + * test for G_IO_IN below if this succeeds + */ + if (revents & G_IO_PRI) { + sorecvoob(so); + } + /* + * Check sockets for reading + */ + else if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) { + /* + * Check for incoming connections + */ + if (so->so_state & SS_FACCEPTCONN) { + tcp_connect(so); + continue; + } /* else */ + ret = soread(so); + + /* Output it if we read something */ + if (ret > 0) { + tcp_output(sototcpcb(so)); + } + } + + /* + * Check sockets for writing + */ + if (!(so->so_state & SS_NOFDREF) && + (revents & (G_IO_OUT | G_IO_ERR))) { + /* + * Check for non-blocking, still-connecting sockets + */ + if (so->so_state & SS_ISFCONNECTING) { + /* Connected */ + so->so_state &= ~SS_ISFCONNECTING; + + ret = send(so->s, (const void *) &ret, 0, 0); + if (ret < 0) { + /* XXXXX Must fix, zero bytes is a NOP */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) { + continue; + } + + /* else failed */ + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; + } + /* else so->so_state &= ~SS_ISFCONNECTING; */ + + /* + * Continue tcp_input + */ + tcp_input((struct mbuf *)NULL, sizeof(struct ip), so); + /* continue; */ + } else { + ret = sowrite(so); + } + /* + * XXXXX If we wrote something (a lot), there + * could be a need for a window update. + * In the worst case, the remote will send + * a window probe to get things going again + */ + } + + /* + * Probe a still-connecting, non-blocking socket + * to check if it's still alive + */ +#ifdef PROBE_CONN + if (so->so_state & SS_ISFCONNECTING) { + ret = qemu_recv(so->s, &ret, 0, 0); + + if (ret < 0) { + /* XXX */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) { + continue; /* Still connecting, continue */ + } + + /* else failed */ + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; + + /* tcp_input will take care of it */ + } else { + ret = send(so->s, &ret, 0, 0); + if (ret < 0) { + /* XXX */ + if (errno == EAGAIN || errno == EWOULDBLOCK || + errno == EINPROGRESS || errno == ENOTCONN) { + continue; + } + /* else failed */ + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; + } else { + so->so_state &= ~SS_ISFCONNECTING; + } + + } + tcp_input((struct mbuf *)NULL, sizeof(struct ip), so); + } /* SS_ISFCONNECTING */ +#endif + } + + /* + * Now UDP sockets. + * Incoming packets are sent straight away, they're not buffered. + * Incoming UDP data isn't buffered either. + */ + for (so = slirp->udb.so_next; so != &slirp->udb; + so = so_next) { + int revents; + + so_next = so->so_next; + + revents = 0; + if (so->pollfds_idx != -1) { + revents = g_array_index(pollfds, GPollFD, + so->pollfds_idx).revents; + } + + if (so->s != -1 && + (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR))) { + sorecvfrom(so); + } + } + + /* + * Check incoming ICMP relies. + */ + for (so = slirp->icmp.so_next; so != &slirp->icmp; + so = so_next) { + int revents; + + so_next = so->so_next; + + revents = 0; + if (so->pollfds_idx != -1) { + revents = g_array_index(pollfds, GPollFD, + so->pollfds_idx).revents; + } + + if (so->s != -1 && + (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR))) { + icmp_receive(so); + } + } + } + + if_start(slirp); + } +} + +static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ + struct arphdr *ah = (struct arphdr *)(pkt + ETH_HLEN); + uint8_t arp_reply[max(ETH_HLEN + sizeof(struct arphdr), 64)]; + struct ethhdr *reh = (struct ethhdr *)arp_reply; + struct arphdr *rah = (struct arphdr *)(arp_reply + ETH_HLEN); + int ar_op; + struct ex_list *ex_ptr; + + ar_op = ntohs(ah->ar_op); + switch(ar_op) { + case ARPOP_REQUEST: + if (ah->ar_tip == ah->ar_sip) { + /* Gratuitous ARP */ + arp_table_add(slirp, ah->ar_sip, ah->ar_sha); + return; + } + + if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + if (ah->ar_tip == slirp->vnameserver_addr.s_addr || + ah->ar_tip == slirp->vhost_addr.s_addr) + goto arp_ok; + for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_addr.s_addr == ah->ar_tip) + goto arp_ok; + } + return; + arp_ok: + memset(arp_reply, 0, sizeof(arp_reply)); + + arp_table_add(slirp, ah->ar_sip, ah->ar_sha); + + /* ARP request for alias/dns mac address */ + memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); + memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); + memcpy(&reh->h_source[2], &ah->ar_tip, 4); + reh->h_proto = htons(ETH_P_ARP); + + rah->ar_hrd = htons(1); + rah->ar_pro = htons(ETH_P_IP); + rah->ar_hln = ETH_ALEN; + rah->ar_pln = 4; + rah->ar_op = htons(ARPOP_REPLY); + memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); + rah->ar_sip = ah->ar_tip; + memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); + rah->ar_tip = ah->ar_sip; + slirp_output(slirp->opaque, arp_reply, sizeof(arp_reply)); + } + break; + case ARPOP_REPLY: + arp_table_add(slirp, ah->ar_sip, ah->ar_sha); + break; + default: + break; + } +} + +void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) +{ + struct mbuf *m; + int proto; + + if (pkt_len < ETH_HLEN) + return; + + proto = ntohs(*(uint16_t *)(pkt + 12)); + switch(proto) { + case ETH_P_ARP: + arp_input(slirp, pkt, pkt_len); + break; + case ETH_P_IP: + m = m_get(slirp); + if (!m) + return; + /* Note: we add to align the IP header */ + if (M_FREEROOM(m) < pkt_len + 2) { + m_inc(m, pkt_len + 2); + } + m->m_len = pkt_len + 2; + memcpy(m->m_data + 2, pkt, pkt_len); + + m->m_data += 2 + ETH_HLEN; + m->m_len -= 2 + ETH_HLEN; + + ip_input(m); + break; + default: + break; + } +} + +/* Output the IP packet to the ethernet device. Returns 0 if the packet must be + * re-queued. + */ +int if_encap(Slirp *slirp, struct mbuf *ifm) +{ + uint8_t buf[1600]; + struct ethhdr *eh = (struct ethhdr *)buf; + uint8_t ethaddr[ETH_ALEN]; + const struct ip *iph = (const struct ip *)ifm->m_data; + + if (ifm->m_len + ETH_HLEN > sizeof(buf)) { + return 1; + } + + if (iph->ip_dst.s_addr == 0) { + /* 0.0.0.0 can not be a destination address, something went wrong, + * avoid making it worse */ + return 1; + } + if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { + uint8_t arp_req[ETH_HLEN + sizeof(struct arphdr)]; + struct ethhdr *reh = (struct ethhdr *)arp_req; + struct arphdr *rah = (struct arphdr *)(arp_req + ETH_HLEN); + + if (!ifm->arp_requested) { + /* If the client addr is not known, send an ARP request */ + memset(reh->h_dest, 0xff, ETH_ALEN); + memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); + memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); + reh->h_proto = htons(ETH_P_ARP); + rah->ar_hrd = htons(1); + rah->ar_pro = htons(ETH_P_IP); + rah->ar_hln = ETH_ALEN; + rah->ar_pln = 4; + rah->ar_op = htons(ARPOP_REQUEST); + + /* source hw addr */ + memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); + memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); + + /* source IP */ + rah->ar_sip = slirp->vhost_addr.s_addr; + + /* target hw addr (none) */ + memset(rah->ar_tha, 0, ETH_ALEN); + + /* target IP */ + rah->ar_tip = iph->ip_dst.s_addr; + slirp->client_ipaddr = iph->ip_dst; + slirp_output(slirp->opaque, arp_req, sizeof(arp_req)); + ifm->arp_requested = true; + + /* Expire request and drop outgoing packet after 1 second */ + ifm->expiration_date = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + 1000000000ULL; + } + return 0; + } else { + memcpy(eh->h_dest, ethaddr, ETH_ALEN); + memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); + /* XXX: not correct */ + memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); + eh->h_proto = htons(ETH_P_IP); + memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); + slirp_output(slirp->opaque, buf, ifm->m_len + ETH_HLEN); + return 1; + } +} + +/* Drop host forwarding rule, return 0 if found. */ +int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, + int host_port) +{ + struct socket *so; + struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); + struct sockaddr_in addr; + int port = htons(host_port); + socklen_t addr_len; + + for (so = head->so_next; so != head; so = so->so_next) { + addr_len = sizeof(addr); + if ((so->so_state & SS_HOSTFWD) && + getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && + addr.sin_addr.s_addr == host_addr.s_addr && + addr.sin_port == port) { + close(so->s); + sofree(so); + return 0; + } + } + + return -1; +} + +int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, + int host_port, struct in_addr guest_addr, int guest_port) +{ + if (!guest_addr.s_addr) { + guest_addr = slirp->vdhcp_startaddr; + } + if (is_udp) { + if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), + guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) + return -1; + } else { + if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), + guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) + return -1; + } + return 0; +} + +int slirp_add_exec(Slirp *slirp, int do_pty, const void *args, + struct in_addr *guest_addr, int guest_port) +{ + if (!guest_addr->s_addr) { + guest_addr->s_addr = slirp->vnetwork_addr.s_addr | + (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); + } + if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != + slirp->vnetwork_addr.s_addr || + guest_addr->s_addr == slirp->vhost_addr.s_addr || + guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { + return -1; + } + return add_exec(&slirp->exec_list, do_pty, (char *)args, *guest_addr, + htons(guest_port)); +} + +ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) +{ + if (so->s == -1 && so->extra) { + qemu_chr_fe_write(so->extra, buf, len); + return len; + } + + return send(so->s, buf, len, flags); +} + +static struct socket * +slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, int guest_port) +{ + struct socket *so; + + for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { + if (so->so_faddr.s_addr == guest_addr.s_addr && + htons(so->so_fport) == guest_port) { + return so; + } + } + return NULL; +} + +size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port) +{ + struct iovec iov[2]; + struct socket *so; + + so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); + + if (!so || so->so_state & SS_NOFDREF) { + return 0; + } + + if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen/2)) { + return 0; + } + + return sopreprbuf(so, iov, NULL); +} + +void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, + const uint8_t *buf, int size) +{ + int ret; + struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); + + if (!so) + return; + + ret = soreadbuf(so, (const char *)buf, size); + + if (ret > 0) + tcp_output(sototcpcb(so)); +} + +static void slirp_tcp_save(QEMUFile *f, struct tcpcb *tp) +{ + int i; + + qemu_put_sbe16(f, tp->t_state); + for (i = 0; i < TCPT_NTIMERS; i++) + qemu_put_sbe16(f, tp->t_timer[i]); + qemu_put_sbe16(f, tp->t_rxtshift); + qemu_put_sbe16(f, tp->t_rxtcur); + qemu_put_sbe16(f, tp->t_dupacks); + qemu_put_be16(f, tp->t_maxseg); + qemu_put_sbyte(f, tp->t_force); + qemu_put_be16(f, tp->t_flags); + qemu_put_be32(f, tp->snd_una); + qemu_put_be32(f, tp->snd_nxt); + qemu_put_be32(f, tp->snd_up); + qemu_put_be32(f, tp->snd_wl1); + qemu_put_be32(f, tp->snd_wl2); + qemu_put_be32(f, tp->iss); + qemu_put_be32(f, tp->snd_wnd); + qemu_put_be32(f, tp->rcv_wnd); + qemu_put_be32(f, tp->rcv_nxt); + qemu_put_be32(f, tp->rcv_up); + qemu_put_be32(f, tp->irs); + qemu_put_be32(f, tp->rcv_adv); + qemu_put_be32(f, tp->snd_max); + qemu_put_be32(f, tp->snd_cwnd); + qemu_put_be32(f, tp->snd_ssthresh); + qemu_put_sbe16(f, tp->t_idle); + qemu_put_sbe16(f, tp->t_rtt); + qemu_put_be32(f, tp->t_rtseq); + qemu_put_sbe16(f, tp->t_srtt); + qemu_put_sbe16(f, tp->t_rttvar); + qemu_put_be16(f, tp->t_rttmin); + qemu_put_be32(f, tp->max_sndwnd); + qemu_put_byte(f, tp->t_oobflags); + qemu_put_byte(f, tp->t_iobc); + qemu_put_sbe16(f, tp->t_softerror); + qemu_put_byte(f, tp->snd_scale); + qemu_put_byte(f, tp->rcv_scale); + qemu_put_byte(f, tp->request_r_scale); + qemu_put_byte(f, tp->requested_s_scale); + qemu_put_be32(f, tp->ts_recent); + qemu_put_be32(f, tp->ts_recent_age); + qemu_put_be32(f, tp->last_ack_sent); +} + +static void slirp_sbuf_save(QEMUFile *f, struct sbuf *sbuf) +{ + uint32_t off; + + qemu_put_be32(f, sbuf->sb_cc); + qemu_put_be32(f, sbuf->sb_datalen); + off = (uint32_t)(sbuf->sb_wptr - sbuf->sb_data); + qemu_put_sbe32(f, off); + off = (uint32_t)(sbuf->sb_rptr - sbuf->sb_data); + qemu_put_sbe32(f, off); + qemu_put_buffer(f, (unsigned char*)sbuf->sb_data, sbuf->sb_datalen); +} + +static void slirp_socket_save(QEMUFile *f, struct socket *so) +{ + qemu_put_be32(f, so->so_urgc); + qemu_put_be32(f, so->so_faddr.s_addr); + qemu_put_be32(f, so->so_laddr.s_addr); + qemu_put_be16(f, so->so_fport); + qemu_put_be16(f, so->so_lport); + qemu_put_byte(f, so->so_iptos); + qemu_put_byte(f, so->so_emu); + qemu_put_byte(f, so->so_type); + qemu_put_be32(f, so->so_state); + slirp_sbuf_save(f, &so->so_rcv); + slirp_sbuf_save(f, &so->so_snd); + slirp_tcp_save(f, so->so_tcpcb); +} + +static void slirp_bootp_save(QEMUFile *f, Slirp *slirp) +{ + int i; + + for (i = 0; i < NB_BOOTP_CLIENTS; i++) { + qemu_put_be16(f, slirp->bootp_clients[i].allocated); + qemu_put_buffer(f, slirp->bootp_clients[i].macaddr, 6); + } +} + +static void slirp_state_save(QEMUFile *f, void *opaque) +{ + Slirp *slirp = opaque; + struct ex_list *ex_ptr; + + for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) + if (ex_ptr->ex_pty == 3) { + struct socket *so; + so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, + ntohs(ex_ptr->ex_fport)); + if (!so) + continue; + + qemu_put_byte(f, 42); + slirp_socket_save(f, so); + } + qemu_put_byte(f, 0); + + qemu_put_be16(f, slirp->ip_id); + + slirp_bootp_save(f, slirp); +} + +static void slirp_tcp_load(QEMUFile *f, struct tcpcb *tp) +{ + int i; + + tp->t_state = qemu_get_sbe16(f); + for (i = 0; i < TCPT_NTIMERS; i++) + tp->t_timer[i] = qemu_get_sbe16(f); + tp->t_rxtshift = qemu_get_sbe16(f); + tp->t_rxtcur = qemu_get_sbe16(f); + tp->t_dupacks = qemu_get_sbe16(f); + tp->t_maxseg = qemu_get_be16(f); + tp->t_force = qemu_get_sbyte(f); + tp->t_flags = qemu_get_be16(f); + tp->snd_una = qemu_get_be32(f); + tp->snd_nxt = qemu_get_be32(f); + tp->snd_up = qemu_get_be32(f); + tp->snd_wl1 = qemu_get_be32(f); + tp->snd_wl2 = qemu_get_be32(f); + tp->iss = qemu_get_be32(f); + tp->snd_wnd = qemu_get_be32(f); + tp->rcv_wnd = qemu_get_be32(f); + tp->rcv_nxt = qemu_get_be32(f); + tp->rcv_up = qemu_get_be32(f); + tp->irs = qemu_get_be32(f); + tp->rcv_adv = qemu_get_be32(f); + tp->snd_max = qemu_get_be32(f); + tp->snd_cwnd = qemu_get_be32(f); + tp->snd_ssthresh = qemu_get_be32(f); + tp->t_idle = qemu_get_sbe16(f); + tp->t_rtt = qemu_get_sbe16(f); + tp->t_rtseq = qemu_get_be32(f); + tp->t_srtt = qemu_get_sbe16(f); + tp->t_rttvar = qemu_get_sbe16(f); + tp->t_rttmin = qemu_get_be16(f); + tp->max_sndwnd = qemu_get_be32(f); + tp->t_oobflags = qemu_get_byte(f); + tp->t_iobc = qemu_get_byte(f); + tp->t_softerror = qemu_get_sbe16(f); + tp->snd_scale = qemu_get_byte(f); + tp->rcv_scale = qemu_get_byte(f); + tp->request_r_scale = qemu_get_byte(f); + tp->requested_s_scale = qemu_get_byte(f); + tp->ts_recent = qemu_get_be32(f); + tp->ts_recent_age = qemu_get_be32(f); + tp->last_ack_sent = qemu_get_be32(f); + tcp_template(tp); +} + +static int slirp_sbuf_load(QEMUFile *f, struct sbuf *sbuf) +{ + uint32_t off, sb_cc, sb_datalen; + + sb_cc = qemu_get_be32(f); + sb_datalen = qemu_get_be32(f); + + sbreserve(sbuf, sb_datalen); + + if (sbuf->sb_datalen != sb_datalen) + return -ENOMEM; + + sbuf->sb_cc = sb_cc; + + off = qemu_get_sbe32(f); + sbuf->sb_wptr = sbuf->sb_data + off; + off = qemu_get_sbe32(f); + sbuf->sb_rptr = sbuf->sb_data + off; + qemu_get_buffer(f, (unsigned char*)sbuf->sb_data, sbuf->sb_datalen); + + return 0; +} + +static int slirp_socket_load(QEMUFile *f, struct socket *so) +{ + if (tcp_attach(so) < 0) + return -ENOMEM; + + so->so_urgc = qemu_get_be32(f); + so->so_faddr.s_addr = qemu_get_be32(f); + so->so_laddr.s_addr = qemu_get_be32(f); + so->so_fport = qemu_get_be16(f); + so->so_lport = qemu_get_be16(f); + so->so_iptos = qemu_get_byte(f); + so->so_emu = qemu_get_byte(f); + so->so_type = qemu_get_byte(f); + so->so_state = qemu_get_be32(f); + if (slirp_sbuf_load(f, &so->so_rcv) < 0) + return -ENOMEM; + if (slirp_sbuf_load(f, &so->so_snd) < 0) + return -ENOMEM; + slirp_tcp_load(f, so->so_tcpcb); + + return 0; +} + +static void slirp_bootp_load(QEMUFile *f, Slirp *slirp) +{ + int i; + + for (i = 0; i < NB_BOOTP_CLIENTS; i++) { + slirp->bootp_clients[i].allocated = qemu_get_be16(f); + qemu_get_buffer(f, slirp->bootp_clients[i].macaddr, 6); + } +} + +static int slirp_state_load(QEMUFile *f, void *opaque, int version_id) +{ + Slirp *slirp = opaque; + struct ex_list *ex_ptr; + + while (qemu_get_byte(f)) { + int ret; + struct socket *so = socreate(slirp); + + if (!so) + return -ENOMEM; + + ret = slirp_socket_load(f, so); + + if (ret < 0) + return ret; + + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != + slirp->vnetwork_addr.s_addr) { + return -EINVAL; + } + for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_pty == 3 && + so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && + so->so_fport == ex_ptr->ex_fport) { + break; + } + } + if (!ex_ptr) + return -EINVAL; + + so->extra = (void *)ex_ptr->ex_exec; + } + + if (version_id >= 2) { + slirp->ip_id = qemu_get_be16(f); + } + + if (version_id >= 3) { + slirp_bootp_load(f, slirp); + } + + return 0; +} diff --git a/slirp/slirp.h b/slirp/slirp.h new file mode 100644 index 00000000..6589d7ee --- /dev/null +++ b/slirp/slirp.h @@ -0,0 +1,361 @@ +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include "config-host.h" +#include "slirp_config.h" + +#ifdef _WIN32 +# include + +typedef char *caddr_t; + +# include +# include +# include +# include +# include + +#else +# define ioctlsocket ioctl +# define closesocket(s) close(s) +# if !defined(__HAIKU__) +# define O_BINARY 0 +# endif +#endif + +#include +#ifdef HAVE_SYS_BITYPES_H +# include +#endif + +#include + +#ifdef HAVE_UNISTD_H +# include +#endif + +#ifdef HAVE_STDLIB_H +# include +#endif + +#include +#include + +#ifndef HAVE_MEMMOVE +#define memmove(x, y, z) bcopy(y, x, z) +#endif + +#if TIME_WITH_SYS_TIME +# include +# include +#else +# ifdef HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif + +#ifdef HAVE_STRING_H +# include +#else +# include +#endif + +#ifndef _WIN32 +#include +#endif + +#ifndef _WIN32 +#include +#include +#endif + +/* Systems lacking strdup() definition in . */ +#if defined(ultrix) +char *strdup(const char *); +#endif + +/* Systems lacking malloc() definition in . */ +#if defined(ultrix) || defined(hcx) +void *malloc(size_t arg); +void free(void *ptr); +#endif + +#include +#ifndef NO_UNIX_SOCKETS +#include +#endif +#include +#ifdef HAVE_SYS_SIGNAL_H +# include +#endif +#ifndef _WIN32 +#include +#endif + +#if defined(HAVE_SYS_IOCTL_H) +# include +#endif + +#ifdef HAVE_SYS_SELECT_H +# include +#endif + +#ifdef HAVE_SYS_WAIT_H +# include +#endif + +#ifdef HAVE_SYS_FILIO_H +# include +#endif + +#ifdef USE_PPP +#include +#endif + +#ifdef __STDC__ +#include +#else +#include +#endif + +#include + +/* Avoid conflicting with the libc insque() and remque(), which + have different prototypes. */ +#define insque slirp_insque +#define remque slirp_remque + +#ifdef HAVE_SYS_STROPTS_H +#include +#endif + +#include "debug.h" + +#include "qemu/queue.h" +#include "qemu/sockets.h" + +#include "libslirp.h" +#include "ip.h" +#include "tcp.h" +#include "tcp_timer.h" +#include "tcp_var.h" +#include "tcpip.h" +#include "udp.h" +#include "ip_icmp.h" +#include "mbuf.h" +#include "sbuf.h" +#include "socket.h" +#include "if.h" +#include "main.h" +#include "misc.h" +#ifdef USE_PPP +#include "ppp/pppd.h" +#include "ppp/ppp.h" +#endif + +#include "bootp.h" +#include "tftp.h" + +#define ETH_ALEN 6 +#define ETH_HLEN 14 + +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_ARP 0x0806 /* Address Resolution packet */ + +#define ARPOP_REQUEST 1 /* ARP request */ +#define ARPOP_REPLY 2 /* ARP reply */ + +struct ethhdr { + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_source[ETH_ALEN]; /* source ether addr */ + unsigned short h_proto; /* packet type ID field */ +}; + +struct arphdr { + unsigned short ar_hrd; /* format of hardware address */ + unsigned short ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + unsigned short ar_op; /* ARP opcode (command) */ + + /* + * Ethernet looks like this : This bit is variable sized however... + */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + uint32_t ar_sip; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + uint32_t ar_tip; /* target IP address */ +} QEMU_PACKED; + +#define ARP_TABLE_SIZE 16 + +typedef struct ArpTable { + struct arphdr table[ARP_TABLE_SIZE]; + int next_victim; +} ArpTable; + +void arp_table_add(Slirp *slirp, uint32_t ip_addr, uint8_t ethaddr[ETH_ALEN]); + +bool arp_table_search(Slirp *slirp, uint32_t ip_addr, + uint8_t out_ethaddr[ETH_ALEN]); + +struct Slirp { + QTAILQ_ENTRY(Slirp) entry; + u_int time_fasttimo; + u_int last_slowtimo; + bool do_slowtimo; + + /* virtual network configuration */ + struct in_addr vnetwork_addr; + struct in_addr vnetwork_mask; + struct in_addr vhost_addr; + struct in_addr vdhcp_startaddr; + struct in_addr vnameserver_addr; + + struct in_addr client_ipaddr; + char client_hostname[33]; + + int restricted; + struct ex_list *exec_list; + + /* mbuf states */ + struct mbuf m_freelist, m_usedlist; + int mbuf_alloced; + + /* if states */ + struct mbuf if_fastq; /* fast queue (for interactive data) */ + struct mbuf if_batchq; /* queue for non-interactive data */ + struct mbuf *next_m; /* pointer to next mbuf to output */ + bool if_start_busy; /* avoid if_start recursion */ + + /* ip states */ + struct ipq ipq; /* ip reass. queue */ + uint16_t ip_id; /* ip packet ctr, for ids */ + + /* bootp/dhcp states */ + BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; + char *bootp_filename; + size_t vdnssearch_len; + uint8_t *vdnssearch; + + /* tcp states */ + struct socket tcb; + struct socket *tcp_last_so; + tcp_seq tcp_iss; /* tcp initial send seq # */ + uint32_t tcp_now; /* for RFC 1323 timestamps */ + + /* udp states */ + struct socket udb; + struct socket *udp_last_so; + + /* icmp states */ + struct socket icmp; + struct socket *icmp_last_so; + + /* tftp states */ + char *tftp_prefix; + struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; + + ArpTable arp_table; + + void *opaque; +}; + +extern Slirp *slirp_instance; + +#ifndef NULL +#define NULL (void *)0 +#endif + +#ifndef FULL_BOLT +void if_start(Slirp *); +#else +void if_start(struct ttys *); +#endif + +#ifndef HAVE_STRERROR + char *strerror(int error); +#endif + +#ifndef HAVE_INDEX + char *index(const char *, int); +#endif + +#ifndef HAVE_GETHOSTID + long gethostid(void); +#endif + +#ifndef _WIN32 +#include +#endif + +#define DEFAULT_BAUD 115200 + +#define SO_OPTIONS DO_KEEPALIVE +#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) + +/* dnssearch.c */ +int translate_dnssearch(Slirp *s, const char ** names); + +/* cksum.c */ +int cksum(struct mbuf *m, int len); + +/* if.c */ +void if_init(Slirp *); +void if_output(struct socket *, struct mbuf *); + +/* ip_input.c */ +void ip_init(Slirp *); +void ip_cleanup(Slirp *); +void ip_input(struct mbuf *); +void ip_slowtimo(Slirp *); +void ip_stripoptions(register struct mbuf *, struct mbuf *); + +/* ip_output.c */ +int ip_output(struct socket *, struct mbuf *); + +/* tcp_input.c */ +void tcp_input(register struct mbuf *, int, struct socket *); +int tcp_mss(register struct tcpcb *, u_int); + +/* tcp_output.c */ +int tcp_output(register struct tcpcb *); +void tcp_setpersist(register struct tcpcb *); + +/* tcp_subr.c */ +void tcp_init(Slirp *); +void tcp_cleanup(Slirp *); +void tcp_template(struct tcpcb *); +void tcp_respond(struct tcpcb *, register struct tcpiphdr *, register struct mbuf *, tcp_seq, tcp_seq, int); +struct tcpcb * tcp_newtcpcb(struct socket *); +struct tcpcb * tcp_close(register struct tcpcb *); +void tcp_sockclosed(struct tcpcb *); +int tcp_fconnect(struct socket *); +void tcp_connect(struct socket *); +int tcp_attach(struct socket *); +uint8_t tcp_tos(struct socket *); +int tcp_emu(struct socket *, struct mbuf *); +int tcp_ctl(struct socket *); +struct tcpcb *tcp_drop(struct tcpcb *tp, int err); + +#ifdef USE_PPP +#define MIN_MRU MINMRU +#define MAX_MRU MAXMRU +#else +#define MIN_MRU 128 +#define MAX_MRU 16384 +#endif + +#ifndef _WIN32 +#define min(x,y) ((x) < (y) ? (x) : (y)) +#define max(x,y) ((x) > (y) ? (x) : (y)) +#endif + +#ifdef _WIN32 +#undef errno +#define errno (WSAGetLastError()) +#endif + +#endif diff --git a/slirp/slirp_config.h b/slirp/slirp_config.h new file mode 100644 index 00000000..896d8022 --- /dev/null +++ b/slirp/slirp_config.h @@ -0,0 +1,185 @@ +/* + * User definable configuration options + */ + +/* Define if you want the connection to be probed */ +/* XXX Not working yet, so ignore this for now */ +#undef PROBE_CONN + +/* Define to 1 if you want KEEPALIVE timers */ +#define DO_KEEPALIVE 0 + +/* Define to MAX interfaces you expect to use at once */ +/* MAX_INTERFACES determines the max. TOTAL number of interfaces (SLIP and PPP) */ +/* MAX_PPP_INTERFACES determines max. number of PPP interfaces */ +#define MAX_INTERFACES 1 +#define MAX_PPP_INTERFACES 1 + +/* Define if you want slirp's socket in /tmp */ +/* XXXXXX Do this in ./configure */ +#undef USE_TMPSOCKET + +/* Define if you want slirp to use cfsetXspeed() on the terminal */ +#undef DO_CFSETSPEED + +/* Define this if you want slirp to write to the tty as fast as it can */ +/* This should only be set if you are using load-balancing, slirp does a */ +/* pretty good job on single modems already, and seting this will make */ +/* interactive sessions less responsive */ +/* XXXXX Talk about having fast modem as unit 0 */ +#undef FULL_BOLT + +/* + * Define if you want slirp to use less CPU + * You will notice a small lag in interactive sessions, but it's not that bad + * Things like Netscape/ftp/etc. are completely unaffected + * This is mainly for sysadmins who have many slirp users + */ +#undef USE_LOWCPU + +/* Define this if your compiler doesn't like prototypes */ +#ifndef __STDC__ +#define NO_PROTOTYPES +#endif + +/*********************************************************/ +/* + * Autoconf defined configuration options + * You shouldn't need to touch any of these + */ + +/* Ignore this */ +#undef DUMMY_PPP + +/* Define if you have unistd.h */ +#define HAVE_UNISTD_H + +/* Define if you have stdlib.h */ +#define HAVE_STDLIB_H + +/* Define if you have sys/ioctl.h */ +#undef HAVE_SYS_IOCTL_H +#ifndef _WIN32 +#define HAVE_SYS_IOCTL_H +#endif + +/* Define if you have sys/filio.h */ +#undef HAVE_SYS_FILIO_H +#ifdef __APPLE__ +#define HAVE_SYS_FILIO_H +#endif + +/* Define if you have strerror */ +#define HAVE_STRERROR + +/* Define according to how time.h should be included */ +#define TIME_WITH_SYS_TIME 0 +#undef HAVE_SYS_TIME_H + +/* Define if you have sys/bitypes.h */ +#undef HAVE_SYS_BITYPES_H + +/* Define if the machine is big endian */ +//#undef HOST_WORDS_BIGENDIAN + +/* Define if you have readv */ +#undef HAVE_READV + +/* Define if iovec needs to be declared */ +#undef DECLARE_IOVEC +#ifdef _WIN32 +#define DECLARE_IOVEC +#endif + +/* Define if you have a POSIX.1 sys/wait.h */ +#undef HAVE_SYS_WAIT_H + +/* Define if you have sys/select.h */ +#undef HAVE_SYS_SELECT_H +#ifndef _WIN32 +#define HAVE_SYS_SELECT_H +#endif + +/* Define if you have strings.h */ +#define HAVE_STRING_H + +/* Define if you have arpa/inet.h */ +#undef HAVE_ARPA_INET_H +#ifndef _WIN32 +#define HAVE_ARPA_INET_H +#endif + +/* Define if you have sys/signal.h */ +#undef HAVE_SYS_SIGNAL_H + +/* Define if you have sys/stropts.h */ +#undef HAVE_SYS_STROPTS_H + +/* Define to whatever your compiler thinks inline should be */ +//#define inline inline + +/* Define to whatever your compiler thinks const should be */ +//#define const const + +/* Define if your compiler doesn't like prototypes */ +#undef NO_PROTOTYPES + +/* Define to sizeof(char) */ +#define SIZEOF_CHAR 1 + +/* Define to sizeof(short) */ +#define SIZEOF_SHORT 2 + +/* Define to sizeof(int) */ +#define SIZEOF_INT 4 + +/* Define to sizeof(char *) */ +#define SIZEOF_CHAR_P (HOST_LONG_BITS / 8) + +/* Define if you have random() */ +#undef HAVE_RANDOM + +/* Define if you have srandom() */ +#undef HAVE_SRANDOM + +/* Define if you have inet_aton */ +#undef HAVE_INET_ATON +#ifndef _WIN32 +#define HAVE_INET_ATON +#endif + +/* Define if you have setenv */ +#undef HAVE_SETENV + +/* Define if you have index() */ +#define HAVE_INDEX + +/* Define if you have bcmp() */ +#undef HAVE_BCMP + +/* Define if you have drand48 */ +#undef HAVE_DRAND48 + +/* Define if you have memmove */ +#define HAVE_MEMMOVE + +/* Define if you have gethostid */ +#define HAVE_GETHOSTID + +/* Define if you DON'T have unix-domain sockets */ +#undef NO_UNIX_SOCKETS +#ifdef _WIN32 +#define NO_UNIX_SOCKETS +#endif + +/* Define if you have revoke() */ +#undef HAVE_REVOKE + +/* Define if you have the sysv method of opening pty's (/dev/ptmx, etc.) */ +#undef HAVE_GRANTPT + +/* Define if you have fchmod */ +#undef HAVE_FCHMOD + +/* Define if you have */ +#undef HAVE_SYS_TYPES32_H diff --git a/slirp/socket.c b/slirp/socket.c new file mode 100644 index 00000000..37ac5cf2 --- /dev/null +++ b/slirp/socket.c @@ -0,0 +1,720 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include "qemu-common.h" +#include +#include "ip_icmp.h" +#ifdef __sun__ +#include +#endif + +static void sofcantrcvmore(struct socket *so); +static void sofcantsendmore(struct socket *so); + +struct socket * +solookup(struct socket *head, struct in_addr laddr, u_int lport, + struct in_addr faddr, u_int fport) +{ + struct socket *so; + + for (so = head->so_next; so != head; so = so->so_next) { + if (so->so_lport == lport && + so->so_laddr.s_addr == laddr.s_addr && + so->so_faddr.s_addr == faddr.s_addr && + so->so_fport == fport) + break; + } + + if (so == head) + return (struct socket *)NULL; + return so; + +} + +/* + * Create a new socket, initialise the fields + * It is the responsibility of the caller to + * insque() it into the correct linked-list + */ +struct socket * +socreate(Slirp *slirp) +{ + struct socket *so; + + so = (struct socket *)malloc(sizeof(struct socket)); + if(so) { + memset(so, 0, sizeof(struct socket)); + so->so_state = SS_NOFDREF; + so->s = -1; + so->slirp = slirp; + so->pollfds_idx = -1; + } + return(so); +} + +/* + * remque and free a socket, clobber cache + */ +void +sofree(struct socket *so) +{ + Slirp *slirp = so->slirp; + + if (so->so_emu==EMU_RSH && so->extra) { + sofree(so->extra); + so->extra=NULL; + } + if (so == slirp->tcp_last_so) { + slirp->tcp_last_so = &slirp->tcb; + } else if (so == slirp->udp_last_so) { + slirp->udp_last_so = &slirp->udb; + } else if (so == slirp->icmp_last_so) { + slirp->icmp_last_so = &slirp->icmp; + } + m_free(so->so_m); + + if(so->so_next && so->so_prev) + remque(so); /* crashes if so is not in a queue */ + + free(so); +} + +size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) +{ + int n, lss, total; + struct sbuf *sb = &so->so_snd; + int len = sb->sb_datalen - sb->sb_cc; + int mss = so->so_tcpcb->t_maxseg; + + DEBUG_CALL("sopreprbuf"); + DEBUG_ARG("so = %lx", (long )so); + + if (len <= 0) + return 0; + + iov[0].iov_base = sb->sb_wptr; + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + if (sb->sb_wptr < sb->sb_rptr) { + iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) + iov[0].iov_len = len; + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } else { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_rptr - sb->sb_data; + if(iov[1].iov_len > len) + iov[1].iov_len = len; + total = iov[0].iov_len + iov[1].iov_len; + if (total > mss) { + lss = total%mss; + if (iov[1].iov_len > lss) { + iov[1].iov_len -= lss; + n = 2; + } else { + lss -= iov[1].iov_len; + iov[0].iov_len -= lss; + n = 1; + } + } else + n = 2; + } else { + if (iov[0].iov_len > mss) + iov[0].iov_len -= iov[0].iov_len%mss; + n = 1; + } + } + if (np) + *np = n; + + return iov[0].iov_len + (n - 1) * iov[1].iov_len; +} + +/* + * Read from so's socket into sb_snd, updating all relevant sbuf fields + * NOTE: This will only be called if it is select()ed for reading, so + * a read() of 0 (or less) means it's disconnected + */ +int +soread(struct socket *so) +{ + int n, nn; + struct sbuf *sb = &so->so_snd; + struct iovec iov[2]; + + DEBUG_CALL("soread"); + DEBUG_ARG("so = %lx", (long )so); + + /* + * No need to check if there's enough room to read. + * soread wouldn't have been called if there weren't + */ + sopreprbuf(so, iov, &n); + +#ifdef HAVE_READV + nn = readv(so->s, (struct iovec *)iov, n); + DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn)); +#else + nn = qemu_recv(so->s, iov[0].iov_base, iov[0].iov_len,0); +#endif + if (nn <= 0) { + if (nn < 0 && (errno == EINTR || errno == EAGAIN)) + return 0; + else { + DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno))); + sofcantrcvmore(so); + tcp_sockclosed(sototcpcb(so)); + return -1; + } + } + +#ifndef HAVE_READV + /* + * If there was no error, try and read the second time round + * We read again if n = 2 (ie, there's another part of the buffer) + * and we read as much as we could in the first read + * We don't test for <= 0 this time, because there legitimately + * might not be any more data (since the socket is non-blocking), + * a close will be detected on next iteration. + * A return of -1 wont (shouldn't) happen, since it didn't happen above + */ + if (n == 2 && nn == iov[0].iov_len) { + int ret; + ret = qemu_recv(so->s, iov[1].iov_base, iov[1].iov_len,0); + if (ret > 0) + nn += ret; + } + + DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn)); +#endif + + /* Update fields */ + sb->sb_cc += nn; + sb->sb_wptr += nn; + if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_wptr -= sb->sb_datalen; + return nn; +} + +int soreadbuf(struct socket *so, const char *buf, int size) +{ + int n, nn, copy = size; + struct sbuf *sb = &so->so_snd; + struct iovec iov[2]; + + DEBUG_CALL("soreadbuf"); + DEBUG_ARG("so = %lx", (long )so); + + /* + * No need to check if there's enough room to read. + * soread wouldn't have been called if there weren't + */ + if (sopreprbuf(so, iov, &n) < size) + goto err; + + nn = MIN(iov[0].iov_len, copy); + memcpy(iov[0].iov_base, buf, nn); + + copy -= nn; + buf += nn; + + if (copy == 0) + goto done; + + memcpy(iov[1].iov_base, buf, copy); + +done: + /* Update fields */ + sb->sb_cc += size; + sb->sb_wptr += size; + if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_wptr -= sb->sb_datalen; + return size; +err: + + sofcantrcvmore(so); + tcp_sockclosed(sototcpcb(so)); + fprintf(stderr, "soreadbuf buffer to small"); + return -1; +} + +/* + * Get urgent data + * + * When the socket is created, we set it SO_OOBINLINE, + * so when OOB data arrives, we soread() it and everything + * in the send buffer is sent as urgent data + */ +void +sorecvoob(struct socket *so) +{ + struct tcpcb *tp = sototcpcb(so); + + DEBUG_CALL("sorecvoob"); + DEBUG_ARG("so = %lx", (long)so); + + /* + * We take a guess at how much urgent data has arrived. + * In most situations, when urgent data arrives, the next + * read() should get all the urgent data. This guess will + * be wrong however if more data arrives just after the + * urgent data, or the read() doesn't return all the + * urgent data. + */ + soread(so); + tp->snd_up = tp->snd_una + so->so_snd.sb_cc; + tp->t_force = 1; + tcp_output(tp); + tp->t_force = 0; +} + +/* + * Send urgent data + * There's a lot duplicated code here, but... + */ +int +sosendoob(struct socket *so) +{ + struct sbuf *sb = &so->so_rcv; + char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ + + int n, len; + + DEBUG_CALL("sosendoob"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); + + if (so->so_urgc > 2048) + so->so_urgc = 2048; /* XXXX */ + + if (sb->sb_rptr < sb->sb_wptr) { + /* We can send it directly */ + n = slirp_send(so, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */ + so->so_urgc -= n; + + DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc)); + } else { + /* + * Since there's no sendv or sendtov like writev, + * we must copy all data to a linear buffer then + * send it all + */ + len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (len > so->so_urgc) len = so->so_urgc; + memcpy(buff, sb->sb_rptr, len); + so->so_urgc -= len; + if (so->so_urgc) { + n = sb->sb_wptr - sb->sb_data; + if (n > so->so_urgc) n = so->so_urgc; + memcpy((buff + len), sb->sb_data, n); + so->so_urgc -= n; + len += n; + } + n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ +#ifdef DEBUG + if (n != len) + DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n")); +#endif + DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc)); + } + + sb->sb_cc -= n; + sb->sb_rptr += n; + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_rptr -= sb->sb_datalen; + + return n; +} + +/* + * Write data from so_rcv to so's socket, + * updating all sbuf field as necessary + */ +int +sowrite(struct socket *so) +{ + int n,nn; + struct sbuf *sb = &so->so_rcv; + int len = sb->sb_cc; + struct iovec iov[2]; + + DEBUG_CALL("sowrite"); + DEBUG_ARG("so = %lx", (long)so); + + if (so->so_urgc) { + sosendoob(so); + if (sb->sb_cc == 0) + return 0; + } + + /* + * No need to check if there's something to write, + * sowrite wouldn't have been called otherwise + */ + + iov[0].iov_base = sb->sb_rptr; + iov[1].iov_base = NULL; + iov[1].iov_len = 0; + if (sb->sb_rptr < sb->sb_wptr) { + iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; + /* Should never succeed, but... */ + if (iov[0].iov_len > len) iov[0].iov_len = len; + n = 1; + } else { + iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; + if (iov[0].iov_len > len) iov[0].iov_len = len; + len -= iov[0].iov_len; + if (len) { + iov[1].iov_base = sb->sb_data; + iov[1].iov_len = sb->sb_wptr - sb->sb_data; + if (iov[1].iov_len > len) iov[1].iov_len = len; + n = 2; + } else + n = 1; + } + /* Check if there's urgent data to send, and if so, send it */ + +#ifdef HAVE_READV + nn = writev(so->s, (const struct iovec *)iov, n); + + DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn)); +#else + nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len,0); +#endif + /* This should never happen, but people tell me it does *shrug* */ + if (nn < 0 && (errno == EAGAIN || errno == EINTR)) + return 0; + + if (nn <= 0) { + DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n", + so->so_state, errno)); + sofcantsendmore(so); + tcp_sockclosed(sototcpcb(so)); + return -1; + } + +#ifndef HAVE_READV + if (n == 2 && nn == iov[0].iov_len) { + int ret; + ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len,0); + if (ret > 0) + nn += ret; + } + DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn)); +#endif + + /* Update sbuf */ + sb->sb_cc -= nn; + sb->sb_rptr += nn; + if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) + sb->sb_rptr -= sb->sb_datalen; + + /* + * If in DRAIN mode, and there's no more data, set + * it CANTSENDMORE + */ + if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) + sofcantsendmore(so); + + return nn; +} + +/* + * recvfrom() a UDP socket + */ +void +sorecvfrom(struct socket *so) +{ + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + + DEBUG_CALL("sorecvfrom"); + DEBUG_ARG("so = %lx", (long)so); + + if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ + char buff[256]; + int len; + + len = recvfrom(so->s, buff, 256, 0, + (struct sockaddr *)&addr, &addrlen); + /* XXX Check if reply is "correct"? */ + + if(len == -1 || len == 0) { + u_char code=ICMP_UNREACH_PORT; + + if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; + else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET; + + DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n", + errno,strerror(errno))); + icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno)); + } else { + icmp_reflect(so->so_m); + so->so_m = NULL; /* Don't m_free() it again! */ + } + /* No need for this socket anymore, udp_detach it */ + udp_detach(so); + } else { /* A "normal" UDP packet */ + struct mbuf *m; + int len; +#ifdef _WIN32 + unsigned long n; +#else + int n; +#endif + + m = m_get(so->slirp); + if (!m) { + return; + } + m->m_data += IF_MAXLINKHDR; + + /* + * XXX Shouldn't FIONREAD packets destined for port 53, + * but I don't know the max packet size for DNS lookups + */ + len = M_FREEROOM(m); + /* if (so->so_fport != htons(53)) { */ + ioctlsocket(so->s, FIONREAD, &n); + + if (n > len) { + n = (m->m_data - m->m_dat) + m->m_len + n + 1; + m_inc(m, n); + len = M_FREEROOM(m); + } + /* } */ + + m->m_len = recvfrom(so->s, m->m_data, len, 0, + (struct sockaddr *)&addr, &addrlen); + DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n", + m->m_len, errno,strerror(errno))); + if(m->m_len<0) { + u_char code=ICMP_UNREACH_PORT; + + if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; + else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET; + + DEBUG_MISC((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code)); + icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno)); + m_free(m); + } else { + /* + * Hack: domain name lookup will be used the most for UDP, + * and since they'll only be used once there's no need + * for the 4 minute (or whatever) timeout... So we time them + * out much quicker (10 seconds for now...) + */ + if (so->so_expire) { + if (so->so_fport == htons(53)) + so->so_expire = curtime + SO_EXPIREFAST; + else + so->so_expire = curtime + SO_EXPIRE; + } + + /* + * If this packet was destined for CTL_ADDR, + * make it look like that's where it came from, done by udp_output + */ + udp_output(so, m, &addr); + } /* rx error */ + } /* if ping packet */ +} + +/* + * sendto() a socket + */ +int +sosendto(struct socket *so, struct mbuf *m) +{ + Slirp *slirp = so->slirp; + int ret; + struct sockaddr_in addr; + + DEBUG_CALL("sosendto"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m = %lx", (long)m); + + addr.sin_family = AF_INET; + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + /* It's an alias */ + if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { + if (get_dns_addr(&addr.sin_addr) < 0) + addr.sin_addr = loopback_addr; + } else { + addr.sin_addr = loopback_addr; + } + } else + addr.sin_addr = so->so_faddr; + addr.sin_port = so->so_fport; + + DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n", ntohs(addr.sin_port), inet_ntoa(addr.sin_addr))); + + /* Don't care what port we get */ + ret = sendto(so->s, m->m_data, m->m_len, 0, + (struct sockaddr *)&addr, sizeof (struct sockaddr)); + if (ret < 0) + return -1; + + /* + * Kill the socket if there's no reply in 4 minutes, + * but only if it's an expirable socket + */ + if (so->so_expire) + so->so_expire = curtime + SO_EXPIRE; + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ + return 0; +} + +/* + * Listen for incoming TCP connections + */ +struct socket * +tcp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr, + u_int lport, int flags) +{ + struct sockaddr_in addr; + struct socket *so; + int s, opt = 1; + socklen_t addrlen = sizeof(addr); + memset(&addr, 0, addrlen); + + DEBUG_CALL("tcp_listen"); + DEBUG_ARG("haddr = %x", haddr); + DEBUG_ARG("hport = %d", hport); + DEBUG_ARG("laddr = %x", laddr); + DEBUG_ARG("lport = %d", lport); + DEBUG_ARG("flags = %x", flags); + + so = socreate(slirp); + if (!so) { + return NULL; + } + + /* Don't tcp_attach... we don't need so_snd nor so_rcv */ + if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) { + free(so); + return NULL; + } + insque(so, &slirp->tcb); + + /* + * SS_FACCEPTONCE sockets must time out. + */ + if (flags & SS_FACCEPTONCE) + so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2; + + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= (SS_FACCEPTCONN | flags); + so->so_lport = lport; /* Kept in network format */ + so->so_laddr.s_addr = laddr; /* Ditto */ + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = haddr; + addr.sin_port = hport; + + if (((s = qemu_socket(AF_INET,SOCK_STREAM,0)) < 0) || + (socket_set_fast_reuse(s) < 0) || + (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) || + (listen(s,1) < 0)) { + int tmperrno = errno; /* Don't clobber the real reason we failed */ + + close(s); + sofree(so); + /* Restore the real errno */ +#ifdef _WIN32 + WSASetLastError(tmperrno); +#else + errno = tmperrno; +#endif + return NULL; + } + qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); + + getsockname(s,(struct sockaddr *)&addr,&addrlen); + so->so_fport = addr.sin_port; + if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr) + so->so_faddr = slirp->vhost_addr; + else + so->so_faddr = addr.sin_addr; + + so->s = s; + return so; +} + +/* + * Various session state calls + * XXX Should be #define's + * The socket state stuff needs work, these often get call 2 or 3 + * times each when only 1 was needed + */ +void +soisfconnecting(struct socket *so) +{ + so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE| + SS_FCANTSENDMORE|SS_FWDRAIN); + so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ +} + +void +soisfconnected(struct socket *so) +{ + so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF); + so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ +} + +static void +sofcantrcvmore(struct socket *so) +{ + if ((so->so_state & SS_NOFDREF) == 0) { + shutdown(so->s,0); + } + so->so_state &= ~(SS_ISFCONNECTING); + if (so->so_state & SS_FCANTSENDMORE) { + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; /* Don't select it */ + } else { + so->so_state |= SS_FCANTRCVMORE; + } +} + +static void +sofcantsendmore(struct socket *so) +{ + if ((so->so_state & SS_NOFDREF) == 0) { + shutdown(so->s,1); /* send FIN to fhost */ + } + so->so_state &= ~(SS_ISFCONNECTING); + if (so->so_state & SS_FCANTRCVMORE) { + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; /* as above */ + } else { + so->so_state |= SS_FCANTSENDMORE; + } +} + +/* + * Set write drain mode + * Set CANTSENDMORE once all data has been write()n + */ +void +sofwdrain(struct socket *so) +{ + if (so->so_rcv.sb_cc) + so->so_state |= SS_FWDRAIN; + else + sofcantsendmore(so); +} diff --git a/slirp/socket.h b/slirp/socket.h new file mode 100644 index 00000000..57e0407e --- /dev/null +++ b/slirp/socket.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#ifndef _SLIRP_SOCKET_H_ +#define _SLIRP_SOCKET_H_ + +#define SO_EXPIRE 240000 +#define SO_EXPIREFAST 10000 + +/* + * Our socket structure + */ + +struct socket { + struct socket *so_next,*so_prev; /* For a linked list of sockets */ + + int s; /* The actual socket */ + + int pollfds_idx; /* GPollFD GArray index */ + + Slirp *slirp; /* managing slirp instance */ + + /* XXX union these with not-yet-used sbuf params */ + struct mbuf *so_m; /* Pointer to the original SYN packet, + * for non-blocking connect()'s, and + * PING reply's */ + struct tcpiphdr *so_ti; /* Pointer to the original ti within + * so_mconn, for non-blocking connections */ + int so_urgc; + struct in_addr so_faddr; /* foreign host table entry */ + struct in_addr so_laddr; /* local host table entry */ + uint16_t so_fport; /* foreign port */ + uint16_t so_lport; /* local port */ + + uint8_t so_iptos; /* Type of service */ + uint8_t so_emu; /* Is the socket emulated? */ + + u_char so_type; /* Type of socket, UDP or TCP */ + int so_state; /* internal state flags SS_*, below */ + + struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ + u_int so_expire; /* When the socket will expire */ + + int so_queued; /* Number of packets queued from this socket */ + int so_nqueued; /* Number of packets queued in a row + * Used to determine when to "downgrade" a session + * from fastq to batchq */ + + struct sbuf so_rcv; /* Receive buffer */ + struct sbuf so_snd; /* Send buffer */ + void * extra; /* Extra pointer */ +}; + + +/* + * Socket state bits. (peer means the host on the Internet, + * local host means the host on the other end of the modem) + */ +#define SS_NOFDREF 0x001 /* No fd reference */ + +#define SS_ISFCONNECTING 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ +#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ +#define SS_FCANTRCVMORE 0x008 /* Socket can't receive more from peer (for half-closes) */ +#define SS_FCANTSENDMORE 0x010 /* Socket can't send more to peer (for half-closes) */ +#define SS_FWDRAIN 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ + +#define SS_CTL 0x080 +#define SS_FACCEPTCONN 0x100 /* Socket is accepting connections from a host on the internet */ +#define SS_FACCEPTONCE 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ + +#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ +#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ +#define SS_INCOMING 0x2000 /* Connection was initiated by a host on the internet */ + +struct socket * solookup(struct socket *, struct in_addr, u_int, struct in_addr, u_int); +struct socket * socreate(Slirp *); +void sofree(struct socket *); +int soread(struct socket *); +void sorecvoob(struct socket *); +int sosendoob(struct socket *); +int sowrite(struct socket *); +void sorecvfrom(struct socket *); +int sosendto(struct socket *, struct mbuf *); +struct socket * tcp_listen(Slirp *, uint32_t, u_int, uint32_t, u_int, + int); +void soisfconnecting(register struct socket *); +void soisfconnected(register struct socket *); +void sofwdrain(struct socket *); +struct iovec; /* For win32 */ +size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); +int soreadbuf(struct socket *so, const char *buf, int size); + +#endif /* _SOCKET_H_ */ diff --git a/slirp/tcp.h b/slirp/tcp.h new file mode 100644 index 00000000..2e2b4033 --- /dev/null +++ b/slirp/tcp.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp + */ + +#ifndef _TCP_H_ +#define _TCP_H_ + +typedef uint32_t tcp_seq; + +#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ +#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ + +#define TCP_SNDSPACE 8192 +#define TCP_RCVSPACE 8192 + +/* + * TCP header. + * Per RFC 793, September, 1981. + */ +#define tcphdr slirp_tcphdr +struct tcphdr { + uint16_t th_sport; /* source port */ + uint16_t th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#ifdef HOST_WORDS_BIGENDIAN + uint8_t th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#else + uint8_t th_x2:4, /* (unused) */ + th_off:4; /* data offset */ +#endif + uint8_t th_flags; + uint16_t th_win; /* window */ + uint16_t th_sum; /* checksum */ + uint16_t th_urp; /* urgent pointer */ +}; + +#include "tcp_var.h" + +#ifndef TH_FIN +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#endif + +#ifndef TCPOPT_EOL +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ +#define TCPOPT_SACK 5 /* Experimental */ +#define TCPOPT_TIMESTAMP 8 + +#define TCPOPT_TSTAMP_HDR \ + (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP) +#endif + +#ifndef TCPOLEN_MAXSEG +#define TCPOLEN_MAXSEG 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ +#endif + +/* + * Default maximum segment size for TCP. + * With an IP MSS of 576, this is 536, + * but 512 is probably more convenient. + * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). + * + * We make this 1460 because we only care about Ethernet in the qemu context. + */ +#undef TCP_MSS +#define TCP_MSS 1460 + +#undef TCP_MAXWIN +#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ + +#undef TCP_MAX_WINSHIFT +#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +/* + * User-settable options (used with setsockopt). + * + * We don't use the system headers on unix because we have conflicting + * local structures. We can't avoid the system definitions on Windows, + * so we undefine them. + */ +#undef TCP_NODELAY +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#undef TCP_MAXSEG + +/* + * TCP FSM state definitions. + * Per RFC793, September, 1981. + */ + +#define TCP_NSTATES 11 + +#define TCPS_CLOSED 0 /* closed */ +#define TCPS_LISTEN 1 /* listening for connection */ +#define TCPS_SYN_SENT 2 /* active, have sent syn */ +#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ +/* states < TCPS_ESTABLISHED are those where connections not established */ +#define TCPS_ESTABLISHED 4 /* established */ +#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ +/* states > TCPS_CLOSE_WAIT are those where user has closed */ +#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ +#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ +#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ +/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ +#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ +#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ + +#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) +#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) +#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) + +/* + * TCP sequence numbers are 32 bit integers operated + * on with modular arithmetic. These macros can be + * used to compare such integers. + */ +#define SEQ_LT(a,b) ((int)((a)-(b)) < 0) +#define SEQ_LEQ(a,b) ((int)((a)-(b)) <= 0) +#define SEQ_GT(a,b) ((int)((a)-(b)) > 0) +#define SEQ_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Macros to initialize tcp sequence numbers for + * send and receive from initial send and receive + * sequence numbers. + */ +#define tcp_rcvseqinit(tp) \ + (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 + +#define tcp_sendseqinit(tp) \ + (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss + +#define TCP_ISSINCR (125*1024) /* increment for tcp_iss each second */ + +#endif diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c new file mode 100644 index 00000000..00a77b4a --- /dev/null +++ b/slirp/tcp_input.c @@ -0,0 +1,1496 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 + * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include +#include "ip_icmp.h" + +#define TCPREXMTTHRESH 3 + +#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) + +/* for modulo comparisons of timestamps */ +#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0) +#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0) + +/* + * Insert segment ti into reassembly queue of tcp with + * control block tp. Return TH_FIN if reassembly now includes + * a segment with FIN. The macro form does the common case inline + * (segment is the next to be received on an established connection, + * and the queue is empty), avoiding linkage into and removal + * from the queue and repetition of various conversions. + * Set DELACK for segments received in order, but ack immediately + * when segments are out of order (so fast retransmit can work). + */ +#ifdef TCP_ACK_HACK +#define TCP_REASS(tp, ti, m, so, flags) {\ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + tcpfrag_list_empty(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) {\ + if (ti->ti_flags & TH_PUSH) \ + tp->t_flags |= TF_ACKNOW; \ + else \ + tp->t_flags |= TF_DELACK; \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + if (so->so_emu) { \ + if (tcp_emu((so),(m))) sbappend((so), (m)); \ + } else \ + sbappend((so), (m)); \ + } else {\ + (flags) = tcp_reass((tp), (ti), (m)); \ + tp->t_flags |= TF_ACKNOW; \ + } \ +} +#else +#define TCP_REASS(tp, ti, m, so, flags) { \ + if ((ti)->ti_seq == (tp)->rcv_nxt && \ + tcpfrag_list_empty(tp) && \ + (tp)->t_state == TCPS_ESTABLISHED) { \ + tp->t_flags |= TF_DELACK; \ + (tp)->rcv_nxt += (ti)->ti_len; \ + flags = (ti)->ti_flags & TH_FIN; \ + if (so->so_emu) { \ + if (tcp_emu((so),(m))) sbappend(so, (m)); \ + } else \ + sbappend((so), (m)); \ + } else { \ + (flags) = tcp_reass((tp), (ti), (m)); \ + tp->t_flags |= TF_ACKNOW; \ + } \ +} +#endif +static void tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, + struct tcpiphdr *ti); +static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); + +static int +tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, + struct mbuf *m) +{ + register struct tcpiphdr *q; + struct socket *so = tp->t_socket; + int flags; + + /* + * Call with ti==NULL after become established to + * force pre-ESTABLISHED data up to user socket. + */ + if (ti == NULL) + goto present; + + /* + * Find a segment which begins after this one does. + */ + for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); + q = tcpiphdr_next(q)) + if (SEQ_GT(q->ti_seq, ti->ti_seq)) + break; + + /* + * If there is a preceding segment, it may provide some of + * our data already. If so, drop the data from the incoming + * segment. If it provides all of our data, drop us. + */ + if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { + register int i; + q = tcpiphdr_prev(q); + /* conversion to int (in i) handles seq wraparound */ + i = q->ti_seq + q->ti_len - ti->ti_seq; + if (i > 0) { + if (i >= ti->ti_len) { + m_free(m); + /* + * Try to present any queued data + * at the left window edge to the user. + * This is needed after the 3-WHS + * completes. + */ + goto present; /* ??? */ + } + m_adj(m, i); + ti->ti_len -= i; + ti->ti_seq += i; + } + q = tcpiphdr_next(q); + } + ti->ti_mbuf = m; + + /* + * While we overlap succeeding segments trim them or, + * if they are completely covered, dequeue them. + */ + while (!tcpfrag_list_end(q, tp)) { + register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; + if (i <= 0) + break; + if (i < q->ti_len) { + q->ti_seq += i; + q->ti_len -= i; + m_adj(q->ti_mbuf, i); + break; + } + q = tcpiphdr_next(q); + m = tcpiphdr_prev(q)->ti_mbuf; + remque(tcpiphdr2qlink(tcpiphdr_prev(q))); + m_free(m); + } + + /* + * Stick new segment in its place. + */ + insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); + +present: + /* + * Present data to user, advancing rcv_nxt through + * completed sequence space. + */ + if (!TCPS_HAVEESTABLISHED(tp->t_state)) + return (0); + ti = tcpfrag_list_first(tp); + if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) + return (0); + if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) + return (0); + do { + tp->rcv_nxt += ti->ti_len; + flags = ti->ti_flags & TH_FIN; + remque(tcpiphdr2qlink(ti)); + m = ti->ti_mbuf; + ti = tcpiphdr_next(ti); + if (so->so_state & SS_FCANTSENDMORE) + m_free(m); + else { + if (so->so_emu) { + if (tcp_emu(so,m)) sbappend(so, m); + } else + sbappend(so, m); + } + } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); + return (flags); +} + +/* + * TCP input routine, follows pages 65-76 of the + * protocol specification dated September, 1981 very closely. + */ +void +tcp_input(struct mbuf *m, int iphlen, struct socket *inso) +{ + struct ip save_ip, *ip; + register struct tcpiphdr *ti; + caddr_t optp = NULL; + int optlen = 0; + int len, tlen, off; + register struct tcpcb *tp = NULL; + register int tiflags; + struct socket *so = NULL; + int todrop, acked, ourfinisacked, needoutput = 0; + int iss = 0; + u_long tiwin; + int ret; + struct ex_list *ex_ptr; + Slirp *slirp; + + DEBUG_CALL("tcp_input"); + DEBUG_ARGS((dfd, " m = %8lx iphlen = %2d inso = %lx\n", + (long )m, iphlen, (long )inso )); + + /* + * If called with m == 0, then we're continuing the connect + */ + if (m == NULL) { + so = inso; + slirp = so->slirp; + + /* Re-set a few variables */ + tp = sototcpcb(so); + m = so->so_m; + so->so_m = NULL; + ti = so->so_ti; + tiwin = ti->ti_win; + tiflags = ti->ti_flags; + + goto cont_conn; + } + slirp = m->slirp; + + /* + * Get IP and TCP header together in first mbuf. + * Note: IP leaves IP header in first mbuf. + */ + ti = mtod(m, struct tcpiphdr *); + if (iphlen > sizeof(struct ip )) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen=sizeof(struct ip ); + } + /* XXX Check if too short */ + + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + ip=mtod(m, struct ip *); + save_ip = *ip; + save_ip.ip_len+= iphlen; + + /* + * Checksum extended TCP header and data. + */ + tlen = ((struct ip *)ti)->ip_len; + tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; + memset(&ti->ti_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr)); + ti->ti_x1 = 0; + ti->ti_len = htons((uint16_t)tlen); + len = sizeof(struct ip ) + tlen; + if(cksum(m, len)) { + goto drop; + } + + /* + * Check that TCP offset makes sense, + * pull out TCP options and adjust length. XXX + */ + off = ti->ti_off << 2; + if (off < sizeof (struct tcphdr) || off > tlen) { + goto drop; + } + tlen -= off; + ti->ti_len = tlen; + if (off > sizeof (struct tcphdr)) { + optlen = off - sizeof (struct tcphdr); + optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr); + } + tiflags = ti->ti_flags; + + /* + * Convert TCP protocol specific fields to host format. + */ + NTOHL(ti->ti_seq); + NTOHL(ti->ti_ack); + NTOHS(ti->ti_win); + NTOHS(ti->ti_urp); + + /* + * Drop TCP, IP headers and TCP options. + */ + m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + + /* + * Locate pcb for segment. + */ +findso: + so = slirp->tcp_last_so; + if (so->so_fport != ti->ti_dport || + so->so_lport != ti->ti_sport || + so->so_laddr.s_addr != ti->ti_src.s_addr || + so->so_faddr.s_addr != ti->ti_dst.s_addr) { + so = solookup(&slirp->tcb, ti->ti_src, ti->ti_sport, + ti->ti_dst, ti->ti_dport); + if (so) + slirp->tcp_last_so = so; + } + + /* + * If the state is CLOSED (i.e., TCB does not exist) then + * all data in the incoming segment is discarded. + * If the TCB exists but is in CLOSED state, it is embryonic, + * but should either do a listen or a connect soon. + * + * state == CLOSED means we've done socreate() but haven't + * attached it to a protocol yet... + * + * XXX If a TCB does not exist, and the TH_SYN flag is + * the only flag set, then create a session, mark it + * as if it was LISTENING, and continue... + */ + if (so == NULL) { + if (slirp->restricted) { + /* Any hostfwds will have an existing socket, so we only get here + * for non-hostfwd connections. These should be dropped, unless it + * happens to be a guestfwd. + */ + for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_fport == ti->ti_dport && + ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { + break; + } + } + if (!ex_ptr) { + goto dropwithreset; + } + } + + if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN) + goto dropwithreset; + + if ((so = socreate(slirp)) == NULL) + goto dropwithreset; + if (tcp_attach(so) < 0) { + free(so); /* Not sofree (if it failed, it's not insqued) */ + goto dropwithreset; + } + + sbreserve(&so->so_snd, TCP_SNDSPACE); + sbreserve(&so->so_rcv, TCP_RCVSPACE); + + so->so_laddr = ti->ti_src; + so->so_lport = ti->ti_sport; + so->so_faddr = ti->ti_dst; + so->so_fport = ti->ti_dport; + + if ((so->so_iptos = tcp_tos(so)) == 0) + so->so_iptos = ((struct ip *)ti)->ip_tos; + + tp = sototcpcb(so); + tp->t_state = TCPS_LISTEN; + } + + /* + * If this is a still-connecting socket, this probably + * a retransmit of the SYN. Whether it's a retransmit SYN + * or something else, we nuke it. + */ + if (so->so_state & SS_ISFCONNECTING) + goto drop; + + tp = sototcpcb(so); + + /* XXX Should never fail */ + if (tp == NULL) + goto dropwithreset; + if (tp->t_state == TCPS_CLOSED) + goto drop; + + tiwin = ti->ti_win; + + /* + * Segment received on connection. + * Reset idle time and keep-alive timer. + */ + tp->t_idle = 0; + if (SO_OPTIONS) + tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; + else + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; + + /* + * Process options if not in LISTEN state, + * else do it below (after getting remote address). + */ + if (optp && tp->t_state != TCPS_LISTEN) + tcp_dooptions(tp, (u_char *)optp, optlen, ti); + + /* + * Header prediction: check for the two common cases + * of a uni-directional data xfer. If the packet has + * no control flags, is in-sequence, the window didn't + * change and we're not retransmitting, it's a + * candidate. If the length is zero and the ack moved + * forward, we're the sender side of the xfer. Just + * free the data acked & wake any higher level process + * that was blocked waiting for space. If the length + * is non-zero and the ack didn't move, we're the + * receiver side. If we're getting packets in-order + * (the reassembly queue is empty), add the data to + * the socket buffer and note that we need a delayed ack. + * + * XXX Some of these tests are not needed + * eg: the tiwin == tp->snd_wnd prevents many more + * predictions.. with no *real* advantage.. + */ + if (tp->t_state == TCPS_ESTABLISHED && + (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK && + ti->ti_seq == tp->rcv_nxt && + tiwin && tiwin == tp->snd_wnd && + tp->snd_nxt == tp->snd_max) { + if (ti->ti_len == 0) { + if (SEQ_GT(ti->ti_ack, tp->snd_una) && + SEQ_LEQ(ti->ti_ack, tp->snd_max) && + tp->snd_cwnd >= tp->snd_wnd) { + /* + * this is a pure ack for outstanding data. + */ + if (tp->t_rtt && + SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(tp, tp->t_rtt); + acked = ti->ti_ack - tp->snd_una; + sbdrop(&so->so_snd, acked); + tp->snd_una = ti->ti_ack; + m_free(m); + + /* + * If all outstanding data are acked, stop + * retransmit timer, otherwise restart timer + * using current (possibly backed-off) value. + * If process is waiting for space, + * wakeup/selwakeup/signal. If data + * are ready to send, let tcp_output + * decide between more output or persist. + */ + if (tp->snd_una == tp->snd_max) + tp->t_timer[TCPT_REXMT] = 0; + else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + /* + * This is called because sowwakeup might have + * put data into so_snd. Since we don't so sowwakeup, + * we don't need this.. XXX??? + */ + if (so->so_snd.sb_cc) + (void) tcp_output(tp); + + return; + } + } else if (ti->ti_ack == tp->snd_una && + tcpfrag_list_empty(tp) && + ti->ti_len <= sbspace(&so->so_rcv)) { + /* + * this is a pure, in-sequence data packet + * with nothing on the reassembly queue and + * we have enough buffer space to take it. + */ + tp->rcv_nxt += ti->ti_len; + /* + * Add data to socket buffer. + */ + if (so->so_emu) { + if (tcp_emu(so,m)) sbappend(so, m); + } else + sbappend(so, m); + + /* + * If this is a short packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + * + * It is better to not delay acks at all to maximize + * TCP throughput. See RFC 2581. + */ + tp->t_flags |= TF_ACKNOW; + tcp_output(tp); + return; + } + } /* header prediction */ + /* + * Calculate amount of space in receive window, + * and then do TCP input processing. + * Receive window is amount of space in rcv queue, + * but not less than advertised window. + */ + { int win; + win = sbspace(&so->so_rcv); + if (win < 0) + win = 0; + tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt)); + } + + switch (tp->t_state) { + + /* + * If the state is LISTEN then ignore segment if it contains an RST. + * If the segment contains an ACK then it is bad and send a RST. + * If it does not contain a SYN then it is not interesting; drop it. + * Don't bother responding if the destination was a broadcast. + * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial + * tp->iss, and send a segment: + * + * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. + * Fill in remote peer address fields if not previously specified. + * Enter SYN_RECEIVED state, and process any other fields of this + * segment in this state. + */ + case TCPS_LISTEN: { + + if (tiflags & TH_RST) + goto drop; + if (tiflags & TH_ACK) + goto dropwithreset; + if ((tiflags & TH_SYN) == 0) + goto drop; + + /* + * This has way too many gotos... + * But a bit of spaghetti code never hurt anybody :) + */ + + /* + * If this is destined for the control address, then flag to + * tcp_ctl once connected, otherwise connect + */ + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && + so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { + /* May be an add exec */ + for (ex_ptr = slirp->exec_list; ex_ptr; + ex_ptr = ex_ptr->ex_next) { + if(ex_ptr->ex_fport == so->so_fport && + so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { + so->so_state |= SS_CTL; + break; + } + } + if (so->so_state & SS_CTL) { + goto cont_input; + } + } + /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ + } + + if (so->so_emu & EMU_NOCONNECT) { + so->so_emu &= ~EMU_NOCONNECT; + goto cont_input; + } + + if ((tcp_fconnect(so) == -1) && +#if defined(_WIN32) + socket_error() != WSAEWOULDBLOCK +#else + (errno != EINPROGRESS) && (errno != EWOULDBLOCK) +#endif + ) { + u_char code=ICMP_UNREACH_NET; + DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n", + errno,strerror(errno))); + if(errno == ECONNREFUSED) { + /* ACK the SYN, send RST to refuse the connection */ + tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0, + TH_RST|TH_ACK); + } else { + if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST; + HTONL(ti->ti_seq); /* restore tcp header */ + HTONL(ti->ti_ack); + HTONS(ti->ti_win); + HTONS(ti->ti_urp); + m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + m->m_len += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr); + *ip=save_ip; + icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno)); + } + tcp_close(tp); + m_free(m); + } else { + /* + * Haven't connected yet, save the current mbuf + * and ti, and return + * XXX Some OS's don't tell us whether the connect() + * succeeded or not. So we must time it out. + */ + so->so_m = m; + so->so_ti = ti; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->t_state = TCPS_SYN_RECEIVED; + tcp_template(tp); + } + return; + + cont_conn: + /* m==NULL + * Check if the connect succeeded + */ + if (so->so_state & SS_NOFDREF) { + tp = tcp_close(tp); + goto dropwithreset; + } + cont_input: + tcp_template(tp); + + if (optp) + tcp_dooptions(tp, (u_char *)optp, optlen, ti); + + if (iss) + tp->iss = iss; + else + tp->iss = slirp->tcp_iss; + slirp->tcp_iss += TCP_ISSINCR/2; + tp->irs = ti->ti_seq; + tcp_sendseqinit(tp); + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + tp->t_state = TCPS_SYN_RECEIVED; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + goto trimthenstep6; + } /* case TCPS_LISTEN */ + + /* + * If the state is SYN_SENT: + * if seg contains an ACK, but not for our SYN, drop the input. + * if seg contains a RST, then drop the connection. + * if seg does not contain SYN, then drop it. + * Otherwise this is an acceptable SYN segment + * initialize tp->rcv_nxt and tp->irs + * if seg contains ack then advance tp->snd_una + * if SYN has been acked change to ESTABLISHED else SYN_RCVD state + * arrange for segment to be acked (eventually) + * continue processing rest of data/controls, beginning with URG + */ + case TCPS_SYN_SENT: + if ((tiflags & TH_ACK) && + (SEQ_LEQ(ti->ti_ack, tp->iss) || + SEQ_GT(ti->ti_ack, tp->snd_max))) + goto dropwithreset; + + if (tiflags & TH_RST) { + if (tiflags & TH_ACK) { + tcp_drop(tp, 0); /* XXX Check t_softerror! */ + } + goto drop; + } + + if ((tiflags & TH_SYN) == 0) + goto drop; + if (tiflags & TH_ACK) { + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + } + + tp->t_timer[TCPT_REXMT] = 0; + tp->irs = ti->ti_seq; + tcp_rcvseqinit(tp); + tp->t_flags |= TF_ACKNOW; + if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { + soisfconnected(so); + tp->t_state = TCPS_ESTABLISHED; + + (void) tcp_reass(tp, (struct tcpiphdr *)0, + (struct mbuf *)0); + /* + * if we didn't have to retransmit the SYN, + * use its rtt as our initial srtt & rtt var. + */ + if (tp->t_rtt) + tcp_xmit_timer(tp, tp->t_rtt); + } else + tp->t_state = TCPS_SYN_RECEIVED; + +trimthenstep6: + /* + * Advance ti->ti_seq to correspond to first data byte. + * If data, trim to stay within window, + * dropping FIN if necessary. + */ + ti->ti_seq++; + if (ti->ti_len > tp->rcv_wnd) { + todrop = ti->ti_len - tp->rcv_wnd; + m_adj(m, -todrop); + ti->ti_len = tp->rcv_wnd; + tiflags &= ~TH_FIN; + } + tp->snd_wl1 = ti->ti_seq - 1; + tp->rcv_up = ti->ti_seq; + goto step6; + } /* switch tp->t_state */ + /* + * States other than LISTEN or SYN_SENT. + * Check that at least some bytes of segment are within + * receive window. If segment begins before rcv_nxt, + * drop leading data (and SYN); if nothing left, just ack. + */ + todrop = tp->rcv_nxt - ti->ti_seq; + if (todrop > 0) { + if (tiflags & TH_SYN) { + tiflags &= ~TH_SYN; + ti->ti_seq++; + if (ti->ti_urp > 1) + ti->ti_urp--; + else + tiflags &= ~TH_URG; + todrop--; + } + /* + * Following if statement from Stevens, vol. 2, p. 960. + */ + if (todrop > ti->ti_len + || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { + /* + * Any valid FIN must be to the left of the window. + * At this point the FIN must be a duplicate or out + * of sequence; drop it. + */ + tiflags &= ~TH_FIN; + + /* + * Send an ACK to resynchronize and drop any data. + * But keep on processing for RST or ACK. + */ + tp->t_flags |= TF_ACKNOW; + todrop = ti->ti_len; + } + m_adj(m, todrop); + ti->ti_seq += todrop; + ti->ti_len -= todrop; + if (ti->ti_urp > todrop) + ti->ti_urp -= todrop; + else { + tiflags &= ~TH_URG; + ti->ti_urp = 0; + } + } + /* + * If new data are received on a connection after the + * user processes are gone, then RST the other end. + */ + if ((so->so_state & SS_NOFDREF) && + tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) { + tp = tcp_close(tp); + goto dropwithreset; + } + + /* + * If segment ends after window, drop trailing data + * (and PUSH and FIN); if nothing left, just ACK. + */ + todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd); + if (todrop > 0) { + if (todrop >= ti->ti_len) { + /* + * If a new connection request is received + * while in TIME_WAIT, drop the old connection + * and start over if the sequence numbers + * are above the previous ones. + */ + if (tiflags & TH_SYN && + tp->t_state == TCPS_TIME_WAIT && + SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { + iss = tp->rcv_nxt + TCP_ISSINCR; + tp = tcp_close(tp); + goto findso; + } + /* + * If window is closed can only take segments at + * window edge, and have to drop data and PUSH from + * incoming segments. Continue processing, but + * remember to ack. Otherwise, drop segment + * and ack. + */ + if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { + tp->t_flags |= TF_ACKNOW; + } else { + goto dropafterack; + } + } + m_adj(m, -todrop); + ti->ti_len -= todrop; + tiflags &= ~(TH_PUSH|TH_FIN); + } + + /* + * If the RST bit is set examine the state: + * SYN_RECEIVED STATE: + * If passive open, return to LISTEN state. + * If active open, inform user that connection was refused. + * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: + * Inform user that connection was reset, and close tcb. + * CLOSING, LAST_ACK, TIME_WAIT STATES + * Close the tcb. + */ + if (tiflags&TH_RST) switch (tp->t_state) { + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_CLOSED; + tcp_close(tp); + goto drop; + + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + tcp_close(tp); + goto drop; + } + + /* + * If a SYN is in the window, then this is an + * error and we send an RST and drop the connection. + */ + if (tiflags & TH_SYN) { + tp = tcp_drop(tp,0); + goto dropwithreset; + } + + /* + * If the ACK bit is off we drop the segment and return. + */ + if ((tiflags & TH_ACK) == 0) goto drop; + + /* + * Ack processing. + */ + switch (tp->t_state) { + /* + * In SYN_RECEIVED state if the ack ACKs our SYN then enter + * ESTABLISHED state and continue processing, otherwise + * send an RST. una<=ack<=max + */ + case TCPS_SYN_RECEIVED: + + if (SEQ_GT(tp->snd_una, ti->ti_ack) || + SEQ_GT(ti->ti_ack, tp->snd_max)) + goto dropwithreset; + tp->t_state = TCPS_ESTABLISHED; + /* + * The sent SYN is ack'ed with our sequence number +1 + * The first data byte already in the buffer will get + * lost if no correction is made. This is only needed for + * SS_CTL since the buffer is empty otherwise. + * tp->snd_una++; or: + */ + tp->snd_una=ti->ti_ack; + if (so->so_state & SS_CTL) { + /* So tcp_ctl reports the right state */ + ret = tcp_ctl(so); + if (ret == 1) { + soisfconnected(so); + so->so_state &= ~SS_CTL; /* success XXX */ + } else if (ret == 2) { + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_NOFDREF; /* CTL_CMD */ + } else { + needoutput = 1; + tp->t_state = TCPS_FIN_WAIT_1; + } + } else { + soisfconnected(so); + } + + (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); + tp->snd_wl1 = ti->ti_seq - 1; + /* Avoid ack processing; snd_una==ti_ack => dup ack */ + goto synrx_to_est; + /* fall into ... */ + + /* + * In ESTABLISHED state: drop duplicate ACKs; ACK out of range + * ACKs. If the ack is in the range + * tp->snd_una < ti->ti_ack <= tp->snd_max + * then advance tp->snd_una to ti->ti_ack and drop + * data from the retransmission queue. If this ACK reflects + * more up to date window information we update our window information. + */ + case TCPS_ESTABLISHED: + case TCPS_FIN_WAIT_1: + case TCPS_FIN_WAIT_2: + case TCPS_CLOSE_WAIT: + case TCPS_CLOSING: + case TCPS_LAST_ACK: + case TCPS_TIME_WAIT: + + if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { + if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { + DEBUG_MISC((dfd, " dup ack m = %lx so = %lx\n", + (long )m, (long )so)); + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change), the ack is the biggest we've + * seen and we've seen exactly our rexmt + * threshold of them, assume a packet + * has been dropped and retransmit it. + * Kludge snd_nxt & the congestion + * window so we send only this one + * packet. + * + * We know we're losing at the current + * window size so do congestion avoidance + * (set ssthresh to half the current window + * and pull our congestion window back to + * the new ssthresh). + * + * Dup acks mean that packets have left the + * network (they're now cached at the receiver) + * so bump cwnd by the amount in the receiver + * to keep a constant cwnd packets in the + * network. + */ + if (tp->t_timer[TCPT_REXMT] == 0 || + ti->ti_ack != tp->snd_una) + tp->t_dupacks = 0; + else if (++tp->t_dupacks == TCPREXMTTHRESH) { + tcp_seq onxt = tp->snd_nxt; + u_int win = + min(tp->snd_wnd, tp->snd_cwnd) / 2 / + tp->t_maxseg; + + if (win < 2) + win = 2; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_timer[TCPT_REXMT] = 0; + tp->t_rtt = 0; + tp->snd_nxt = ti->ti_ack; + tp->snd_cwnd = tp->t_maxseg; + (void) tcp_output(tp); + tp->snd_cwnd = tp->snd_ssthresh + + tp->t_maxseg * tp->t_dupacks; + if (SEQ_GT(onxt, tp->snd_nxt)) + tp->snd_nxt = onxt; + goto drop; + } else if (tp->t_dupacks > TCPREXMTTHRESH) { + tp->snd_cwnd += tp->t_maxseg; + (void) tcp_output(tp); + goto drop; + } + } else + tp->t_dupacks = 0; + break; + } + synrx_to_est: + /* + * If the congestion window was inflated to account + * for the other side's cached packets, retract it. + */ + if (tp->t_dupacks > TCPREXMTTHRESH && + tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd = tp->snd_ssthresh; + tp->t_dupacks = 0; + if (SEQ_GT(ti->ti_ack, tp->snd_max)) { + goto dropafterack; + } + acked = ti->ti_ack - tp->snd_una; + + /* + * If transmit timer is running and timed sequence + * number was acked, update smoothed round trip time. + * Since we now have an rtt measurement, cancel the + * timer backoff (cf., Phil Karn's retransmit alg.). + * Recompute the initial retransmit timer. + */ + if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) + tcp_xmit_timer(tp,tp->t_rtt); + + /* + * If all outstanding data is acked, stop retransmit + * timer and remember to restart (more output or persist). + * If there is more data to be acked, restart retransmit + * timer, using current (possibly backed-off) value. + */ + if (ti->ti_ack == tp->snd_max) { + tp->t_timer[TCPT_REXMT] = 0; + needoutput = 1; + } else if (tp->t_timer[TCPT_PERSIST] == 0) + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * When new data is acked, open the congestion window. + * If the window gives us less than ssthresh packets + * in flight, open exponentially (maxseg per packet). + * Otherwise open linearly: maxseg per window + * (maxseg^2 / cwnd per packet). + */ + { + register u_int cw = tp->snd_cwnd; + register u_int incr = tp->t_maxseg; + + if (cw > tp->snd_ssthresh) + incr = incr * incr / cw; + tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<snd_scale); + } + if (acked > so->so_snd.sb_cc) { + tp->snd_wnd -= so->so_snd.sb_cc; + sbdrop(&so->so_snd, (int )so->so_snd.sb_cc); + ourfinisacked = 1; + } else { + sbdrop(&so->so_snd, acked); + tp->snd_wnd -= acked; + ourfinisacked = 0; + } + tp->snd_una = ti->ti_ack; + if (SEQ_LT(tp->snd_nxt, tp->snd_una)) + tp->snd_nxt = tp->snd_una; + + switch (tp->t_state) { + + /* + * In FIN_WAIT_1 STATE in addition to the processing + * for the ESTABLISHED state if our FIN is now acknowledged + * then enter FIN_WAIT_2. + */ + case TCPS_FIN_WAIT_1: + if (ourfinisacked) { + /* + * If we can't receive any more + * data, then closing user can proceed. + * Starting the timer is contrary to the + * specification, but if we don't get a FIN + * we'll hang forever. + */ + if (so->so_state & SS_FCANTRCVMORE) { + tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; + } + tp->t_state = TCPS_FIN_WAIT_2; + } + break; + + /* + * In CLOSING STATE in addition to the processing for + * the ESTABLISHED state if the ACK acknowledges our FIN + * then enter the TIME-WAIT state, otherwise ignore + * the segment. + */ + case TCPS_CLOSING: + if (ourfinisacked) { + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + } + break; + + /* + * In LAST_ACK, we may still be waiting for data to drain + * and/or to be acked, as well as for the ack of our FIN. + * If our FIN is now acknowledged, delete the TCB, + * enter the closed state and return. + */ + case TCPS_LAST_ACK: + if (ourfinisacked) { + tcp_close(tp); + goto drop; + } + break; + + /* + * In TIME_WAIT state the only thing that should arrive + * is a retransmission of the remote FIN. Acknowledge + * it and restart the finack timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + goto dropafterack; + } + } /* switch(tp->t_state) */ + +step6: + /* + * Update window information. + * Don't look at window if no ACK: TAC's send garbage on first SYN. + */ + if ((tiflags & TH_ACK) && + (SEQ_LT(tp->snd_wl1, ti->ti_seq) || + (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) || + (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { + tp->snd_wnd = tiwin; + tp->snd_wl1 = ti->ti_seq; + tp->snd_wl2 = ti->ti_ack; + if (tp->snd_wnd > tp->max_sndwnd) + tp->max_sndwnd = tp->snd_wnd; + needoutput = 1; + } + + /* + * Process segments with URG. + */ + if ((tiflags & TH_URG) && ti->ti_urp && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * This is a kludge, but if we receive and accept + * random urgent pointers, we'll crash in + * soreceive. It's hard to imagine someone + * actually wanting to send this much urgent data. + */ + if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { + ti->ti_urp = 0; + tiflags &= ~TH_URG; + goto dodata; + } + /* + * If this segment advances the known urgent pointer, + * then mark the data stream. This should not happen + * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since + * a FIN has been received from the remote side. + * In these states we ignore the URG. + * + * According to RFC961 (Assigned Protocols), + * the urgent pointer points to the last octet + * of urgent data. We continue, however, + * to consider it to indicate the first octet + * of data past the urgent section as the original + * spec states (in one of two places). + */ + if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) { + tp->rcv_up = ti->ti_seq + ti->ti_urp; + so->so_urgc = so->so_rcv.sb_cc + + (tp->rcv_up - tp->rcv_nxt); /* -1; */ + tp->rcv_up = ti->ti_seq + ti->ti_urp; + + } + } else + /* + * If no out of band data is expected, + * pull receive urgent pointer along + * with the receive window. + */ + if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) + tp->rcv_up = tp->rcv_nxt; +dodata: + + /* + * If this is a small packet, then ACK now - with Nagel + * congestion avoidance sender won't send more until + * he gets an ACK. + */ + if (ti->ti_len && (unsigned)ti->ti_len <= 5 && + ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { + tp->t_flags |= TF_ACKNOW; + } + + /* + * Process the segment text, merging it into the TCP sequencing queue, + * and arranging for acknowledgment of receipt if necessary. + * This process logically involves adjusting tp->rcv_wnd as data + * is presented to the user (this happens in tcp_usrreq.c, + * case PRU_RCVD). If a FIN has already been received on this + * connection then we just ignore the text. + */ + if ((ti->ti_len || (tiflags&TH_FIN)) && + TCPS_HAVERCVDFIN(tp->t_state) == 0) { + TCP_REASS(tp, ti, m, so, tiflags); + } else { + m_free(m); + tiflags &= ~TH_FIN; + } + + /* + * If FIN is received ACK the FIN and let the user know + * that the connection is closing. + */ + if (tiflags & TH_FIN) { + if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + /* + * If we receive a FIN we can't send more data, + * set it SS_FDRAIN + * Shutdown the socket if there is no rx data in the + * buffer. + * soread() is called on completion of shutdown() and + * will got to TCPS_LAST_ACK, and use tcp_output() + * to send the FIN. + */ + sofwdrain(so); + + tp->t_flags |= TF_ACKNOW; + tp->rcv_nxt++; + } + switch (tp->t_state) { + + /* + * In SYN_RECEIVED and ESTABLISHED STATES + * enter the CLOSE_WAIT state. + */ + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + if(so->so_emu == EMU_CTL) /* no shutdown on socket */ + tp->t_state = TCPS_LAST_ACK; + else + tp->t_state = TCPS_CLOSE_WAIT; + break; + + /* + * If still in FIN_WAIT_1 STATE FIN has not been acked so + * enter the CLOSING state. + */ + case TCPS_FIN_WAIT_1: + tp->t_state = TCPS_CLOSING; + break; + + /* + * In FIN_WAIT_2 state enter the TIME_WAIT state, + * starting the time-wait timer, turning off the other + * standard timers. + */ + case TCPS_FIN_WAIT_2: + tp->t_state = TCPS_TIME_WAIT; + tcp_canceltimers(tp); + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + + /* + * In TIME_WAIT state restart the 2 MSL time_wait timer. + */ + case TCPS_TIME_WAIT: + tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; + break; + } + } + + /* + * Return any desired output. + */ + if (needoutput || (tp->t_flags & TF_ACKNOW)) { + (void) tcp_output(tp); + } + return; + +dropafterack: + /* + * Generate an ACK dropping incoming segment if it occupies + * sequence space, where the ACK reflects our state. + */ + if (tiflags & TH_RST) + goto drop; + m_free(m); + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + return; + +dropwithreset: + /* reuses m if m!=NULL, m_free() unnecessary */ + if (tiflags & TH_ACK) + tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST); + else { + if (tiflags & TH_SYN) ti->ti_len++; + tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0, + TH_RST|TH_ACK); + } + + return; + +drop: + /* + * Drop space held by incoming segment and return. + */ + m_free(m); +} + +static void +tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti) +{ + uint16_t mss; + int opt, optlen; + + DEBUG_CALL("tcp_dooptions"); + DEBUG_ARGS((dfd, " tp = %lx cnt=%i\n", (long)tp, cnt)); + + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; + } + switch (opt) { + + default: + continue; + + case TCPOPT_MAXSEG: + if (optlen != TCPOLEN_MAXSEG) + continue; + if (!(ti->ti_flags & TH_SYN)) + continue; + memcpy((char *) &mss, (char *) cp + 2, sizeof(mss)); + NTOHS(mss); + (void) tcp_mss(tp, mss); /* sets t_maxseg */ + break; + } + } +} + + +/* + * Pull out of band byte out of a segment so + * it doesn't appear in the user's data queue. + * It is still reflected in the segment length for + * sequencing purposes. + */ + +#ifdef notdef + +void +tcp_pulloutofband(so, ti, m) + struct socket *so; + struct tcpiphdr *ti; + register struct mbuf *m; +{ + int cnt = ti->ti_urp - 1; + + while (cnt >= 0) { + if (m->m_len > cnt) { + char *cp = mtod(m, caddr_t) + cnt; + struct tcpcb *tp = sototcpcb(so); + + tp->t_iobc = *cp; + tp->t_oobflags |= TCPOOB_HAVEDATA; + memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1)); + m->m_len--; + return; + } + cnt -= m->m_len; + m = m->m_next; /* XXX WRONG! Fix it! */ + if (m == 0) + break; + } + panic("tcp_pulloutofband"); +} + +#endif /* notdef */ + +/* + * Collect new round-trip time estimate + * and update averages and current timeout. + */ + +static void +tcp_xmit_timer(register struct tcpcb *tp, int rtt) +{ + register short delta; + + DEBUG_CALL("tcp_xmit_timer"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("rtt = %d", rtt); + + if (tp->t_srtt != 0) { + /* + * srtt is stored as fixed point with 3 bits after the + * binary point (i.e., scaled by 8). The following magic + * is equivalent to the smoothing algorithm in rfc793 with + * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed + * point). Adjust rtt to origin 0. + */ + delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); + if ((tp->t_srtt += delta) <= 0) + tp->t_srtt = 1; + /* + * We accumulate a smoothed rtt variance (actually, a + * smoothed mean difference), then set the retransmit + * timer to smoothed rtt + 4 times the smoothed variance. + * rttvar is stored as fixed point with 2 bits after the + * binary point (scaled by 4). The following is + * equivalent to rfc793 smoothing with an alpha of .75 + * (rttvar = rttvar*3/4 + |delta| / 4). This replaces + * rfc793's wired-in beta. + */ + if (delta < 0) + delta = -delta; + delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + if ((tp->t_rttvar += delta) <= 0) + tp->t_rttvar = 1; + } else { + /* + * No rtt measurement yet - use the unsmoothed rtt. + * Set the variance to half the rtt (so our first + * retransmit happens at 3*rtt). + */ + tp->t_srtt = rtt << TCP_RTT_SHIFT; + tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); + } + tp->t_rtt = 0; + tp->t_rxtshift = 0; + + /* + * the retransmit should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + */ + TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + + /* + * We received an ack for a packet that wasn't retransmitted; + * it is probably safe to discard any error indications we've + * received recently. This isn't quite right, but close enough + * for now (a route might have failed after we sent a segment, + * and the return path might not be symmetrical). + */ + tp->t_softerror = 0; +} + +/* + * Determine a reasonable value for maxseg size. + * If the route is known, check route for mtu. + * If none, use an mss that can be handled on the outgoing + * interface without forcing IP to fragment; if bigger than + * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES + * to utilize large mbufs. If no route is found, route has no mtu, + * or the destination isn't local, use a default, hopefully conservative + * size (usually 512 or the default IP max size, but no more than the mtu + * of the interface), as we can't discover anything about intervening + * gateways or networks. We also initialize the congestion/slow start + * window to be a single segment if the destination isn't local. + * While looking at the routing entry, we also initialize other path-dependent + * parameters from pre-set or cached values in the routing entry. + */ + +int +tcp_mss(struct tcpcb *tp, u_int offer) +{ + struct socket *so = tp->t_socket; + int mss; + + DEBUG_CALL("tcp_mss"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("offer = %d", offer); + + mss = min(IF_MTU, IF_MRU) - sizeof(struct tcpiphdr); + if (offer) + mss = min(mss, offer); + mss = max(mss, 32); + if (mss < tp->t_maxseg || offer != 0) + tp->t_maxseg = mss; + + tp->snd_cwnd = mss; + + sbreserve(&so->so_snd, TCP_SNDSPACE + ((TCP_SNDSPACE % mss) ? + (mss - (TCP_SNDSPACE % mss)) : + 0)); + sbreserve(&so->so_rcv, TCP_RCVSPACE + ((TCP_RCVSPACE % mss) ? + (mss - (TCP_RCVSPACE % mss)) : + 0)); + + DEBUG_MISC((dfd, " returning mss = %d\n", mss)); + + return mss; +} diff --git a/slirp/tcp_output.c b/slirp/tcp_output.c new file mode 100644 index 00000000..8aa3d904 --- /dev/null +++ b/slirp/tcp_output.c @@ -0,0 +1,493 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 + * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include + +static const u_char tcp_outflags[TCP_NSTATES] = { + TH_RST|TH_ACK, 0, TH_SYN, TH_SYN|TH_ACK, + TH_ACK, TH_ACK, TH_FIN|TH_ACK, TH_FIN|TH_ACK, + TH_FIN|TH_ACK, TH_ACK, TH_ACK, +}; + + +#undef MAX_TCPOPTLEN +#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ + +/* + * Tcp output routine: figure out what should be sent and send it. + */ +int +tcp_output(struct tcpcb *tp) +{ + register struct socket *so = tp->t_socket; + register long len, win; + int off, flags, error; + register struct mbuf *m; + register struct tcpiphdr *ti; + u_char opt[MAX_TCPOPTLEN]; + unsigned optlen, hdrlen; + int idle, sendalot; + + DEBUG_CALL("tcp_output"); + DEBUG_ARG("tp = %lx", (long )tp); + + /* + * Determine length of data that should be transmitted, + * and flags that will be used. + * If there is some data or critical controls (SYN, RST) + * to send, then transmit; otherwise, investigate further. + */ + idle = (tp->snd_max == tp->snd_una); + if (idle && tp->t_idle >= tp->t_rxtcur) + /* + * We have been idle for "a while" and no acks are + * expected to clock out any data we send -- + * slow start to get ack "clock" running again. + */ + tp->snd_cwnd = tp->t_maxseg; +again: + sendalot = 0; + off = tp->snd_nxt - tp->snd_una; + win = min(tp->snd_wnd, tp->snd_cwnd); + + flags = tcp_outflags[tp->t_state]; + + DEBUG_MISC((dfd, " --- tcp_output flags = 0x%x\n",flags)); + + /* + * If in persist timeout with window of 0, send 1 byte. + * Otherwise, if window is small but nonzero + * and timer expired, we will send what we can + * and go to transmit state. + */ + if (tp->t_force) { + if (win == 0) { + /* + * If we still have some data to send, then + * clear the FIN bit. Usually this would + * happen below when it realizes that we + * aren't sending all the data. However, + * if we have exactly 1 byte of unset data, + * then it won't clear the FIN bit below, + * and if we are in persist state, we wind + * up sending the packet without recording + * that we sent the FIN bit. + * + * We can't just blindly clear the FIN bit, + * because if we don't have any more data + * to send then the probe will be the FIN + * itself. + */ + if (off < so->so_snd.sb_cc) + flags &= ~TH_FIN; + win = 1; + } else { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + + len = min(so->so_snd.sb_cc, win) - off; + + if (len < 0) { + /* + * If FIN has been sent but not acked, + * but we haven't been called to retransmit, + * len will be -1. Otherwise, window shrank + * after we sent into it. If window shrank to 0, + * cancel pending retransmit and pull snd_nxt + * back to (closed) window. We will enter persist + * state below. If the window didn't close completely, + * just wait for an ACK. + */ + len = 0; + if (win == 0) { + tp->t_timer[TCPT_REXMT] = 0; + tp->snd_nxt = tp->snd_una; + } + } + + if (len > tp->t_maxseg) { + len = tp->t_maxseg; + sendalot = 1; + } + if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) + flags &= ~TH_FIN; + + win = sbspace(&so->so_rcv); + + /* + * Sender silly window avoidance. If connection is idle + * and can send all data, a maximum segment, + * at least a maximum default-size segment do it, + * or are forced, do it; otherwise don't bother. + * If peer's buffer is tiny, then send + * when window is at least half open. + * If retransmitting (possibly after persist timer forced us + * to send into a small window), then must resend. + */ + if (len) { + if (len == tp->t_maxseg) + goto send; + if ((1 || idle || tp->t_flags & TF_NODELAY) && + len + off >= so->so_snd.sb_cc) + goto send; + if (tp->t_force) + goto send; + if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) + goto send; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) + goto send; + } + + /* + * Compare available window to amount of window + * known to peer (as advertised window less + * next expected input). If the difference is at least two + * max size segments, or at least 50% of the maximum possible + * window, then want to send a window update to peer. + */ + if (win > 0) { + /* + * "adv" is the amount we can increase the window, + * taking into account that we are limited by + * TCP_MAXWIN << tp->rcv_scale. + */ + long adv = min(win, (long)TCP_MAXWIN << tp->rcv_scale) - + (tp->rcv_adv - tp->rcv_nxt); + + if (adv >= (long) (2 * tp->t_maxseg)) + goto send; + if (2 * adv >= (long) so->so_rcv.sb_datalen) + goto send; + } + + /* + * Send if we owe peer an ACK. + */ + if (tp->t_flags & TF_ACKNOW) + goto send; + if (flags & (TH_SYN|TH_RST)) + goto send; + if (SEQ_GT(tp->snd_up, tp->snd_una)) + goto send; + /* + * If our state indicates that FIN should be sent + * and we have not yet done so, or we're retransmitting the FIN, + * then we need to send. + */ + if (flags & TH_FIN && + ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) + goto send; + + /* + * TCP window updates are not reliable, rather a polling protocol + * using ``persist'' packets is used to insure receipt of window + * updates. The three ``states'' for the output side are: + * idle not doing retransmits or persists + * persisting to move a small or zero window + * (re)transmitting and thereby not persisting + * + * tp->t_timer[TCPT_PERSIST] + * is set when we are in persist state. + * tp->t_force + * is set when we are called to send a persist packet. + * tp->t_timer[TCPT_REXMT] + * is set when we are retransmitting + * The output side is idle when both timers are zero. + * + * If send window is too small, there is data to transmit, and no + * retransmit or persist is pending, then go to persist state. + * If nothing happens soon, send when timer expires: + * if window is nonzero, transmit what we can, + * otherwise force out a byte. + */ + if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && + tp->t_timer[TCPT_PERSIST] == 0) { + tp->t_rxtshift = 0; + tcp_setpersist(tp); + } + + /* + * No reason to send a segment, just return. + */ + return (0); + +send: + /* + * Before ESTABLISHED, force sending of initial options + * unless TCP set not to do any options. + * NOTE: we assume that the IP/TCP header plus TCP options + * always fit in a single mbuf, leaving room for a maximum + * link header, i.e. + * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN + */ + optlen = 0; + hdrlen = sizeof (struct tcpiphdr); + if (flags & TH_SYN) { + tp->snd_nxt = tp->iss; + if ((tp->t_flags & TF_NOOPT) == 0) { + uint16_t mss; + + opt[0] = TCPOPT_MAXSEG; + opt[1] = 4; + mss = htons((uint16_t) tcp_mss(tp, 0)); + memcpy((caddr_t)(opt + 2), (caddr_t)&mss, sizeof(mss)); + optlen = 4; + } + } + + hdrlen += optlen; + + /* + * Adjust data length if insertion of options will + * bump the packet length beyond the t_maxseg length. + */ + if (len > tp->t_maxseg - optlen) { + len = tp->t_maxseg - optlen; + sendalot = 1; + } + + /* + * Grab a header mbuf, attaching a copy of data to + * be transmitted, and initialize the header from + * the template for sends on this connection. + */ + if (len) { + m = m_get(so->slirp); + if (m == NULL) { + error = 1; + goto out; + } + m->m_data += IF_MAXLINKHDR; + m->m_len = hdrlen; + + sbcopy(&so->so_snd, off, (int) len, mtod(m, caddr_t) + hdrlen); + m->m_len += len; + + /* + * If we're sending everything we've got, set PUSH. + * (This will keep happy those implementations which only + * give data to the user when a buffer fills or + * a PUSH comes in.) + */ + if (off + len == so->so_snd.sb_cc) + flags |= TH_PUSH; + } else { + m = m_get(so->slirp); + if (m == NULL) { + error = 1; + goto out; + } + m->m_data += IF_MAXLINKHDR; + m->m_len = hdrlen; + } + + ti = mtod(m, struct tcpiphdr *); + + memcpy((caddr_t)ti, &tp->t_template, sizeof (struct tcpiphdr)); + + /* + * Fill in fields, remembering maximum advertised + * window for use in delaying messages about window sizes. + * If resending a FIN, be sure not to use a new sequence number. + */ + if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && + tp->snd_nxt == tp->snd_max) + tp->snd_nxt--; + /* + * If we are doing retransmissions, then snd_nxt will + * not reflect the first unsent octet. For ACK only + * packets, we do not want the sequence number of the + * retransmitted packet, we want the sequence number + * of the next unsent octet. So, if there is no data + * (and no SYN or FIN), use snd_max instead of snd_nxt + * when filling in ti_seq. But if we are in persist + * state, snd_max might reflect one byte beyond the + * right edge of the window, so use snd_nxt in that + * case, since we know we aren't doing a retransmission. + * (retransmit and persist are mutually exclusive...) + */ + if (len || (flags & (TH_SYN|TH_FIN)) || tp->t_timer[TCPT_PERSIST]) + ti->ti_seq = htonl(tp->snd_nxt); + else + ti->ti_seq = htonl(tp->snd_max); + ti->ti_ack = htonl(tp->rcv_nxt); + if (optlen) { + memcpy((caddr_t)(ti + 1), (caddr_t)opt, optlen); + ti->ti_off = (sizeof (struct tcphdr) + optlen) >> 2; + } + ti->ti_flags = flags; + /* + * Calculate receive window. Don't shrink window, + * but avoid silly window syndrome. + */ + if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) + win = 0; + if (win > (long)TCP_MAXWIN << tp->rcv_scale) + win = (long)TCP_MAXWIN << tp->rcv_scale; + if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) + win = (long)(tp->rcv_adv - tp->rcv_nxt); + ti->ti_win = htons((uint16_t) (win>>tp->rcv_scale)); + + if (SEQ_GT(tp->snd_up, tp->snd_una)) { + ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); + ti->ti_flags |= TH_URG; + } else + /* + * If no urgent pointer to send, then we pull + * the urgent pointer to the left edge of the send window + * so that it doesn't drift into the send window on sequence + * number wraparound. + */ + tp->snd_up = tp->snd_una; /* drag it along */ + + /* + * Put TCP length in extended header, and then + * checksum extended header and data. + */ + if (len + optlen) + ti->ti_len = htons((uint16_t)(sizeof (struct tcphdr) + + optlen + len)); + ti->ti_sum = cksum(m, (int)(hdrlen + len)); + + /* + * In transmit state, time the transmission and arrange for + * the retransmit. In persist state, just set snd_max. + */ + if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { + tcp_seq startseq = tp->snd_nxt; + + /* + * Advance snd_nxt over sequence space of this segment. + */ + if (flags & (TH_SYN|TH_FIN)) { + if (flags & TH_SYN) + tp->snd_nxt++; + if (flags & TH_FIN) { + tp->snd_nxt++; + tp->t_flags |= TF_SENTFIN; + } + } + tp->snd_nxt += len; + if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { + tp->snd_max = tp->snd_nxt; + /* + * Time this transmission if not a retransmission and + * not currently timing anything. + */ + if (tp->t_rtt == 0) { + tp->t_rtt = 1; + tp->t_rtseq = startseq; + } + } + + /* + * Set retransmit timer if not currently set, + * and not doing an ack or a keep-alive probe. + * Initial value for retransmit timer is smoothed + * round-trip time + 2 * round-trip time variance. + * Initialize shift counter which is used for backoff + * of retransmit time. + */ + if (tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_una) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + } else + if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) + tp->snd_max = tp->snd_nxt + len; + + /* + * Fill in IP length and desired time to live and + * send to IP level. There should be a better way + * to handle ttl and tos; we could keep them in + * the template, but need a way to checksum without them. + */ + m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ + + { + + ((struct ip *)ti)->ip_len = m->m_len; + + ((struct ip *)ti)->ip_ttl = IPDEFTTL; + ((struct ip *)ti)->ip_tos = so->so_iptos; + + error = ip_output(so, m); + } + if (error) { +out: + return (error); + } + + /* + * Data sent (as far as we can tell). + * If this advertises a larger window than any other segment, + * then remember the size of the advertised window. + * Any pending ACK has now been sent. + */ + if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv)) + tp->rcv_adv = tp->rcv_nxt + win; + tp->last_ack_sent = tp->rcv_nxt; + tp->t_flags &= ~(TF_ACKNOW|TF_DELACK); + if (sendalot) + goto again; + + return (0); +} + +void +tcp_setpersist(struct tcpcb *tp) +{ + int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; + + /* + * Start/restart persistence timer. + */ + TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], + t * tcp_backoff[tp->t_rxtshift], + TCPTV_PERSMIN, TCPTV_PERSMAX); + if (tp->t_rxtshift < TCP_MAXRXTSHIFT) + tp->t_rxtshift++; +} diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c new file mode 100644 index 00000000..7571c5a2 --- /dev/null +++ b/slirp/tcp_subr.c @@ -0,0 +1,926 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 + * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include + +/* patchable/settable parameters for tcp */ +/* Don't do rfc1323 performance enhancements */ +#define TCP_DO_RFC1323 0 + +/* + * Tcp initialization + */ +void +tcp_init(Slirp *slirp) +{ + slirp->tcp_iss = 1; /* wrong */ + slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; + slirp->tcp_last_so = &slirp->tcb; +} + +void tcp_cleanup(Slirp *slirp) +{ + while (slirp->tcb.so_next != &slirp->tcb) { + tcp_close(sototcpcb(slirp->tcb.so_next)); + } +} + +/* + * Create template to be used to send tcp packets on a connection. + * Call after host entry created, fills + * in a skeletal tcp/ip header, minimizing the amount of work + * necessary when the connection is used. + */ +void +tcp_template(struct tcpcb *tp) +{ + struct socket *so = tp->t_socket; + register struct tcpiphdr *n = &tp->t_template; + + n->ti_mbuf = NULL; + n->ti_x1 = 0; + n->ti_pr = IPPROTO_TCP; + n->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip)); + n->ti_src = so->so_faddr; + n->ti_dst = so->so_laddr; + n->ti_sport = so->so_fport; + n->ti_dport = so->so_lport; + + n->ti_seq = 0; + n->ti_ack = 0; + n->ti_x2 = 0; + n->ti_off = 5; + n->ti_flags = 0; + n->ti_win = 0; + n->ti_sum = 0; + n->ti_urp = 0; +} + +/* + * Send a single message to the TCP at address specified by + * the given TCP/IP header. If m == 0, then we make a copy + * of the tcpiphdr at ti and send directly to the addressed host. + * This is used to force keep alive messages out using the TCP + * template for a connection tp->t_template. If flags are given + * then we send a message back to the TCP which originated the + * segment ti, and discard the mbuf containing it and any other + * attached mbufs. + * + * In any case the ack and sequence number of the transmitted + * segment are as specified by the parameters. + */ +void +tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, + tcp_seq ack, tcp_seq seq, int flags) +{ + register int tlen; + int win = 0; + + DEBUG_CALL("tcp_respond"); + DEBUG_ARG("tp = %p", tp); + DEBUG_ARG("ti = %p", ti); + DEBUG_ARG("m = %p", m); + DEBUG_ARG("ack = %u", ack); + DEBUG_ARG("seq = %u", seq); + DEBUG_ARG("flags = %x", flags); + + if (tp) + win = sbspace(&tp->t_socket->so_rcv); + if (m == NULL) { + if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) + return; + tlen = 0; + m->m_data += IF_MAXLINKHDR; + *mtod(m, struct tcpiphdr *) = *ti; + ti = mtod(m, struct tcpiphdr *); + flags = TH_ACK; + } else { + /* + * ti points into m so the next line is just making + * the mbuf point to ti + */ + m->m_data = (caddr_t)ti; + + m->m_len = sizeof (struct tcpiphdr); + tlen = 0; +#define xchg(a,b,type) { type t; t=a; a=b; b=t; } + xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); + xchg(ti->ti_dport, ti->ti_sport, uint16_t); +#undef xchg + } + ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen)); + tlen += sizeof (struct tcpiphdr); + m->m_len = tlen; + + ti->ti_mbuf = NULL; + ti->ti_x1 = 0; + ti->ti_seq = htonl(seq); + ti->ti_ack = htonl(ack); + ti->ti_x2 = 0; + ti->ti_off = sizeof (struct tcphdr) >> 2; + ti->ti_flags = flags; + if (tp) + ti->ti_win = htons((uint16_t) (win >> tp->rcv_scale)); + else + ti->ti_win = htons((uint16_t)win); + ti->ti_urp = 0; + ti->ti_sum = 0; + ti->ti_sum = cksum(m, tlen); + ((struct ip *)ti)->ip_len = tlen; + + if(flags & TH_RST) + ((struct ip *)ti)->ip_ttl = MAXTTL; + else + ((struct ip *)ti)->ip_ttl = IPDEFTTL; + + (void) ip_output((struct socket *)0, m); +} + +/* + * Create a new TCP control block, making an + * empty reassembly queue and hooking it to the argument + * protocol control block. + */ +struct tcpcb * +tcp_newtcpcb(struct socket *so) +{ + register struct tcpcb *tp; + + tp = (struct tcpcb *)malloc(sizeof(*tp)); + if (tp == NULL) + return ((struct tcpcb *)0); + + memset((char *) tp, 0, sizeof(struct tcpcb)); + tp->seg_next = tp->seg_prev = (struct tcpiphdr*)tp; + tp->t_maxseg = TCP_MSS; + + tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE|TF_REQ_TSTMP) : 0; + tp->t_socket = so; + + /* + * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no + * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives + * reasonable initial retransmit time. + */ + tp->t_srtt = TCPTV_SRTTBASE; + tp->t_rttvar = TCPTV_SRTTDFLT << 2; + tp->t_rttmin = TCPTV_MIN; + + TCPT_RANGESET(tp->t_rxtcur, + ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, + TCPTV_MIN, TCPTV_REXMTMAX); + + tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; + tp->t_state = TCPS_CLOSED; + + so->so_tcpcb = tp; + + return (tp); +} + +/* + * Drop a TCP connection, reporting + * the specified error. If connection is synchronized, + * then send a RST to peer. + */ +struct tcpcb *tcp_drop(struct tcpcb *tp, int err) +{ + DEBUG_CALL("tcp_drop"); + DEBUG_ARG("tp = %lx", (long)tp); + DEBUG_ARG("errno = %d", errno); + + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_state = TCPS_CLOSED; + (void) tcp_output(tp); + } + return (tcp_close(tp)); +} + +/* + * Close a TCP control block: + * discard all space held by the tcp + * discard internet protocol block + * wake up any sleepers + */ +struct tcpcb * +tcp_close(struct tcpcb *tp) +{ + register struct tcpiphdr *t; + struct socket *so = tp->t_socket; + Slirp *slirp = so->slirp; + register struct mbuf *m; + + DEBUG_CALL("tcp_close"); + DEBUG_ARG("tp = %lx", (long )tp); + + /* free the reassembly queue, if any */ + t = tcpfrag_list_first(tp); + while (!tcpfrag_list_end(t, tp)) { + t = tcpiphdr_next(t); + m = tcpiphdr_prev(t)->ti_mbuf; + remque(tcpiphdr2qlink(tcpiphdr_prev(t))); + m_free(m); + } + free(tp); + so->so_tcpcb = NULL; + /* clobber input socket cache if we're closing the cached connection */ + if (so == slirp->tcp_last_so) + slirp->tcp_last_so = &slirp->tcb; + closesocket(so->s); + sbfree(&so->so_rcv); + sbfree(&so->so_snd); + sofree(so); + return ((struct tcpcb *)0); +} + +/* + * TCP protocol interface to socket abstraction. + */ + +/* + * User issued close, and wish to trail through shutdown states: + * if never received SYN, just forget it. If got a SYN from peer, + * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. + * If already got a FIN from peer, then almost done; go to LAST_ACK + * state. In all other cases, have already sent FIN to peer (e.g. + * after PRU_SHUTDOWN), and just have to play tedious game waiting + * for peer to send FIN or not respond to keep-alives, etc. + * We can let the user exit from the close as soon as the FIN is acked. + */ +void +tcp_sockclosed(struct tcpcb *tp) +{ + + DEBUG_CALL("tcp_sockclosed"); + DEBUG_ARG("tp = %lx", (long)tp); + + switch (tp->t_state) { + + case TCPS_CLOSED: + case TCPS_LISTEN: + case TCPS_SYN_SENT: + tp->t_state = TCPS_CLOSED; + tp = tcp_close(tp); + break; + + case TCPS_SYN_RECEIVED: + case TCPS_ESTABLISHED: + tp->t_state = TCPS_FIN_WAIT_1; + break; + + case TCPS_CLOSE_WAIT: + tp->t_state = TCPS_LAST_ACK; + break; + } + if (tp) + tcp_output(tp); +} + +/* + * Connect to a host on the Internet + * Called by tcp_input + * Only do a connect, the tcp fields will be set in tcp_input + * return 0 if there's a result of the connect, + * else return -1 means we're still connecting + * The return value is almost always -1 since the socket is + * nonblocking. Connect returns after the SYN is sent, and does + * not wait for ACK+SYN. + */ +int tcp_fconnect(struct socket *so) +{ + Slirp *slirp = so->slirp; + int ret=0; + + DEBUG_CALL("tcp_fconnect"); + DEBUG_ARG("so = %lx", (long )so); + + if( (ret = so->s = qemu_socket(AF_INET,SOCK_STREAM,0)) >= 0) { + int opt, s=so->s; + struct sockaddr_in addr; + + qemu_set_nonblock(s); + socket_set_fast_reuse(s); + opt = 1; + qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); + + addr.sin_family = AF_INET; + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + /* It's an alias */ + if (so->so_faddr.s_addr == slirp->vnameserver_addr.s_addr) { + if (get_dns_addr(&addr.sin_addr) < 0) + addr.sin_addr = loopback_addr; + } else { + addr.sin_addr = loopback_addr; + } + } else + addr.sin_addr = so->so_faddr; + addr.sin_port = so->so_fport; + + DEBUG_MISC((dfd, " connect()ing, addr.sin_port=%d, " + "addr.sin_addr.s_addr=%.16s\n", + ntohs(addr.sin_port), inet_ntoa(addr.sin_addr))); + /* We don't care what port we get */ + ret = connect(s,(struct sockaddr *)&addr,sizeof (addr)); + + /* + * If it's not in progress, it failed, so we just return 0, + * without clearing SS_NOFDREF + */ + soisfconnecting(so); + } + + return(ret); +} + +/* + * Accept the socket and connect to the local-host + * + * We have a problem. The correct thing to do would be + * to first connect to the local-host, and only if the + * connection is accepted, then do an accept() here. + * But, a) we need to know who's trying to connect + * to the socket to be able to SYN the local-host, and + * b) we are already connected to the foreign host by + * the time it gets to accept(), so... We simply accept + * here and SYN the local-host. + */ +void tcp_connect(struct socket *inso) +{ + Slirp *slirp = inso->slirp; + struct socket *so; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct tcpcb *tp; + int s, opt; + + DEBUG_CALL("tcp_connect"); + DEBUG_ARG("inso = %lx", (long)inso); + + /* + * If it's an SS_ACCEPTONCE socket, no need to socreate() + * another socket, just use the accept() socket. + */ + if (inso->so_state & SS_FACCEPTONCE) { + /* FACCEPTONCE already have a tcpcb */ + so = inso; + } else { + so = socreate(slirp); + if (so == NULL) { + /* If it failed, get rid of the pending connection */ + closesocket(accept(inso->s, (struct sockaddr *)&addr, &addrlen)); + return; + } + if (tcp_attach(so) < 0) { + free(so); /* NOT sofree */ + return; + } + so->so_laddr = inso->so_laddr; + so->so_lport = inso->so_lport; + } + + tcp_mss(sototcpcb(so), 0); + + s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); + if (s < 0) { + tcp_close(sototcpcb(so)); /* This will sofree() as well */ + return; + } + qemu_set_nonblock(s); + socket_set_fast_reuse(s); + opt = 1; + qemu_setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); + socket_set_nodelay(s); + + so->so_fport = addr.sin_port; + so->so_faddr = addr.sin_addr; + /* Translate connections from localhost to the real hostname */ + if (so->so_faddr.s_addr == 0 || + (so->so_faddr.s_addr & loopback_mask) == + (loopback_addr.s_addr & loopback_mask)) { + so->so_faddr = slirp->vhost_addr; + } + + /* Close the accept() socket, set right state */ + if (inso->so_state & SS_FACCEPTONCE) { + /* If we only accept once, close the accept() socket */ + closesocket(so->s); + + /* Don't select it yet, even though we have an FD */ + /* if it's not FACCEPTONCE, it's already NOFDREF */ + so->so_state = SS_NOFDREF; + } + so->s = s; + so->so_state |= SS_INCOMING; + + so->so_iptos = tcp_tos(so); + tp = sototcpcb(so); + + tcp_template(tp); + + tp->t_state = TCPS_SYN_SENT; + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; + tp->iss = slirp->tcp_iss; + slirp->tcp_iss += TCP_ISSINCR/2; + tcp_sendseqinit(tp); + tcp_output(tp); +} + +/* + * Attach a TCPCB to a socket. + */ +int +tcp_attach(struct socket *so) +{ + if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) + return -1; + + insque(so, &so->slirp->tcb); + + return 0; +} + +/* + * Set the socket's type of service field + */ +static const struct tos_t tcptos[] = { + {0, 20, IPTOS_THROUGHPUT, 0}, /* ftp data */ + {21, 21, IPTOS_LOWDELAY, EMU_FTP}, /* ftp control */ + {0, 23, IPTOS_LOWDELAY, 0}, /* telnet */ + {0, 80, IPTOS_THROUGHPUT, 0}, /* WWW */ + {0, 513, IPTOS_LOWDELAY, EMU_RLOGIN|EMU_NOCONNECT}, /* rlogin */ + {0, 514, IPTOS_LOWDELAY, EMU_RSH|EMU_NOCONNECT}, /* shell */ + {0, 544, IPTOS_LOWDELAY, EMU_KSH}, /* kshell */ + {0, 543, IPTOS_LOWDELAY, 0}, /* klogin */ + {0, 6667, IPTOS_THROUGHPUT, EMU_IRC}, /* IRC */ + {0, 6668, IPTOS_THROUGHPUT, EMU_IRC}, /* IRC undernet */ + {0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ + {0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ + {0, 0, 0, 0} +}; + +static struct emu_t *tcpemu = NULL; + +/* + * Return TOS according to the above table + */ +uint8_t +tcp_tos(struct socket *so) +{ + int i = 0; + struct emu_t *emup; + + while(tcptos[i].tos) { + if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || + (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { + so->so_emu = tcptos[i].emu; + return tcptos[i].tos; + } + i++; + } + + /* Nope, lets see if there's a user-added one */ + for (emup = tcpemu; emup; emup = emup->next) { + if ((emup->fport && (ntohs(so->so_fport) == emup->fport)) || + (emup->lport && (ntohs(so->so_lport) == emup->lport))) { + so->so_emu = emup->emu; + return emup->tos; + } + } + + return 0; +} + +/* + * Emulate programs that try and connect to us + * This includes ftp (the data connection is + * initiated by the server) and IRC (DCC CHAT and + * DCC SEND) for now + * + * NOTE: It's possible to crash SLiRP by sending it + * unstandard strings to emulate... if this is a problem, + * more checks are needed here + * + * XXX Assumes the whole command came in one packet + * + * XXX Some ftp clients will have their TOS set to + * LOWDELAY and so Nagel will kick in. Because of this, + * we'll get the first letter, followed by the rest, so + * we simply scan for ORT instead of PORT... + * DCC doesn't have this problem because there's other stuff + * in the packet before the DCC command. + * + * Return 1 if the mbuf m is still valid and should be + * sbappend()ed + * + * NOTE: if you return 0 you MUST m_free() the mbuf! + */ +int +tcp_emu(struct socket *so, struct mbuf *m) +{ + Slirp *slirp = so->slirp; + u_int n1, n2, n3, n4, n5, n6; + char buff[257]; + uint32_t laddr; + u_int lport; + char *bptr; + + DEBUG_CALL("tcp_emu"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m = %lx", (long)m); + + switch(so->so_emu) { + int x, i; + + case EMU_IDENT: + /* + * Identification protocol as per rfc-1413 + */ + + { + struct socket *tmpso; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(struct sockaddr_in); + struct sbuf *so_rcv = &so->so_rcv; + + memcpy(so_rcv->sb_wptr, m->m_data, m->m_len); + so_rcv->sb_wptr += m->m_len; + so_rcv->sb_rptr += m->m_len; + m->m_data[m->m_len] = 0; /* NULL terminate */ + if (strchr(m->m_data, '\r') || strchr(m->m_data, '\n')) { + if (sscanf(so_rcv->sb_data, "%u%*[ ,]%u", &n1, &n2) == 2) { + HTONS(n1); + HTONS(n2); + /* n2 is the one on our host */ + for (tmpso = slirp->tcb.so_next; + tmpso != &slirp->tcb; + tmpso = tmpso->so_next) { + if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && + tmpso->so_lport == n2 && + tmpso->so_faddr.s_addr == so->so_faddr.s_addr && + tmpso->so_fport == n1) { + if (getsockname(tmpso->s, + (struct sockaddr *)&addr, &addrlen) == 0) + n2 = ntohs(addr.sin_port); + break; + } + } + } + so_rcv->sb_cc = snprintf(so_rcv->sb_data, + so_rcv->sb_datalen, + "%d,%d\r\n", n1, n2); + so_rcv->sb_rptr = so_rcv->sb_data; + so_rcv->sb_wptr = so_rcv->sb_data + so_rcv->sb_cc; + } + m_free(m); + return 0; + } + + case EMU_FTP: /* ftp */ + *(m->m_data+m->m_len) = 0; /* NUL terminate for strstr */ + if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { + /* + * Need to emulate the PORT command + */ + x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", + &n1, &n2, &n3, &n4, &n5, &n6, buff); + if (x < 6) + return 1; + + laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); + lport = htons((n5 << 8) | (n6)); + + if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, + lport, SS_FACCEPTONCE)) == NULL) { + return 1; + } + n6 = ntohs(so->so_fport); + + n5 = (n6 >> 8) & 0xff; + n6 &= 0xff; + + laddr = ntohl(so->so_faddr.s_addr); + + n1 = ((laddr >> 24) & 0xff); + n2 = ((laddr >> 16) & 0xff); + n3 = ((laddr >> 8) & 0xff); + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size - m->m_len, + "ORT %d,%d,%d,%d,%d,%d\r\n%s", + n1, n2, n3, n4, n5, n6, x==7?buff:""); + return 1; + } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { + /* + * Need to emulate the PASV response + */ + x = sscanf(bptr, "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", + &n1, &n2, &n3, &n4, &n5, &n6, buff); + if (x < 6) + return 1; + + laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); + lport = htons((n5 << 8) | (n6)); + + if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, + lport, SS_FACCEPTONCE)) == NULL) { + return 1; + } + n6 = ntohs(so->so_fport); + + n5 = (n6 >> 8) & 0xff; + n6 &= 0xff; + + laddr = ntohl(so->so_faddr.s_addr); + + n1 = ((laddr >> 24) & 0xff); + n2 = ((laddr >> 16) & 0xff); + n3 = ((laddr >> 8) & 0xff); + n4 = (laddr & 0xff); + + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size - m->m_len, + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x==7?buff:""); + + return 1; + } + + return 1; + + case EMU_KSH: + /* + * The kshell (Kerberos rsh) and shell services both pass + * a local port port number to carry signals to the server + * and stderr to the client. It is passed at the beginning + * of the connection as a NUL-terminated decimal ASCII string. + */ + so->so_emu = 0; + for (lport = 0, i = 0; i < m->m_len-1; ++i) { + if (m->m_data[i] < '0' || m->m_data[i] > '9') + return 1; /* invalid number */ + lport *= 10; + lport += m->m_data[i] - '0'; + } + if (m->m_data[m->m_len-1] == '\0' && lport != 0 && + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) + m->m_len = snprintf(m->m_data, m->m_size, "%d", + ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: + /* + * Need to emulate DCC CHAT, DCC SEND and DCC MOVE + */ + *(m->m_data+m->m_len) = 0; /* NULL terminate the string for strstr */ + if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) + return 1; + + /* The %256s is for the broken mIRC */ + if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, + htonl(laddr), htons(lport), + SS_FACCEPTONCE)) == NULL) { + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size, + "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, + htonl(laddr), htons(lport), + SS_FACCEPTONCE)) == NULL) { + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size, + "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, &n1) == 4) { + if ((so = tcp_listen(slirp, INADDR_ANY, 0, + htonl(laddr), htons(lport), + SS_FACCEPTONCE)) == NULL) { + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += snprintf(bptr, m->m_size, + "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } + return 1; + + case EMU_REALAUDIO: + /* + * RealAudio emulation - JP. We must try to parse the incoming + * data and try to find the two characters that contain the + * port number. Then we redirect an udp port and replace the + * number with the real port we got. + * + * The 1.0 beta versions of the player are not supported + * any more. + * + * A typical packet for player version 1.0 (release version): + * + * 0000:50 4E 41 00 05 + * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P + * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH + * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v + * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB + * + * Now the port number 0x1BD7 is found at offset 0x04 of the + * Now the port number 0x1BD7 is found at offset 0x04 of the + * second packet. This time we received five bytes first and + * then the rest. You never know how many bytes you get. + * + * A typical packet for player version 2.0 (beta): + * + * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. + * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 + * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ + * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas + * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B + * + * Port number 0x1BC1 is found at offset 0x0d. + * + * This is just a horrible switch statement. Variable ra tells + * us where we're going. + */ + + bptr = m->m_data; + while (bptr < m->m_data + m->m_len) { + u_short p; + static int ra = 0; + char ra_tbl[4]; + + ra_tbl[0] = 0x50; + ra_tbl[1] = 0x4e; + ra_tbl[2] = 0x41; + ra_tbl[3] = 0; + + switch (ra) { + case 0: + case 2: + case 3: + if (*bptr++ != ra_tbl[ra]) { + ra = 0; + continue; + } + break; + + case 1: + /* + * We may get 0x50 several times, ignore them + */ + if (*bptr == 0x50) { + ra = 1; + bptr++; + continue; + } else if (*bptr++ != ra_tbl[ra]) { + ra = 0; + continue; + } + break; + + case 4: + /* + * skip version number + */ + bptr++; + break; + + case 5: + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of + * the player this may need to be modified. + */ + if (*(bptr + 1) == 0x02) + bptr += 8; + else + bptr += 4; + break; + + case 6: + /* This is the field containing the port + * number that RA-player is listening to. + */ + lport = (((u_char*)bptr)[0] << 8) + + ((u_char *)bptr)[1]; + if (lport < 6970) + lport += 256; /* don't know why */ + if (lport < 6970 || lport > 7170) + return 1; /* failed */ + + /* try to get udp port between 6970 - 7170 */ + for (p = 6970; p < 7071; p++) { + if (udp_listen(slirp, INADDR_ANY, + htons(p), + so->so_laddr.s_addr, + htons(lport), + SS_FACCEPTONCE)) { + break; + } + } + if (p == 7071) + p = 0; + *(u_char *)bptr++ = (p >> 8) & 0xff; + *(u_char *)bptr = p & 0xff; + ra = 0; + return 1; /* port redirected, we're done */ + break; + + default: + ra = 0; + } + ra++; + } + return 1; + + default: + /* Ooops, not emulated, won't call tcp_emu again */ + so->so_emu = 0; + return 1; + } +} + +/* + * Do misc. config of SLiRP while its running. + * Return 0 if this connections is to be closed, 1 otherwise, + * return 2 if this is a command-line connection + */ +int tcp_ctl(struct socket *so) +{ + Slirp *slirp = so->slirp; + struct sbuf *sb = &so->so_snd; + struct ex_list *ex_ptr; + int do_pty; + + DEBUG_CALL("tcp_ctl"); + DEBUG_ARG("so = %lx", (long )so); + + if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { + /* Check if it's pty_exec */ + for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { + if (ex_ptr->ex_fport == so->so_fport && + so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { + if (ex_ptr->ex_pty == 3) { + so->s = -1; + so->extra = (void *)ex_ptr->ex_exec; + return 1; + } + do_pty = ex_ptr->ex_pty; + DEBUG_MISC((dfd, " executing %s\n", ex_ptr->ex_exec)); + return fork_exec(so, ex_ptr->ex_exec, do_pty); + } + } + } + sb->sb_cc = + snprintf(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), + "Error: No application configured.\r\n"); + sb->sb_wptr += sb->sb_cc; + return 0; +} diff --git a/slirp/tcp_timer.c b/slirp/tcp_timer.c new file mode 100644 index 00000000..6c5bb11c --- /dev/null +++ b/slirp/tcp_timer.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 + * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp + */ + +#include + +static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); + +/* + * Fast timeout routine for processing delayed acks + */ +void +tcp_fasttimo(Slirp *slirp) +{ + register struct socket *so; + register struct tcpcb *tp; + + DEBUG_CALL("tcp_fasttimo"); + + so = slirp->tcb.so_next; + if (so) + for (; so != &slirp->tcb; so = so->so_next) + if ((tp = (struct tcpcb *)so->so_tcpcb) && + (tp->t_flags & TF_DELACK)) { + tp->t_flags &= ~TF_DELACK; + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + } +} + +/* + * Tcp protocol timeout routine called every 500 ms. + * Updates the timers in all active tcb's and + * causes finite state machine actions if timers expire. + */ +void +tcp_slowtimo(Slirp *slirp) +{ + register struct socket *ip, *ipnxt; + register struct tcpcb *tp; + register int i; + + DEBUG_CALL("tcp_slowtimo"); + + /* + * Search through tcb's and update active timers. + */ + ip = slirp->tcb.so_next; + if (ip == NULL) { + return; + } + for (; ip != &slirp->tcb; ip = ipnxt) { + ipnxt = ip->so_next; + tp = sototcpcb(ip); + if (tp == NULL) { + continue; + } + for (i = 0; i < TCPT_NTIMERS; i++) { + if (tp->t_timer[i] && --tp->t_timer[i] == 0) { + tcp_timers(tp,i); + if (ipnxt->so_prev != ip) + goto tpgone; + } + } + tp->t_idle++; + if (tp->t_rtt) + tp->t_rtt++; +tpgone: + ; + } + slirp->tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ + slirp->tcp_now++; /* for timestamps */ +} + +/* + * Cancel all timers for TCP tp. + */ +void +tcp_canceltimers(struct tcpcb *tp) +{ + register int i; + + for (i = 0; i < TCPT_NTIMERS; i++) + tp->t_timer[i] = 0; +} + +const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; + +/* + * TCP timer processing. + */ +static struct tcpcb * +tcp_timers(register struct tcpcb *tp, int timer) +{ + register int rexmt; + + DEBUG_CALL("tcp_timers"); + + switch (timer) { + + /* + * 2 MSL timeout in shutdown went off. If we're closed but + * still waiting for peer to close and connection has been idle + * too long, or if 2MSL time is up from TIME_WAIT, delete connection + * control block. Otherwise, check again in a bit. + */ + case TCPT_2MSL: + if (tp->t_state != TCPS_TIME_WAIT && + tp->t_idle <= TCP_MAXIDLE) + tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; + else + tp = tcp_close(tp); + break; + + /* + * Retransmission timer went off. Message has not + * been acked within retransmit interval. Back off + * to a longer retransmit interval and retransmit one segment. + */ + case TCPT_REXMT: + + /* + * XXXXX If a packet has timed out, then remove all the queued + * packets for that session. + */ + + if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { + /* + * This is a hack to suit our terminal server here at the uni of canberra + * since they have trouble with zeroes... It usually lets them through + * unharmed, but under some conditions, it'll eat the zeros. If we + * keep retransmitting it, it'll keep eating the zeroes, so we keep + * retransmitting, and eventually the connection dies... + * (this only happens on incoming data) + * + * So, if we were gonna drop the connection from too many retransmits, + * don't... instead halve the t_maxseg, which might break up the NULLs and + * let them through + * + * *sigh* + */ + + tp->t_maxseg >>= 1; + if (tp->t_maxseg < 32) { + /* + * We tried our best, now the connection must die! + */ + tp->t_rxtshift = TCP_MAXRXTSHIFT; + tp = tcp_drop(tp, tp->t_softerror); + /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ + return (tp); /* XXX */ + } + + /* + * Set rxtshift to 6, which is still at the maximum + * backoff time + */ + tp->t_rxtshift = 6; + } + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + TCPT_RANGESET(tp->t_rxtcur, rexmt, + (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */ + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + /* + * If losing, let the lower level know and try for + * a better route. Also, if we backed off this far, + * our srtt estimate is probably bogus. Clobber it + * so we'll take the next rtt measurement as our srtt; + * move the current srtt into rttvar to keep the current + * retransmit times until then. + */ + if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { + tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); + tp->t_srtt = 0; + } + tp->snd_nxt = tp->snd_una; + /* + * If timing a segment in this window, stop the timer. + */ + tp->t_rtt = 0; + /* + * Close the congestion window down to one segment + * (we'll open it by one segment for each ack we get). + * Since we probably have a window's worth of unacked + * data accumulated, this "slow start" keeps us from + * dumping all that data as back-to-back packets (which + * might overwhelm an intermediate gateway). + * + * There are two phases to the opening: Initially we + * open by one mss on each ack. This makes the window + * size increase exponentially with time. If the + * window is larger than the path can handle, this + * exponential growth results in dropped packet(s) + * almost immediately. To get more time between + * drops but still "push" the network to take advantage + * of improving conditions, we switch from exponential + * to linear window opening at some threshold size. + * For a threshold, we use half the current window + * size, truncated to a multiple of the mss. + * + * (the minimum cwnd that will give us exponential + * growth is 2 mss. We don't allow the threshold + * to go below this.) + */ + { + u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; + if (win < 2) + win = 2; + tp->snd_cwnd = tp->t_maxseg; + tp->snd_ssthresh = win * tp->t_maxseg; + tp->t_dupacks = 0; + } + (void) tcp_output(tp); + break; + + /* + * Persistence timer into zero window. + * Force a byte to be output, if possible. + */ + case TCPT_PERSIST: + tcp_setpersist(tp); + tp->t_force = 1; + (void) tcp_output(tp); + tp->t_force = 0; + break; + + /* + * Keep-alive timer went off; send something + * or drop connection if idle for too long. + */ + case TCPT_KEEP: + if (tp->t_state < TCPS_ESTABLISHED) + goto dropit; + + if ((SO_OPTIONS) && tp->t_state <= TCPS_CLOSE_WAIT) { + if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) + goto dropit; + /* + * Send a packet designed to force a response + * if the peer is up and reachable: + * either an ACK if the connection is still alive, + * or an RST if the peer has closed the connection + * due to timeout or reboot. + * Using sequence number tp->snd_una-1 + * causes the transmitted zero-length segment + * to lie outside the receive window; + * by the protocol spec, this requires the + * correspondent TCP to respond. + */ + tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0); + tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; + } else + tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; + break; + + dropit: + tp = tcp_drop(tp, 0); + break; + } + + return (tp); +} diff --git a/slirp/tcp_timer.h b/slirp/tcp_timer.h new file mode 100644 index 00000000..ff17914f --- /dev/null +++ b/slirp/tcp_timer.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 + * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp + */ + +#ifndef _TCP_TIMER_H_ +#define _TCP_TIMER_H_ + +/* + * Definitions of the TCP timers. These timers are counted + * down PR_SLOWHZ times a second. + */ +#define TCPT_NTIMERS 4 + +#define TCPT_REXMT 0 /* retransmit */ +#define TCPT_PERSIST 1 /* retransmit persistence */ +#define TCPT_KEEP 2 /* keep alive */ +#define TCPT_2MSL 3 /* 2*msl quiet time timer */ + +/* + * The TCPT_REXMT timer is used to force retransmissions. + * The TCP has the TCPT_REXMT timer set whenever segments + * have been sent for which ACKs are expected but not yet + * received. If an ACK is received which advances tp->snd_una, + * then the retransmit timer is cleared (if there are no more + * outstanding segments) or reset to the base value (if there + * are more ACKs expected). Whenever the retransmit timer goes off, + * we retransmit one unacknowledged segment, and do a backoff + * on the retransmit timer. + * + * The TCPT_PERSIST timer is used to keep window size information + * flowing even if the window goes shut. If all previous transmissions + * have been acknowledged (so that there are no retransmissions in progress), + * and the window is too small to bother sending anything, then we start + * the TCPT_PERSIST timer. When it expires, if the window is nonzero, + * we go to transmit state. Otherwise, at intervals send a single byte + * into the peer's window to force him to update our window information. + * We do this at most as often as TCPT_PERSMIN time intervals, + * but no more frequently than the current estimate of round-trip + * packet time. The TCPT_PERSIST timer is cleared whenever we receive + * a window update from the peer. + * + * The TCPT_KEEP timer is used to keep connections alive. If an + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, + * but not yet established, then we drop the connection. Once the connection + * is established, if the connection is idle for TCPTV_KEEP_IDLE time + * (and keepalives have been enabled on the socket), we begin to probe + * the connection. We force the peer to send us a segment by sending: + * + * This segment is (deliberately) outside the window, and should elicit + * an ack segment in response from the peer. If, despite the TCPT_KEEP + * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE + * amount of time probing, then we drop the connection. + */ + +/* + * Time constants. + */ +#define TCPTV_MSL ( 5*PR_SLOWHZ) /* max seg lifetime (hah!) */ + +#define TCPTV_SRTTBASE 0 /* base roundtrip time; + if 0, no idea yet */ +#define TCPTV_SRTTDFLT ( 3*PR_SLOWHZ) /* assumed RTT if no info */ + +#define TCPTV_PERSMIN ( 5*PR_SLOWHZ) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*PR_SLOWHZ) /* maximum persist interval */ + +#define TCPTV_KEEP_INIT ( 75*PR_SLOWHZ) /* initial connect keep alive */ +#define TCPTV_KEEP_IDLE (120*60*PR_SLOWHZ) /* dflt time before probing */ +#define TCPTV_KEEPINTVL ( 75*PR_SLOWHZ) /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ + +#define TCPTV_MIN ( 1*PR_SLOWHZ) /* minimum allowable value */ +#define TCPTV_REXMTMAX ( 12*PR_SLOWHZ) /* max allowable REXMT value */ + +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ + +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ + + +/* + * Force a time value to be in a certain range. + */ +#define TCPT_RANGESET(tv, value, tvmin, tvmax) { \ + (tv) = (value); \ + if ((tv) < (tvmin)) \ + (tv) = (tvmin); \ + else if ((tv) > (tvmax)) \ + (tv) = (tvmax); \ +} + +extern const int tcp_backoff[]; + +struct tcpcb; + +void tcp_fasttimo(Slirp *); +void tcp_slowtimo(Slirp *); +void tcp_canceltimers(struct tcpcb *); + +#endif diff --git a/slirp/tcp_var.h b/slirp/tcp_var.h new file mode 100644 index 00000000..004193fb --- /dev/null +++ b/slirp/tcp_var.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 1982, 1986, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 + * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp + */ + +#ifndef _TCP_VAR_H_ +#define _TCP_VAR_H_ + +#include "tcpip.h" +#include "tcp_timer.h" + +/* + * Tcp control block, one per tcp; fields: + */ +struct tcpcb { + struct tcpiphdr *seg_next; /* sequencing queue */ + struct tcpiphdr *seg_prev; + short t_state; /* state of this connection */ + short t_timer[TCPT_NTIMERS]; /* tcp timers */ + short t_rxtshift; /* log(2) of rexmt exp. backoff */ + short t_rxtcur; /* current retransmit value */ + short t_dupacks; /* consecutive dup acks recd */ + u_short t_maxseg; /* maximum segment size */ + char t_force; /* 1 if forcing out a byte */ + u_short t_flags; +#define TF_ACKNOW 0x0001 /* ack peer immediately */ +#define TF_DELACK 0x0002 /* ack, but try to delay it */ +#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x0008 /* don't use tcp options */ +#define TF_SENTFIN 0x0010 /* have sent FIN */ +#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ + + struct tcpiphdr t_template; /* static skeletal packet for xmit */ + + struct socket *t_socket; /* back pointer to socket */ +/* + * The following fields are used as in the protocol specification. + * See RFC783, Dec. 1981, page 21. + */ +/* send sequence variables */ + tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_nxt; /* send next */ + tcp_seq snd_up; /* send urgent pointer */ + tcp_seq snd_wl1; /* window update seg seq number */ + tcp_seq snd_wl2; /* window update seg ack number */ + tcp_seq iss; /* initial send sequence number */ + uint32_t snd_wnd; /* send window */ +/* receive sequence variables */ + uint32_t rcv_wnd; /* receive window */ + tcp_seq rcv_nxt; /* receive next */ + tcp_seq rcv_up; /* receive urgent pointer */ + tcp_seq irs; /* initial receive sequence number */ +/* + * Additional variables for this implementation. + */ +/* receive variables */ + tcp_seq rcv_adv; /* advertised window */ +/* retransmit variables */ + tcp_seq snd_max; /* highest sequence number sent; + * used to recognize retransmits + */ +/* congestion control (for slow start, source quench, retransmit after loss) */ + uint32_t snd_cwnd; /* congestion-controlled window */ + uint32_t snd_ssthresh; /* snd_cwnd size threshold for + * for slow start exponential to + * linear switch + */ +/* + * transmit timing stuff. See below for scale of srtt and rttvar. + * "Variance" is actually smoothed difference. + */ + short t_idle; /* inactivity time */ + short t_rtt; /* round trip time */ + tcp_seq t_rtseq; /* sequence number being timed */ + short t_srtt; /* smoothed round-trip time */ + short t_rttvar; /* variance in round-trip time */ + u_short t_rttmin; /* minimum rtt allowed */ + uint32_t max_sndwnd; /* largest window peer has offered */ + +/* out-of-band data */ + char t_oobflags; /* have some */ + char t_iobc; /* input character */ +#define TCPOOB_HAVEDATA 0x01 +#define TCPOOB_HADDATA 0x02 + short t_softerror; /* possible error not yet reported */ + +/* RFC 1323 variables */ + u_char snd_scale; /* window scaling for send window */ + u_char rcv_scale; /* window scaling for recv window */ + u_char request_r_scale; /* pending window scaling */ + u_char requested_s_scale; + uint32_t ts_recent; /* timestamp echo data */ + uint32_t ts_recent_age; /* when last updated */ + tcp_seq last_ack_sent; + +}; + +#define sototcpcb(so) ((so)->so_tcpcb) + +/* + * The smoothed round-trip time and estimated variance + * are stored as fixed point numbers scaled by the values below. + * For convenience, these scales are also used in smoothing the average + * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). + * With these scales, srtt has 3 bits to the right of the binary point, + * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the + * binary point, and is smoothed with an ALPHA of 0.75. + */ +#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ +#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ +#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ +#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ + +/* + * The initial retransmission should happen at rtt + 4 * rttvar. + * Because of the way we do the smoothing, srtt and rttvar + * will each average +1/2 tick of bias. When we compute + * the retransmit timer, we want 1/2 tick of rounding and + * 1 extra tick because of +-1/2 tick uncertainty in the + * firing of the timer. The bias will give us exactly the + * 1.5 tick we need. But, because the bias is + * statistical, we have to test that we don't drop below + * the minimum feasible timer (which is 2 ticks). + * This macro assumes that the value of TCP_RTTVAR_SCALE + * is the same as the multiplier for rttvar. + */ +#define TCP_REXMTVAL(tp) \ + (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) + +#endif diff --git a/slirp/tcpip.h b/slirp/tcpip.h new file mode 100644 index 00000000..7974ce3d --- /dev/null +++ b/slirp/tcpip.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 + * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp + */ + +#ifndef _TCPIP_H_ +#define _TCPIP_H_ + +/* + * Tcp+ip header, after ip options removed. + */ +struct tcpiphdr { + struct ipovly ti_i; /* overlaid ip structure */ + struct tcphdr ti_t; /* tcp header */ +}; +#define ti_mbuf ti_i.ih_mbuf.mptr +#define ti_x1 ti_i.ih_x1 +#define ti_pr ti_i.ih_pr +#define ti_len ti_i.ih_len +#define ti_src ti_i.ih_src +#define ti_dst ti_i.ih_dst +#define ti_sport ti_t.th_sport +#define ti_dport ti_t.th_dport +#define ti_seq ti_t.th_seq +#define ti_ack ti_t.th_ack +#define ti_x2 ti_t.th_x2 +#define ti_off ti_t.th_off +#define ti_flags ti_t.th_flags +#define ti_win ti_t.th_win +#define ti_sum ti_t.th_sum +#define ti_urp ti_t.th_urp + +#define tcpiphdr2qlink(T) ((struct qlink*)(((char*)(T)) - sizeof(struct qlink))) +#define qlink2tcpiphdr(Q) ((struct tcpiphdr*)(((char*)(Q)) + sizeof(struct qlink))) +#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) +#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) +#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) +#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink*)(T)) +#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr*)(T)) + +/* + * Just a clean way to get to the first byte + * of the packet + */ +struct tcpiphdr_2 { + struct tcpiphdr dummy; + char first_char; +}; + +#endif diff --git a/slirp/tftp.c b/slirp/tftp.c new file mode 100644 index 00000000..a329fb28 --- /dev/null +++ b/slirp/tftp.c @@ -0,0 +1,442 @@ +/* + * tftp.c - a simple, read-only tftp server for qemu + * + * Copyright (c) 2004 Magnus Damm + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include "qemu-common.h" + +static inline int tftp_session_in_use(struct tftp_session *spt) +{ + return (spt->slirp != NULL); +} + +static inline void tftp_session_update(struct tftp_session *spt) +{ + spt->timestamp = curtime; +} + +static void tftp_session_terminate(struct tftp_session *spt) +{ + if (spt->fd >= 0) { + close(spt->fd); + spt->fd = -1; + } + g_free(spt->filename); + spt->slirp = NULL; +} + +static int tftp_session_allocate(Slirp *slirp, struct tftp_t *tp) +{ + struct tftp_session *spt; + int k; + + for (k = 0; k < TFTP_SESSIONS_MAX; k++) { + spt = &slirp->tftp_sessions[k]; + + if (!tftp_session_in_use(spt)) + goto found; + + /* sessions time out after 5 inactive seconds */ + if ((int)(curtime - spt->timestamp) > 5000) { + tftp_session_terminate(spt); + goto found; + } + } + + return -1; + + found: + memset(spt, 0, sizeof(*spt)); + memcpy(&spt->client_ip, &tp->ip.ip_src, sizeof(spt->client_ip)); + spt->fd = -1; + spt->client_port = tp->udp.uh_sport; + spt->slirp = slirp; + + tftp_session_update(spt); + + return k; +} + +static int tftp_session_find(Slirp *slirp, struct tftp_t *tp) +{ + struct tftp_session *spt; + int k; + + for (k = 0; k < TFTP_SESSIONS_MAX; k++) { + spt = &slirp->tftp_sessions[k]; + + if (tftp_session_in_use(spt)) { + if (!memcmp(&spt->client_ip, &tp->ip.ip_src, sizeof(spt->client_ip))) { + if (spt->client_port == tp->udp.uh_sport) { + return k; + } + } + } + } + + return -1; +} + +static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, + uint8_t *buf, int len) +{ + int bytes_read = 0; + + if (spt->fd < 0) { + spt->fd = open(spt->filename, O_RDONLY | O_BINARY); + } + + if (spt->fd < 0) { + return -1; + } + + if (len) { + lseek(spt->fd, block_nr * 512, SEEK_SET); + + bytes_read = read(spt->fd, buf, len); + } + + return bytes_read; +} + +static int tftp_send_oack(struct tftp_session *spt, + const char *keys[], uint32_t values[], int nb, + struct tftp_t *recv_tp) +{ + struct sockaddr_in saddr, daddr; + struct mbuf *m; + struct tftp_t *tp; + int i, n = 0; + + m = m_get(spt->slirp); + + if (!m) + return -1; + + memset(m->m_data, 0, m->m_size); + + m->m_data += IF_MAXLINKHDR; + tp = (void *)m->m_data; + m->m_data += sizeof(struct udpiphdr); + + tp->tp_op = htons(TFTP_OACK); + for (i = 0; i < nb; i++) { + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", + keys[i]) + 1; + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", + values[i]) + 1; + } + + saddr.sin_addr = recv_tp->ip.ip_dst; + saddr.sin_port = recv_tp->udp.uh_dport; + + daddr.sin_addr = spt->client_ip; + daddr.sin_port = spt->client_port; + + m->m_len = sizeof(struct tftp_t) - 514 + n - + sizeof(struct ip) - sizeof(struct udphdr); + udp_output2(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); + + return 0; +} + +static void tftp_send_error(struct tftp_session *spt, + uint16_t errorcode, const char *msg, + struct tftp_t *recv_tp) +{ + struct sockaddr_in saddr, daddr; + struct mbuf *m; + struct tftp_t *tp; + + m = m_get(spt->slirp); + + if (!m) { + goto out; + } + + memset(m->m_data, 0, m->m_size); + + m->m_data += IF_MAXLINKHDR; + tp = (void *)m->m_data; + m->m_data += sizeof(struct udpiphdr); + + tp->tp_op = htons(TFTP_ERROR); + tp->x.tp_error.tp_error_code = htons(errorcode); + pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), msg); + + saddr.sin_addr = recv_tp->ip.ip_dst; + saddr.sin_port = recv_tp->udp.uh_dport; + + daddr.sin_addr = spt->client_ip; + daddr.sin_port = spt->client_port; + + m->m_len = sizeof(struct tftp_t) - 514 + 3 + strlen(msg) - + sizeof(struct ip) - sizeof(struct udphdr); + + udp_output2(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); + +out: + tftp_session_terminate(spt); +} + +static void tftp_send_next_block(struct tftp_session *spt, + struct tftp_t *recv_tp) +{ + struct sockaddr_in saddr, daddr; + struct mbuf *m; + struct tftp_t *tp; + int nobytes; + + m = m_get(spt->slirp); + + if (!m) { + return; + } + + memset(m->m_data, 0, m->m_size); + + m->m_data += IF_MAXLINKHDR; + tp = (void *)m->m_data; + m->m_data += sizeof(struct udpiphdr); + + tp->tp_op = htons(TFTP_DATA); + tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); + + saddr.sin_addr = recv_tp->ip.ip_dst; + saddr.sin_port = recv_tp->udp.uh_dport; + + daddr.sin_addr = spt->client_ip; + daddr.sin_port = spt->client_port; + + nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, 512); + + if (nobytes < 0) { + m_free(m); + + /* send "file not found" error back */ + + tftp_send_error(spt, 1, "File not found", tp); + + return; + } + + m->m_len = sizeof(struct tftp_t) - (512 - nobytes) - + sizeof(struct ip) - sizeof(struct udphdr); + + udp_output2(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); + + if (nobytes == 512) { + tftp_session_update(spt); + } + else { + tftp_session_terminate(spt); + } + + spt->block_nr++; +} + +static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen) +{ + struct tftp_session *spt; + int s, k; + size_t prefix_len; + char *req_fname; + const char *option_name[2]; + uint32_t option_value[2]; + int nb_options = 0; + + /* check if a session already exists and if so terminate it */ + s = tftp_session_find(slirp, tp); + if (s >= 0) { + tftp_session_terminate(&slirp->tftp_sessions[s]); + } + + s = tftp_session_allocate(slirp, tp); + + if (s < 0) { + return; + } + + spt = &slirp->tftp_sessions[s]; + + /* unspecified prefix means service disabled */ + if (!slirp->tftp_prefix) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + /* skip header fields */ + k = 0; + pktlen -= offsetof(struct tftp_t, x.tp_buf); + + /* prepend tftp_prefix */ + prefix_len = strlen(slirp->tftp_prefix); + spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); + memcpy(spt->filename, slirp->tftp_prefix, prefix_len); + spt->filename[prefix_len] = '/'; + + /* get name */ + req_fname = spt->filename + prefix_len + 1; + + while (1) { + if (k >= TFTP_FILENAME_MAX || k >= pktlen) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + req_fname[k] = tp->x.tp_buf[k]; + if (req_fname[k++] == '\0') { + break; + } + } + + /* check mode */ + if ((pktlen - k) < 6) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { + tftp_send_error(spt, 4, "Unsupported transfer mode", tp); + return; + } + + k += 6; /* skipping octet */ + + /* do sanity checks on the filename */ + if (!strncmp(req_fname, "../", 3) || + req_fname[strlen(req_fname) - 1] == '/' || + strstr(req_fname, "/../")) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + /* check if the file exists */ + if (tftp_read_data(spt, 0, NULL, 0) < 0) { + tftp_send_error(spt, 1, "File not found", tp); + return; + } + + if (tp->x.tp_buf[pktlen - 1] != 0) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + while (k < pktlen && nb_options < ARRAY_SIZE(option_name)) { + const char *key, *value; + + key = &tp->x.tp_buf[k]; + k += strlen(key) + 1; + + if (k >= pktlen) { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } + + value = &tp->x.tp_buf[k]; + k += strlen(value) + 1; + + if (strcasecmp(key, "tsize") == 0) { + int tsize = atoi(value); + struct stat stat_p; + + if (tsize == 0) { + if (stat(spt->filename, &stat_p) == 0) + tsize = stat_p.st_size; + else { + tftp_send_error(spt, 1, "File not found", tp); + return; + } + } + + option_name[nb_options] = "tsize"; + option_value[nb_options] = tsize; + nb_options++; + } else if (strcasecmp(key, "blksize") == 0) { + int blksize = atoi(value); + + /* If blksize option is bigger than what we will + * emit, accept the option with our packet size. + * Otherwise, simply do as we didn't see the option. + */ + if (blksize >= 512) { + option_name[nb_options] = "blksize"; + option_value[nb_options] = 512; + nb_options++; + } + } + } + + if (nb_options > 0) { + assert(nb_options <= ARRAY_SIZE(option_name)); + tftp_send_oack(spt, option_name, option_value, nb_options, tp); + return; + } + + spt->block_nr = 0; + tftp_send_next_block(spt, tp); +} + +static void tftp_handle_ack(Slirp *slirp, struct tftp_t *tp, int pktlen) +{ + int s; + + s = tftp_session_find(slirp, tp); + + if (s < 0) { + return; + } + + tftp_send_next_block(&slirp->tftp_sessions[s], tp); +} + +static void tftp_handle_error(Slirp *slirp, struct tftp_t *tp, int pktlen) +{ + int s; + + s = tftp_session_find(slirp, tp); + + if (s < 0) { + return; + } + + tftp_session_terminate(&slirp->tftp_sessions[s]); +} + +void tftp_input(struct mbuf *m) +{ + struct tftp_t *tp = (struct tftp_t *)m->m_data; + + switch(ntohs(tp->tp_op)) { + case TFTP_RRQ: + tftp_handle_rrq(m->slirp, tp, m->m_len); + break; + + case TFTP_ACK: + tftp_handle_ack(m->slirp, tp, m->m_len); + break; + + case TFTP_ERROR: + tftp_handle_error(m->slirp, tp, m->m_len); + break; + } +} diff --git a/slirp/tftp.h b/slirp/tftp.h new file mode 100644 index 00000000..e1cc24b9 --- /dev/null +++ b/slirp/tftp.h @@ -0,0 +1,49 @@ +/* tftp defines */ +#ifndef SLIRP_TFTP_H +#define SLIRP_TFTP_H 1 + +#define TFTP_SESSIONS_MAX 20 + +#define TFTP_SERVER 69 + +#define TFTP_RRQ 1 +#define TFTP_WRQ 2 +#define TFTP_DATA 3 +#define TFTP_ACK 4 +#define TFTP_ERROR 5 +#define TFTP_OACK 6 + +#define TFTP_FILENAME_MAX 512 + +struct tftp_t { + struct ip ip; + struct udphdr udp; + uint16_t tp_op; + union { + struct { + uint16_t tp_block_nr; + uint8_t tp_buf[512]; + } tp_data; + struct { + uint16_t tp_error_code; + uint8_t tp_msg[512]; + } tp_error; + char tp_buf[512 + 2]; + } x; +}; + +struct tftp_session { + Slirp *slirp; + char *filename; + int fd; + + struct in_addr client_ip; + uint16_t client_port; + uint32_t block_nr; + + int timestamp; +}; + +void tftp_input(struct mbuf *m); + +#endif diff --git a/slirp/udp.c b/slirp/udp.c new file mode 100644 index 00000000..f77e00f5 --- /dev/null +++ b/slirp/udp.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 + * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp + */ + +/* + * Changes and additions relating to SLiRP + * Copyright (c) 1995 Danny Gasparovski. + * + * Please read the file COPYRIGHT for the + * terms and conditions of the copyright. + */ + +#include +#include "ip_icmp.h" + +static uint8_t udp_tos(struct socket *so); + +void +udp_init(Slirp *slirp) +{ + slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; + slirp->udp_last_so = &slirp->udb; +} + +void udp_cleanup(Slirp *slirp) +{ + while (slirp->udb.so_next != &slirp->udb) { + udp_detach(slirp->udb.so_next); + } +} + +/* m->m_data points at ip packet header + * m->m_len length ip packet + * ip->ip_len length data (IPDU) + */ +void +udp_input(register struct mbuf *m, int iphlen) +{ + Slirp *slirp = m->slirp; + register struct ip *ip; + register struct udphdr *uh; + int len; + struct ip save_ip; + struct socket *so; + + DEBUG_CALL("udp_input"); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("iphlen = %d", iphlen); + + /* + * Strip IP options, if any; should skip this, + * make available to user, and use on returned packets, + * but we don't yet have a way to check the checksum + * with options still present. + */ + if(iphlen > sizeof(struct ip)) { + ip_stripoptions(m, (struct mbuf *)0); + iphlen = sizeof(struct ip); + } + + /* + * Get IP and UDP header together in first mbuf. + */ + ip = mtod(m, struct ip *); + uh = (struct udphdr *)((caddr_t)ip + iphlen); + + /* + * Make mbuf data length reflect UDP length. + * If not enough data to reflect UDP length, drop. + */ + len = ntohs((uint16_t)uh->uh_ulen); + + if (ip->ip_len != len) { + if (len > ip->ip_len) { + goto bad; + } + m_adj(m, len - ip->ip_len); + ip->ip_len = len; + } + + /* + * Save a copy of the IP header in case we want restore it + * for sending an ICMP error message in response. + */ + save_ip = *ip; + save_ip.ip_len+= iphlen; /* tcp_input subtracts this */ + + /* + * Checksum extended UDP header and data. + */ + if (uh->uh_sum) { + memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); + ((struct ipovly *)ip)->ih_x1 = 0; + ((struct ipovly *)ip)->ih_len = uh->uh_ulen; + if(cksum(m, len + sizeof(struct ip))) { + goto bad; + } + } + + /* + * handle DHCP/BOOTP + */ + if (ntohs(uh->uh_dport) == BOOTP_SERVER && + (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || + ip->ip_dst.s_addr == 0xffffffff)) { + bootp_input(m); + goto bad; + } + + /* + * handle TFTP + */ + if (ntohs(uh->uh_dport) == TFTP_SERVER && + ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { + tftp_input(m); + goto bad; + } + + if (slirp->restricted) { + goto bad; + } + + /* + * Locate pcb for datagram. + */ + so = slirp->udp_last_so; + if (so == &slirp->udb || so->so_lport != uh->uh_sport || + so->so_laddr.s_addr != ip->ip_src.s_addr) { + struct socket *tmp; + + for (tmp = slirp->udb.so_next; tmp != &slirp->udb; + tmp = tmp->so_next) { + if (tmp->so_lport == uh->uh_sport && + tmp->so_laddr.s_addr == ip->ip_src.s_addr) { + so = tmp; + break; + } + } + if (tmp == &slirp->udb) { + so = NULL; + } else { + slirp->udp_last_so = so; + } + } + + if (so == NULL) { + /* + * If there's no socket for this packet, + * create one + */ + so = socreate(slirp); + if (!so) { + goto bad; + } + if(udp_attach(so) == -1) { + DEBUG_MISC((dfd," udp_attach errno = %d-%s\n", + errno,strerror(errno))); + sofree(so); + goto bad; + } + + /* + * Setup fields + */ + so->so_laddr = ip->ip_src; + so->so_lport = uh->uh_sport; + + if ((so->so_iptos = udp_tos(so)) == 0) + so->so_iptos = ip->ip_tos; + + /* + * XXXXX Here, check if it's in udpexec_list, + * and if it is, do the fork_exec() etc. + */ + } + + so->so_faddr = ip->ip_dst; /* XXX */ + so->so_fport = uh->uh_dport; /* XXX */ + + iphlen += sizeof(struct udphdr); + m->m_len -= iphlen; + m->m_data += iphlen; + + /* + * Now we sendto() the packet. + */ + if(sosendto(so,m) == -1) { + m->m_len += iphlen; + m->m_data -= iphlen; + *ip=save_ip; + DEBUG_MISC((dfd,"udp tx errno = %d-%s\n",errno,strerror(errno))); + icmp_error(m, ICMP_UNREACH,ICMP_UNREACH_NET, 0,strerror(errno)); + } + + m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ + + /* restore the orig mbuf packet */ + m->m_len += iphlen; + m->m_data -= iphlen; + *ip=save_ip; + so->so_m=m; /* ICMP backup */ + + return; +bad: + m_free(m); +} + +int udp_output2(struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos) +{ + register struct udpiphdr *ui; + int error = 0; + + DEBUG_CALL("udp_output"); + DEBUG_ARG("so = %lx", (long)so); + DEBUG_ARG("m = %lx", (long)m); + DEBUG_ARG("saddr = %lx", (long)saddr->sin_addr.s_addr); + DEBUG_ARG("daddr = %lx", (long)daddr->sin_addr.s_addr); + + /* + * Adjust for header + */ + m->m_data -= sizeof(struct udpiphdr); + m->m_len += sizeof(struct udpiphdr); + + /* + * Fill in mbuf with extended UDP header + * and addresses and length put into network format. + */ + ui = mtod(m, struct udpiphdr *); + memset(&ui->ui_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr)); + ui->ui_x1 = 0; + ui->ui_pr = IPPROTO_UDP; + ui->ui_len = htons(m->m_len - sizeof(struct ip)); + /* XXXXX Check for from-one-location sockets, or from-any-location sockets */ + ui->ui_src = saddr->sin_addr; + ui->ui_dst = daddr->sin_addr; + ui->ui_sport = saddr->sin_port; + ui->ui_dport = daddr->sin_port; + ui->ui_ulen = ui->ui_len; + + /* + * Stuff checksum and output datagram. + */ + ui->ui_sum = 0; + if ((ui->ui_sum = cksum(m, m->m_len)) == 0) + ui->ui_sum = 0xffff; + ((struct ip *)ui)->ip_len = m->m_len; + + ((struct ip *)ui)->ip_ttl = IPDEFTTL; + ((struct ip *)ui)->ip_tos = iptos; + + error = ip_output(so, m); + + return (error); +} + +int udp_output(struct socket *so, struct mbuf *m, + struct sockaddr_in *addr) + +{ + Slirp *slirp = so->slirp; + struct sockaddr_in saddr, daddr; + + saddr = *addr; + if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == + slirp->vnetwork_addr.s_addr) { + uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; + + if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { + saddr.sin_addr = slirp->vhost_addr; + } else if (addr->sin_addr.s_addr == loopback_addr.s_addr || + so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { + saddr.sin_addr = so->so_faddr; + } + } + daddr.sin_addr = so->so_laddr; + daddr.sin_port = so->so_lport; + + return udp_output2(so, m, &saddr, &daddr, so->so_iptos); +} + +int +udp_attach(struct socket *so) +{ + if((so->s = qemu_socket(AF_INET,SOCK_DGRAM,0)) != -1) { + so->so_expire = curtime + SO_EXPIRE; + insque(so, &so->slirp->udb); + } + return(so->s); +} + +void +udp_detach(struct socket *so) +{ + closesocket(so->s); + sofree(so); +} + +static const struct tos_t udptos[] = { + {0, 53, IPTOS_LOWDELAY, 0}, /* DNS */ + {0, 0, 0, 0} +}; + +static uint8_t +udp_tos(struct socket *so) +{ + int i = 0; + + while(udptos[i].tos) { + if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || + (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { + so->so_emu = udptos[i].emu; + return udptos[i].tos; + } + i++; + } + + return 0; +} + +struct socket * +udp_listen(Slirp *slirp, uint32_t haddr, u_int hport, uint32_t laddr, + u_int lport, int flags) +{ + struct sockaddr_in addr; + struct socket *so; + socklen_t addrlen = sizeof(struct sockaddr_in); + + so = socreate(slirp); + if (!so) { + return NULL; + } + so->s = qemu_socket(AF_INET,SOCK_DGRAM,0); + so->so_expire = curtime + SO_EXPIRE; + insque(so, &slirp->udb); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = haddr; + addr.sin_port = hport; + + if (bind(so->s,(struct sockaddr *)&addr, addrlen) < 0) { + udp_detach(so); + return NULL; + } + socket_set_fast_reuse(so->s); + + getsockname(so->s,(struct sockaddr *)&addr,&addrlen); + so->so_fport = addr.sin_port; + if (addr.sin_addr.s_addr == 0 || + addr.sin_addr.s_addr == loopback_addr.s_addr) { + so->so_faddr = slirp->vhost_addr; + } else { + so->so_faddr = addr.sin_addr; + } + so->so_lport = lport; + so->so_laddr.s_addr = laddr; + if (flags != SS_FACCEPTONCE) + so->so_expire = 0; + + so->so_state &= SS_PERSISTENT_MASK; + so->so_state |= SS_ISFCONNECTED | flags; + + return so; +} diff --git a/slirp/udp.h b/slirp/udp.h new file mode 100644 index 00000000..9bf31fe7 --- /dev/null +++ b/slirp/udp.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp.h 8.1 (Berkeley) 6/10/93 + * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp + */ + +#ifndef _UDP_H_ +#define _UDP_H_ + +#define UDP_TTL 0x60 +#define UDP_UDPDATALEN 16192 + +/* + * Udp protocol header. + * Per RFC 768, September, 1981. + */ +struct udphdr { + uint16_t uh_sport; /* source port */ + uint16_t uh_dport; /* destination port */ + int16_t uh_ulen; /* udp length */ + uint16_t uh_sum; /* udp checksum */ +}; + +/* + * UDP kernel structures and variables. + */ +struct udpiphdr { + struct ipovly ui_i; /* overlaid ip structure */ + struct udphdr ui_u; /* udp header */ +}; +#define ui_mbuf ui_i.ih_mbuf.mptr +#define ui_x1 ui_i.ih_x1 +#define ui_pr ui_i.ih_pr +#define ui_len ui_i.ih_len +#define ui_src ui_i.ih_src +#define ui_dst ui_i.ih_dst +#define ui_sport ui_u.uh_sport +#define ui_dport ui_u.uh_dport +#define ui_ulen ui_u.uh_ulen +#define ui_sum ui_u.uh_sum + +/* + * Names for UDP sysctl objects + */ +#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ +#define UDPCTL_MAXID 2 + +struct mbuf; + +void udp_init(Slirp *); +void udp_cleanup(Slirp *); +void udp_input(register struct mbuf *, int); +int udp_output(struct socket *, struct mbuf *, struct sockaddr_in *); +int udp_attach(struct socket *); +void udp_detach(struct socket *); +struct socket * udp_listen(Slirp *, uint32_t, u_int, uint32_t, u_int, + int); +int udp_output2(struct socket *so, struct mbuf *m, + struct sockaddr_in *saddr, struct sockaddr_in *daddr, + int iptos); +#endif -- cgit v1.2.3