diff options
Diffstat (limited to 'package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch')
-rw-r--r-- | package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch | 495 |
1 files changed, 495 insertions, 0 deletions
diff --git a/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch b/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch new file mode 100644 index 0000000000..453c2924b2 --- /dev/null +++ b/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch @@ -0,0 +1,495 @@ +From a799ca0c6314ad73a97bc6c89382d2712a9c0b0e Mon Sep 17 00:00:00 2001 +From: Simon Kelley <simon@thekelleys.org.uk> +Date: Thu, 18 Oct 2018 19:35:29 +0100 +Subject: [PATCH 01/11] Impove cache behaviour for TCP connections. + +For ease of implementaion, dnsmasq has always forked a new process to +handle each incoming TCP connection. A side-effect of this is that any +DNS queries answered from TCP connections are not cached: when TCP +connections were rare, this was not a problem. With the coming of +DNSSEC, it's now the case that some DNSSEC queries have answers which +spill to TCP, and if, for instance, this applies to the keys for the +root then those never get cached, and performance is very bad. This +fix passes cache entries back from the TCP child process to the main +server process, and fixes the problem. + +Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk> +--- + CHANGELOG | 14 ++++ + src/blockdata.c | 37 ++++++++- + src/cache.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++-- + src/dnsmasq.c | 58 ++++++++++++-- + src/dnsmasq.h | 5 ++ + 5 files changed, 291 insertions(+), 19 deletions(-) + +--- a/CHANGELOG ++++ b/CHANGELOG +@@ -1,3 +1,17 @@ ++version 2.81 ++ Impove cache behaviour for TCP connections. For ease of ++ implementaion, dnsmasq has always forked a new process to handle ++ each incoming TCP connection. A side-effect of this is that ++ any DNS queries answered from TCP connections are not cached: ++ when TCP connections were rare, this was not a problem. 
++ With the coming of DNSSEC, it's now the case that some ++ DNSSEC queries have answers which spill to TCP, and if, ++ for instance, this applies to the keys for the root then ++ those never get cached, and performance is very bad. ++ This fix passes cache entries back from the TCP child process to ++ the main server process, and fixes the problem. ++ ++ + version 2.80 + Add support for RFC 4039 DHCP rapid commit. Thanks to Ashram Method + for the initial patch and motivation. +--- a/src/blockdata.c ++++ b/src/blockdata.c +@@ -61,7 +61,7 @@ void blockdata_report(void) + blockdata_alloced * sizeof(struct blockdata)); + } + +-struct blockdata *blockdata_alloc(char *data, size_t len) ++static struct blockdata *blockdata_alloc_real(int fd, char *data, size_t len) + { + struct blockdata *block, *ret = NULL; + struct blockdata **prev = &ret; +@@ -89,8 +89,17 @@ struct blockdata *blockdata_alloc(char * + blockdata_hwm = blockdata_count; + + blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len; +- memcpy(block->key, data, blen); +- data += blen; ++ if (data) ++ { ++ memcpy(block->key, data, blen); ++ data += blen; ++ } ++ else if (!read_write(fd, block->key, blen, 1)) ++ { ++ /* failed read free partial chain */ ++ blockdata_free(ret); ++ return NULL; ++ } + len -= blen; + *prev = block; + prev = &block->next; +@@ -100,6 +109,10 @@ struct blockdata *blockdata_alloc(char * + return ret; + } + ++struct blockdata *blockdata_alloc(char *data, size_t len) ++{ ++ return blockdata_alloc_real(0, data, len); ++} + + void blockdata_free(struct blockdata *blocks) + { +@@ -148,5 +161,21 @@ void *blockdata_retrieve(struct blockdat + + return data; + } +- ++ ++ ++void blockdata_write(struct blockdata *block, size_t len, int fd) ++{ ++ for (; len > 0 && block; block = block->next) ++ { ++ size_t blen = len > KEYBLOCK_LEN ? 
KEYBLOCK_LEN : len; ++ read_write(fd, block->key, blen, 0); ++ len -= blen; ++ } ++} ++ ++struct blockdata *blockdata_read(int fd, size_t len) ++{ ++ return blockdata_alloc_real(fd, NULL, len); ++} ++ + #endif +--- a/src/cache.c ++++ b/src/cache.c +@@ -26,6 +26,8 @@ static union bigname *big_free = NULL; + static int bignames_left, hash_size; + + static void make_non_terminals(struct crec *source); ++static struct crec *really_insert(char *name, struct all_addr *addr, ++ time_t now, unsigned long ttl, unsigned short flags); + + /* type->string mapping: this is also used by the name-hash function as a mixing table. */ + static const struct { +@@ -464,16 +466,10 @@ void cache_start_insert(void) + new_chain = NULL; + insert_error = 0; + } +- ++ + struct crec *cache_insert(char *name, struct all_addr *addr, + time_t now, unsigned long ttl, unsigned short flags) + { +- struct crec *new, *target_crec = NULL; +- union bigname *big_name = NULL; +- int freed_all = flags & F_REVERSE; +- int free_avail = 0; +- unsigned int target_uid; +- + /* Don't log DNSSEC records here, done elsewhere */ + if (flags & (F_IPV4 | F_IPV6 | F_CNAME)) + { +@@ -484,7 +480,20 @@ struct crec *cache_insert(char *name, st + if (daemon->min_cache_ttl != 0 && daemon->min_cache_ttl > ttl) + ttl = daemon->min_cache_ttl; + } ++ ++ return really_insert(name, addr, now, ttl, flags); ++} + ++ ++static struct crec *really_insert(char *name, struct all_addr *addr, ++ time_t now, unsigned long ttl, unsigned short flags) ++{ ++ struct crec *new, *target_crec = NULL; ++ union bigname *big_name = NULL; ++ int freed_all = flags & F_REVERSE; ++ int free_avail = 0; ++ unsigned int target_uid; ++ + /* if previous insertion failed give up now. */ + if (insert_error) + return NULL; +@@ -645,12 +654,185 @@ void cache_end_insert(void) + cache_hash(new_chain); + cache_link(new_chain); + daemon->metrics[METRIC_DNS_CACHE_INSERTED]++; ++ ++ /* If we're a child process, send this cache entry up the pipe to the master. 
++ The marshalling process is rather nasty. */ ++ if (daemon->pipe_to_parent != -1) ++ { ++ char *name = cache_get_name(new_chain); ++ ssize_t m = strlen(name); ++ unsigned short flags = new_chain->flags; ++#ifdef HAVE_DNSSEC ++ u16 class = new_chain->uid; ++#endif ++ ++ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)name, m, 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->ttd, sizeof(new_chain->ttd), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&flags, sizeof(flags), 0); ++ ++ if (flags & (F_IPV4 | F_IPV6)) ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr, sizeof(new_chain->addr), 0); ++#ifdef HAVE_DNSSEC ++ else if (flags & F_DNSKEY) ++ { ++ read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.algo, sizeof(new_chain->addr.key.algo), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keytag, sizeof(new_chain->addr.key.keytag), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.flags, sizeof(new_chain->addr.key.flags), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keylen, sizeof(new_chain->addr.key.keylen), 0); ++ blockdata_write(new_chain->addr.key.keydata, new_chain->addr.key.keylen, daemon->pipe_to_parent); ++ } ++ else if (flags & F_DS) ++ { ++ read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0); ++ /* A negative DS entry is possible and has no data, obviously. 
*/ ++ if (!(flags & F_NEG)) ++ { ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.algo, sizeof(new_chain->addr.ds.algo), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keytag, sizeof(new_chain->addr.ds.keytag), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.digest, sizeof(new_chain->addr.ds.digest), 0); ++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keylen, sizeof(new_chain->addr.ds.keylen), 0); ++ blockdata_write(new_chain->addr.ds.keydata, new_chain->addr.ds.keylen, daemon->pipe_to_parent); ++ } ++ } ++#endif ++ ++ } + } ++ + new_chain = tmp; + } ++ ++ /* signal end of cache insert in master process */ ++ if (daemon->pipe_to_parent != -1) ++ { ++ ssize_t m = -1; ++ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0); ++ } ++ + new_chain = NULL; + } + ++ ++/* A marshalled cache entry arrives on fd, read, unmarshall and insert into cache of master process. 
*/ ++int cache_recv_insert(time_t now, int fd) ++{ ++ ssize_t m; ++ struct all_addr addr; ++ unsigned long ttl; ++ time_t ttd; ++ unsigned short flags; ++ struct crec *crecp = NULL; ++ ++ cache_start_insert(); ++ ++ while(1) ++ { ++ ++ if (!read_write(fd, (unsigned char *)&m, sizeof(m), 1)) ++ return 0; ++ ++ if (m == -1) ++ { ++ cache_end_insert(); ++ return 1; ++ } ++ ++ if (!read_write(fd, (unsigned char *)daemon->namebuff, m, 1) || ++ !read_write(fd, (unsigned char *)&ttd, sizeof(ttd), 1) || ++ !read_write(fd, (unsigned char *)&flags, sizeof(flags), 1)) ++ return 0; ++ ++ daemon->namebuff[m] = 0; ++ ++ ttl = difftime(ttd, now); ++ ++ if (flags & (F_IPV4 | F_IPV6)) ++ { ++ if (!read_write(fd, (unsigned char *)&addr, sizeof(addr), 1)) ++ return 0; ++ crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags); ++ } ++ else if (flags & F_CNAME) ++ { ++ struct crec *newc = really_insert(daemon->namebuff, NULL, now, ttl, flags); ++ /* This relies on the fact that the target of a CNAME immediately precedes ++ it because of the order of extraction in extract_addresses, and ++ the order reversal on the new_chain. 
*/ ++ if (newc) ++ { ++ if (!crecp) ++ { ++ newc->addr.cname.target.cache = NULL; ++ /* anything other than zero, to avoid being mistaken for CNAME to interface-name */ ++ newc->addr.cname.uid = 1; ++ } ++ else ++ { ++ next_uid(crecp); ++ newc->addr.cname.target.cache = crecp; ++ newc->addr.cname.uid = crecp->uid; ++ } ++ } ++ } ++#ifdef HAVE_DNSSEC ++ else if (flags & (F_DNSKEY | F_DS)) ++ { ++ unsigned short class, keylen, keyflags, keytag; ++ unsigned char algo, digest; ++ struct blockdata *keydata; ++ ++ if (!read_write(fd, (unsigned char *)&class, sizeof(class), 1)) ++ return 0; ++ /* Cache needs to know the class for DNSSEC stuff */ ++ addr.addr.dnssec.class = class; ++ ++ crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags); ++ ++ if (flags & F_DNSKEY) ++ { ++ if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) || ++ !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) || ++ !read_write(fd, (unsigned char *)&keyflags, sizeof(keyflags), 1) || ++ !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) || ++ !(keydata = blockdata_read(fd, keylen))) ++ return 0; ++ } ++ else if (!(flags & F_NEG)) ++ { ++ if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) || ++ !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) || ++ !read_write(fd, (unsigned char *)&digest, sizeof(digest), 1) || ++ !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) || ++ !(keydata = blockdata_read(fd, keylen))) ++ return 0; ++ } ++ ++ if (crecp) ++ { ++ if (flags & F_DNSKEY) ++ { ++ crecp->addr.key.algo = algo; ++ crecp->addr.key.keytag = keytag; ++ crecp->addr.key.flags = keyflags; /* DNSKEY flags as sent by the child, not the cache-entry flags */ ++ crecp->addr.key.keylen = keylen; ++ crecp->addr.key.keydata = keydata; ++ } ++ else if (!(flags & F_NEG)) ++ { ++ crecp->addr.ds.algo = algo; ++ crecp->addr.ds.keytag = keytag; ++ crecp->addr.ds.digest = digest; ++ crecp->addr.ds.keylen = keylen; ++ crecp->addr.ds.keydata = keydata; ++ } ++ } ++ } ++#endif ++ } ++} ++ + int cache_find_non_terminal(char 
*name, time_t now) + { + struct crec *crecp; +--- a/src/dnsmasq.c ++++ b/src/dnsmasq.c +@@ -930,6 +930,10 @@ int main (int argc, char **argv) + check_servers(); + + pid = getpid(); ++ ++ daemon->pipe_to_parent = -1; ++ for (i = 0; i < MAX_PROCS; i++) ++ daemon->tcp_pipes[i] = -1; + + #ifdef HAVE_INOTIFY + /* Using inotify, have to select a resolv file at startup */ +@@ -1611,7 +1615,7 @@ static int set_dns_listeners(time_t now) + we don't need to explicitly arrange to wake up here */ + if (listener->tcpfd != -1) + for (i = 0; i < MAX_PROCS; i++) +- if (daemon->tcp_pids[i] == 0) ++ if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) + { + poll_listen(listener->tcpfd, POLLIN); + break; +@@ -1624,6 +1628,13 @@ static int set_dns_listeners(time_t now) + + } + ++#ifndef NO_FORK ++ if (!option_bool(OPT_DEBUG)) ++ for (i = 0; i < MAX_PROCS; i++) ++ if (daemon->tcp_pipes[i] != -1) ++ poll_listen(daemon->tcp_pipes[i], POLLIN); ++#endif ++ + return wait; + } + +@@ -1632,7 +1643,10 @@ static void check_dns_listeners(time_t n + struct serverfd *serverfdp; + struct listener *listener; + int i; +- ++#ifndef NO_FORK ++ int pipefd[2]; ++#endif ++ + for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next) + if (poll_check(serverfdp->fd, POLLIN)) + reply_query(serverfdp->fd, serverfdp->source_addr.sa.sa_family, now); +@@ -1642,7 +1656,26 @@ static void check_dns_listeners(time_t n + if (daemon->randomsocks[i].refcount != 0 && + poll_check(daemon->randomsocks[i].fd, POLLIN)) + reply_query(daemon->randomsocks[i].fd, daemon->randomsocks[i].family, now); +- ++ ++#ifndef NO_FORK ++ /* Races. The child process can die before we read all of the data from the ++ pipe, or vice versa. Therefore send tcp_pids to zero when we wait() the ++ process, and tcp_pipes to -1 and close the FD when we read the last ++ of the data - indicated by cache_recv_insert returning zero. ++ The order of these events is indeterminate, and both are needed ++ to free the process slot. 
Once the child process has gone, poll() ++ returns POLLHUP, not POLLIN, so have to check for both here. */ ++ if (!option_bool(OPT_DEBUG)) ++ for (i = 0; i < MAX_PROCS; i++) ++ if (daemon->tcp_pipes[i] != -1 && ++ poll_check(daemon->tcp_pipes[i], POLLIN | POLLHUP) && ++ !cache_recv_insert(now, daemon->tcp_pipes[i])) ++ { ++ close(daemon->tcp_pipes[i]); ++ daemon->tcp_pipes[i] = -1; ++ } ++#endif ++ + for (listener = daemon->listeners; listener; listener = listener->next) + { + if (listener->fd != -1 && poll_check(listener->fd, POLLIN)) +@@ -1736,15 +1769,20 @@ static void check_dns_listeners(time_t n + while (retry_send(close(confd))); + } + #ifndef NO_FORK +- else if (!option_bool(OPT_DEBUG) && (p = fork()) != 0) ++ else if (!option_bool(OPT_DEBUG) && pipe(pipefd) == 0 && (p = fork()) != 0) + { +- if (p != -1) ++ close(pipefd[1]); /* parent needs read pipe end. */ ++ if (p == -1) ++ close(pipefd[0]); ++ else + { + int i; ++ + for (i = 0; i < MAX_PROCS; i++) +- if (daemon->tcp_pids[i] == 0) ++ if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1) + { + daemon->tcp_pids[i] = p; ++ daemon->tcp_pipes[i] = pipefd[0]; + break; + } + } +@@ -1761,7 +1799,7 @@ static void check_dns_listeners(time_t n + int flags; + struct in_addr netmask; + int auth_dns; +- ++ + if (iface) + { + netmask = iface->netmask; +@@ -1777,7 +1815,11 @@ static void check_dns_listeners(time_t n + /* Arrange for SIGALRM after CHILD_LIFETIME seconds to + terminate the process. */ + if (!option_bool(OPT_DEBUG)) +- alarm(CHILD_LIFETIME); ++ { ++ alarm(CHILD_LIFETIME); ++ close(pipefd[0]); /* close read end in child. */ ++ daemon->pipe_to_parent = pipefd[1]; ++ } + #endif + + /* start with no upstream connections. 
*/ +--- a/src/dnsmasq.h ++++ b/src/dnsmasq.h +@@ -1091,6 +1091,8 @@ extern struct daemon { + size_t packet_len; /* " " */ + struct randfd *rfd_save; /* " " */ + pid_t tcp_pids[MAX_PROCS]; ++ int tcp_pipes[MAX_PROCS]; ++ int pipe_to_parent; + struct randfd randomsocks[RANDOM_SOCKS]; + int v6pktinfo; + struct addrlist *interface_addrs; /* list of all addresses/prefix lengths associated with all local interfaces */ +@@ -1152,6 +1154,7 @@ struct crec *cache_find_by_name(struct c + char *name, time_t now, unsigned int prot); + void cache_end_insert(void); + void cache_start_insert(void); ++int cache_recv_insert(time_t now, int fd); + struct crec *cache_insert(char *name, struct all_addr *addr, + time_t now, unsigned long ttl, unsigned short flags); + void cache_reload(void); +@@ -1174,6 +1177,8 @@ void blockdata_init(void); + void blockdata_report(void); + struct blockdata *blockdata_alloc(char *data, size_t len); + void *blockdata_retrieve(struct blockdata *block, size_t len, void *data); ++struct blockdata *blockdata_read(int fd, size_t len); ++void blockdata_write(struct blockdata *block, size_t len, int fd); + void blockdata_free(struct blockdata *blocks); + #endif + |