aboutsummaryrefslogtreecommitdiffstats
path: root/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch
diff options
context:
space:
mode:
Diffstat (limited to 'package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch')
-rw-r--r--package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch495
1 files changed, 495 insertions, 0 deletions
diff --git a/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch b/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch
new file mode 100644
index 0000000000..453c2924b2
--- /dev/null
+++ b/package/network/services/dnsmasq/patches/0001-Impove-cache-behaviour-for-TCP-connections.patch
@@ -0,0 +1,495 @@
+From a799ca0c6314ad73a97bc6c89382d2712a9c0b0e Mon Sep 17 00:00:00 2001
+From: Simon Kelley <simon@thekelleys.org.uk>
+Date: Thu, 18 Oct 2018 19:35:29 +0100
+Subject: [PATCH 01/11] Impove cache behaviour for TCP connections.
+
+For ease of implementaion, dnsmasq has always forked a new process to
+handle each incoming TCP connection. A side-effect of this is that any
+DNS queries answered from TCP connections are not cached: when TCP
+connections were rare, this was not a problem. With the coming of
+DNSSEC, it's now the case that some DNSSEC queries have answers which
+spill to TCP, and if, for instance, this applies to the keys for the
+root then those never get cached, and performance is very bad. This
+fix passes cache entries back from the TCP child process to the main
+server process, and fixes the problem.
+
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ CHANGELOG | 14 ++++
+ src/blockdata.c | 37 ++++++++-
+ src/cache.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++--
+ src/dnsmasq.c | 58 ++++++++++++--
+ src/dnsmasq.h | 5 ++
+ 5 files changed, 291 insertions(+), 19 deletions(-)
+
+--- a/CHANGELOG
++++ b/CHANGELOG
+@@ -1,3 +1,17 @@
++version 2.81
++ Impove cache behaviour for TCP connections. For ease of
++ implementaion, dnsmasq has always forked a new process to handle
++ each incoming TCP connection. A side-effect of this is that
++ any DNS queries answered from TCP connections are not cached:
++ when TCP connections were rare, this was not a problem.
++ With the coming of DNSSEC, it's now the case that some
++ DNSSEC queries have answers which spill to TCP, and if,
++ for instance, this applies to the keys for the root then
++ those never get cached, and performance is very bad.
++ This fix passes cache entries back from the TCP child process to
++ the main server process, and fixes the problem.
++
++
+ version 2.80
+ Add support for RFC 4039 DHCP rapid commit. Thanks to Ashram Method
+ for the initial patch and motivation.
+--- a/src/blockdata.c
++++ b/src/blockdata.c
+@@ -61,7 +61,7 @@ void blockdata_report(void)
+ blockdata_alloced * sizeof(struct blockdata));
+ }
+
+-struct blockdata *blockdata_alloc(char *data, size_t len)
++static struct blockdata *blockdata_alloc_real(int fd, char *data, size_t len)
+ {
+ struct blockdata *block, *ret = NULL;
+ struct blockdata **prev = &ret;
+@@ -89,8 +89,17 @@ struct blockdata *blockdata_alloc(char *
+ blockdata_hwm = blockdata_count;
+
+ blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len;
+- memcpy(block->key, data, blen);
+- data += blen;
++ if (data)
++ {
++ memcpy(block->key, data, blen);
++ data += blen;
++ }
++ else if (!read_write(fd, block->key, blen, 1))
++ {
++ /* failed read free partial chain */
++ blockdata_free(ret);
++ return NULL;
++ }
+ len -= blen;
+ *prev = block;
+ prev = &block->next;
+@@ -100,6 +109,10 @@ struct blockdata *blockdata_alloc(char *
+ return ret;
+ }
+
++struct blockdata *blockdata_alloc(char *data, size_t len)
++{
++ return blockdata_alloc_real(0, data, len);
++}
+
+ void blockdata_free(struct blockdata *blocks)
+ {
+@@ -148,5 +161,21 @@ void *blockdata_retrieve(struct blockdat
+
+ return data;
+ }
+-
++
++
++void blockdata_write(struct blockdata *block, size_t len, int fd)
++{
++ for (; len > 0 && block; block = block->next)
++ {
++ size_t blen = len > KEYBLOCK_LEN ? KEYBLOCK_LEN : len;
++ read_write(fd, block->key, blen, 0);
++ len -= blen;
++ }
++}
++
++struct blockdata *blockdata_read(int fd, size_t len)
++{
++ return blockdata_alloc_real(fd, NULL, len);
++}
++
+ #endif
+--- a/src/cache.c
++++ b/src/cache.c
+@@ -26,6 +26,8 @@ static union bigname *big_free = NULL;
+ static int bignames_left, hash_size;
+
+ static void make_non_terminals(struct crec *source);
++static struct crec *really_insert(char *name, struct all_addr *addr,
++ time_t now, unsigned long ttl, unsigned short flags);
+
+ /* type->string mapping: this is also used by the name-hash function as a mixing table. */
+ static const struct {
+@@ -464,16 +466,10 @@ void cache_start_insert(void)
+ new_chain = NULL;
+ insert_error = 0;
+ }
+-
++
+ struct crec *cache_insert(char *name, struct all_addr *addr,
+ time_t now, unsigned long ttl, unsigned short flags)
+ {
+- struct crec *new, *target_crec = NULL;
+- union bigname *big_name = NULL;
+- int freed_all = flags & F_REVERSE;
+- int free_avail = 0;
+- unsigned int target_uid;
+-
+ /* Don't log DNSSEC records here, done elsewhere */
+ if (flags & (F_IPV4 | F_IPV6 | F_CNAME))
+ {
+@@ -484,7 +480,20 @@ struct crec *cache_insert(char *name, st
+ if (daemon->min_cache_ttl != 0 && daemon->min_cache_ttl > ttl)
+ ttl = daemon->min_cache_ttl;
+ }
++
++ return really_insert(name, addr, now, ttl, flags);
++}
+
++
++static struct crec *really_insert(char *name, struct all_addr *addr,
++ time_t now, unsigned long ttl, unsigned short flags)
++{
++ struct crec *new, *target_crec = NULL;
++ union bigname *big_name = NULL;
++ int freed_all = flags & F_REVERSE;
++ int free_avail = 0;
++ unsigned int target_uid;
++
+ /* if previous insertion failed give up now. */
+ if (insert_error)
+ return NULL;
+@@ -645,12 +654,185 @@ void cache_end_insert(void)
+ cache_hash(new_chain);
+ cache_link(new_chain);
+ daemon->metrics[METRIC_DNS_CACHE_INSERTED]++;
++
++ /* If we're a child process, send this cache entry up the pipe to the master.
++ The marshalling process is rather nasty. */
++ if (daemon->pipe_to_parent != -1)
++ {
++ char *name = cache_get_name(new_chain);
++ ssize_t m = strlen(name);
++ unsigned short flags = new_chain->flags;
++#ifdef HAVE_DNSSEC
++ u16 class = new_chain->uid;
++#endif
++
++ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)name, m, 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->ttd, sizeof(new_chain->ttd), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&flags, sizeof(flags), 0);
++
++ if (flags & (F_IPV4 | F_IPV6))
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr, sizeof(new_chain->addr), 0);
++#ifdef HAVE_DNSSEC
++ else if (flags & F_DNSKEY)
++ {
++ read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.algo, sizeof(new_chain->addr.key.algo), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keytag, sizeof(new_chain->addr.key.keytag), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.flags, sizeof(new_chain->addr.key.flags), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.key.keylen, sizeof(new_chain->addr.key.keylen), 0);
++ blockdata_write(new_chain->addr.key.keydata, new_chain->addr.key.keylen, daemon->pipe_to_parent);
++ }
++ else if (flags & F_DS)
++ {
++ read_write(daemon->pipe_to_parent, (unsigned char *)&class, sizeof(class), 0);
++ /* A negative DS entry is possible and has no data, obviously. */
++ if (!(flags & F_NEG))
++ {
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.algo, sizeof(new_chain->addr.ds.algo), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keytag, sizeof(new_chain->addr.ds.keytag), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.digest, sizeof(new_chain->addr.ds.digest), 0);
++ read_write(daemon->pipe_to_parent, (unsigned char *)&new_chain->addr.ds.keylen, sizeof(new_chain->addr.ds.keylen), 0);
++ blockdata_write(new_chain->addr.ds.keydata, new_chain->addr.ds.keylen, daemon->pipe_to_parent);
++ }
++ }
++#endif
++
++ }
+ }
++
+ new_chain = tmp;
+ }
++
++ /* signal end of cache insert in master process */
++ if (daemon->pipe_to_parent != -1)
++ {
++ ssize_t m = -1;
++ read_write(daemon->pipe_to_parent, (unsigned char *)&m, sizeof(m), 0);
++ }
++
+ new_chain = NULL;
+ }
+
++
++/* A marshalled cache entry arrives on fd, read, unmarshall and insert into cache of master process. */
++int cache_recv_insert(time_t now, int fd)
++{
++ ssize_t m;
++ struct all_addr addr;
++ unsigned long ttl;
++ time_t ttd;
++ unsigned short flags;
++ struct crec *crecp = NULL;
++
++ cache_start_insert();
++
++ while(1)
++ {
++
++ if (!read_write(fd, (unsigned char *)&m, sizeof(m), 1))
++ return 0;
++
++ if (m == -1)
++ {
++ cache_end_insert();
++ return 1;
++ }
++
++ if (!read_write(fd, (unsigned char *)daemon->namebuff, m, 1) ||
++ !read_write(fd, (unsigned char *)&ttd, sizeof(ttd), 1) ||
++ !read_write(fd, (unsigned char *)&flags, sizeof(flags), 1))
++ return 0;
++
++ daemon->namebuff[m] = 0;
++
++ ttl = difftime(ttd, now);
++
++ if (flags & (F_IPV4 | F_IPV6))
++ {
++ if (!read_write(fd, (unsigned char *)&addr, sizeof(addr), 1))
++ return 0;
++ crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags);
++ }
++ else if (flags & F_CNAME)
++ {
++ struct crec *newc = really_insert(daemon->namebuff, NULL, now, ttl, flags);
++ /* This relies on the fact the the target of a CNAME immediately preceeds
++ it because of the order of extraction in extract_addresses, and
++ the order reversal on the new_chain. */
++ if (newc)
++ {
++ if (!crecp)
++ {
++ newc->addr.cname.target.cache = NULL;
++ /* anything other than zero, to avoid being mistaken for CNAME to interface-name */
++ newc->addr.cname.uid = 1;
++ }
++ else
++ {
++ next_uid(crecp);
++ newc->addr.cname.target.cache = crecp;
++ newc->addr.cname.uid = crecp->uid;
++ }
++ }
++ }
++#ifdef HAVE_DNSSEC
++ else if (flags & (F_DNSKEY | F_DS))
++ {
++ unsigned short class, keylen, keyflags, keytag;
++ unsigned char algo, digest;
++ struct blockdata *keydata;
++
++ if (!read_write(fd, (unsigned char *)&class, sizeof(class), 1))
++ return 0;
++ /* Cache needs to known class for DNSSEC stuff */
++ addr.addr.dnssec.class = class;
++
++ crecp = really_insert(daemon->namebuff, &addr, now, ttl, flags);
++
++ if (flags & F_DNSKEY)
++ {
++ if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) ||
++ !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) ||
++ !read_write(fd, (unsigned char *)&keyflags, sizeof(keyflags), 1) ||
++ !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) ||
++ !(keydata = blockdata_read(fd, keylen)))
++ return 0;
++ }
++ else if (!(flags & F_NEG))
++ {
++ if (!read_write(fd, (unsigned char *)&algo, sizeof(algo), 1) ||
++ !read_write(fd, (unsigned char *)&keytag, sizeof(keytag), 1) ||
++ !read_write(fd, (unsigned char *)&digest, sizeof(digest), 1) ||
++ !read_write(fd, (unsigned char *)&keylen, sizeof(keylen), 1) ||
++ !(keydata = blockdata_read(fd, keylen)))
++ return 0;
++ }
++
++ if (crecp)
++ {
++ if (flags & F_DNSKEY)
++ {
++ crecp->addr.key.algo = algo;
++ crecp->addr.key.keytag = keytag;
++ crecp->addr.key.flags = flags;
++ crecp->addr.key.keylen = keylen;
++ crecp->addr.key.keydata = keydata;
++ }
++ else if (!(flags & F_NEG))
++ {
++ crecp->addr.ds.algo = algo;
++ crecp->addr.ds.keytag = keytag;
++ crecp->addr.ds.digest = digest;
++ crecp->addr.ds.keylen = keylen;
++ crecp->addr.ds.keydata = keydata;
++ }
++ }
++ }
++#endif
++ }
++}
++
+ int cache_find_non_terminal(char *name, time_t now)
+ {
+ struct crec *crecp;
+--- a/src/dnsmasq.c
++++ b/src/dnsmasq.c
+@@ -930,6 +930,10 @@ int main (int argc, char **argv)
+ check_servers();
+
+ pid = getpid();
++
++ daemon->pipe_to_parent = -1;
++ for (i = 0; i < MAX_PROCS; i++)
++ daemon->tcp_pipes[i] = -1;
+
+ #ifdef HAVE_INOTIFY
+ /* Using inotify, have to select a resolv file at startup */
+@@ -1611,7 +1615,7 @@ static int set_dns_listeners(time_t now)
+ we don't need to explicitly arrange to wake up here */
+ if (listener->tcpfd != -1)
+ for (i = 0; i < MAX_PROCS; i++)
+- if (daemon->tcp_pids[i] == 0)
++ if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
+ {
+ poll_listen(listener->tcpfd, POLLIN);
+ break;
+@@ -1624,6 +1628,13 @@ static int set_dns_listeners(time_t now)
+
+ }
+
++#ifndef NO_FORK
++ if (!option_bool(OPT_DEBUG))
++ for (i = 0; i < MAX_PROCS; i++)
++ if (daemon->tcp_pipes[i] != -1)
++ poll_listen(daemon->tcp_pipes[i], POLLIN);
++#endif
++
+ return wait;
+ }
+
+@@ -1632,7 +1643,10 @@ static void check_dns_listeners(time_t n
+ struct serverfd *serverfdp;
+ struct listener *listener;
+ int i;
+-
++#ifndef NO_FORK
++ int pipefd[2];
++#endif
++
+ for (serverfdp = daemon->sfds; serverfdp; serverfdp = serverfdp->next)
+ if (poll_check(serverfdp->fd, POLLIN))
+ reply_query(serverfdp->fd, serverfdp->source_addr.sa.sa_family, now);
+@@ -1642,7 +1656,26 @@ static void check_dns_listeners(time_t n
+ if (daemon->randomsocks[i].refcount != 0 &&
+ poll_check(daemon->randomsocks[i].fd, POLLIN))
+ reply_query(daemon->randomsocks[i].fd, daemon->randomsocks[i].family, now);
+-
++
++#ifndef NO_FORK
++ /* Races. The child process can die before we read all of the data from the
++ pipe, or vice versa. Therefore send tcp_pids to zero when we wait() the
++ process, and tcp_pipes to -1 and close the FD when we read the last
++ of the data - indicated by cache_recv_insert returning zero.
++ The order of these events is indeterminate, and both are needed
++ to free the process slot. Once the child process has gone, poll()
++ returns POLLHUP, not POLLIN, so have to check for both here. */
++ if (!option_bool(OPT_DEBUG))
++ for (i = 0; i < MAX_PROCS; i++)
++ if (daemon->tcp_pipes[i] != -1 &&
++ poll_check(daemon->tcp_pipes[i], POLLIN | POLLHUP) &&
++ !cache_recv_insert(now, daemon->tcp_pipes[i]))
++ {
++ close(daemon->tcp_pipes[i]);
++ daemon->tcp_pipes[i] = -1;
++ }
++#endif
++
+ for (listener = daemon->listeners; listener; listener = listener->next)
+ {
+ if (listener->fd != -1 && poll_check(listener->fd, POLLIN))
+@@ -1736,15 +1769,20 @@ static void check_dns_listeners(time_t n
+ while (retry_send(close(confd)));
+ }
+ #ifndef NO_FORK
+- else if (!option_bool(OPT_DEBUG) && (p = fork()) != 0)
++ else if (!option_bool(OPT_DEBUG) && pipe(pipefd) == 0 && (p = fork()) != 0)
+ {
+- if (p != -1)
++ close(pipefd[1]); /* parent needs read pipe end. */
++ if (p == -1)
++ close(pipefd[0]);
++ else
+ {
+ int i;
++
+ for (i = 0; i < MAX_PROCS; i++)
+- if (daemon->tcp_pids[i] == 0)
++ if (daemon->tcp_pids[i] == 0 && daemon->tcp_pipes[i] == -1)
+ {
+ daemon->tcp_pids[i] = p;
++ daemon->tcp_pipes[i] = pipefd[0];
+ break;
+ }
+ }
+@@ -1761,7 +1799,7 @@ static void check_dns_listeners(time_t n
+ int flags;
+ struct in_addr netmask;
+ int auth_dns;
+-
++
+ if (iface)
+ {
+ netmask = iface->netmask;
+@@ -1777,7 +1815,11 @@ static void check_dns_listeners(time_t n
+ /* Arrange for SIGALRM after CHILD_LIFETIME seconds to
+ terminate the process. */
+ if (!option_bool(OPT_DEBUG))
+- alarm(CHILD_LIFETIME);
++ {
++ alarm(CHILD_LIFETIME);
++ close(pipefd[0]); /* close read end in child. */
++ daemon->pipe_to_parent = pipefd[1];
++ }
+ #endif
+
+ /* start with no upstream connections. */
+--- a/src/dnsmasq.h
++++ b/src/dnsmasq.h
+@@ -1091,6 +1091,8 @@ extern struct daemon {
+ size_t packet_len; /* " " */
+ struct randfd *rfd_save; /* " " */
+ pid_t tcp_pids[MAX_PROCS];
++ int tcp_pipes[MAX_PROCS];
++ int pipe_to_parent;
+ struct randfd randomsocks[RANDOM_SOCKS];
+ int v6pktinfo;
+ struct addrlist *interface_addrs; /* list of all addresses/prefix lengths associated with all local interfaces */
+@@ -1152,6 +1154,7 @@ struct crec *cache_find_by_name(struct c
+ char *name, time_t now, unsigned int prot);
+ void cache_end_insert(void);
+ void cache_start_insert(void);
++int cache_recv_insert(time_t now, int fd);
+ struct crec *cache_insert(char *name, struct all_addr *addr,
+ time_t now, unsigned long ttl, unsigned short flags);
+ void cache_reload(void);
+@@ -1174,6 +1177,8 @@ void blockdata_init(void);
+ void blockdata_report(void);
+ struct blockdata *blockdata_alloc(char *data, size_t len);
+ void *blockdata_retrieve(struct blockdata *block, size_t len, void *data);
++struct blockdata *blockdata_read(int fd, size_t len);
++void blockdata_write(struct blockdata *block, size_t len, int fd);
+ void blockdata_free(struct blockdata *blocks);
+ #endif
+