diff options
| author | fishsoupisgood <github@madingley.org> | 2019-04-29 01:17:54 +0100 | 
|---|---|---|
| committer | fishsoupisgood <github@madingley.org> | 2019-05-27 03:43:43 +0100 | 
| commit | 3f2546b2ef55b661fd8dd69682b38992225e86f6 (patch) | |
| tree | 65ca85f13617aee1dce474596800950f266a456c /util | |
| download | qemu-master.tar.gz qemu-master.tar.bz2 qemu-master.zip | |
Diffstat (limited to 'util')
39 files changed, 13465 insertions, 0 deletions
| diff --git a/util/Makefile.objs b/util/Makefile.objs new file mode 100644 index 00000000..114d6578 --- /dev/null +++ b/util/Makefile.objs @@ -0,0 +1,20 @@ +util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o +util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o +util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o qemu-openpty.o +util-obj-y += envlist.o path.o module.o +util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o +util-obj-y += bitmap.o bitops.o hbitmap.o +util-obj-y += fifo8.o +util-obj-y += acl.o +util-obj-y += error.o qemu-error.o +util-obj-$(CONFIG_POSIX) += compatfd.o +util-obj-y += id.o +util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o +util-obj-y += qemu-option.o qemu-progress.o +util-obj-y += hexdump.o +util-obj-y += crc32c.o +util-obj-y += throttle.o +util-obj-y += getauxval.o +util-obj-y += readline.o +util-obj-y += rfifolock.o +util-obj-y += rcu.o diff --git a/util/acl.c b/util/acl.c new file mode 100644 index 00000000..571d6861 --- /dev/null +++ b/util/acl.c @@ -0,0 +1,187 @@ +/* + * QEMU access control list management + * + * Copyright (C) 2009 Red Hat, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + + +#include "qemu-common.h" +#include "qemu/acl.h" + +#ifdef CONFIG_FNMATCH +#include <fnmatch.h> +#endif + + +static unsigned int nacls = 0; +static qemu_acl **acls = NULL; + + + +qemu_acl *qemu_acl_find(const char *aclname) +{ +    int i; +    for (i = 0 ; i < nacls ; i++) { +        if (strcmp(acls[i]->aclname, aclname) == 0) +            return acls[i]; +    } + +    return NULL; +} + +qemu_acl *qemu_acl_init(const char *aclname) +{ +    qemu_acl *acl; + +    acl = qemu_acl_find(aclname); +    if (acl) +        return acl; + +    acl = g_malloc(sizeof(*acl)); +    acl->aclname = g_strdup(aclname); +    /* Deny by default, so there is no window of "open +     * access" between QEMU starting, and the user setting +     * up ACLs in the monitor */ +    acl->defaultDeny = 1; + +    acl->nentries = 0; +    QTAILQ_INIT(&acl->entries); + +    acls = g_realloc(acls, sizeof(*acls) * (nacls +1)); +    acls[nacls] = acl; +    nacls++; + +    return acl; +} + +int qemu_acl_party_is_allowed(qemu_acl *acl, +                              const char *party) +{ +    qemu_acl_entry *entry; + +    QTAILQ_FOREACH(entry, &acl->entries, next) { +#ifdef CONFIG_FNMATCH +        if (fnmatch(entry->match, party, 0) == 0) +            return entry->deny ? 0 : 1; +#else +        /* No fnmatch, so fallback to exact string matching +         * instead of allowing wildcards */ +        if (strcmp(entry->match, party) == 0) +            return entry->deny ? 
0 : 1; +#endif +    } + +    return acl->defaultDeny ? 0 : 1; +} + + +void qemu_acl_reset(qemu_acl *acl) +{ +    qemu_acl_entry *entry, *next_entry; + +    /* Put back to deny by default, so there is no window +     * of "open access" while the user re-initializes the +     * access control list */ +    acl->defaultDeny = 1; +    QTAILQ_FOREACH_SAFE(entry, &acl->entries, next, next_entry) { +        QTAILQ_REMOVE(&acl->entries, entry, next); +        g_free(entry->match); +        g_free(entry); +    } +    acl->nentries = 0; +} + + +int qemu_acl_append(qemu_acl *acl, +                    int deny, +                    const char *match) +{ +    qemu_acl_entry *entry; + +    entry = g_malloc(sizeof(*entry)); +    entry->match = g_strdup(match); +    entry->deny = deny; + +    QTAILQ_INSERT_TAIL(&acl->entries, entry, next); +    acl->nentries++; + +    return acl->nentries; +} + + +int qemu_acl_insert(qemu_acl *acl, +                    int deny, +                    const char *match, +                    int index) +{ +    qemu_acl_entry *tmp; +    int i = 0; + +    if (index <= 0) +        return -1; +    if (index > acl->nentries) { +        return qemu_acl_append(acl, deny, match); +    } + +    QTAILQ_FOREACH(tmp, &acl->entries, next) { +        i++; +        if (i == index) { +            qemu_acl_entry *entry; +            entry = g_malloc(sizeof(*entry)); +            entry->match = g_strdup(match); +            entry->deny = deny; + +            QTAILQ_INSERT_BEFORE(tmp, entry, next); +            acl->nentries++; +            break; +        } +    } + +    return i; +} + +int qemu_acl_remove(qemu_acl *acl, +                    const char *match) +{ +    qemu_acl_entry *entry; +    int i = 0; + +    QTAILQ_FOREACH(entry, &acl->entries, next) { +        i++; +        if (strcmp(entry->match, match) == 0) { +            QTAILQ_REMOVE(&acl->entries, entry, next); +            acl->nentries--; +            g_free(entry->match); +            g_free(entry); +   
         return i; +        } +    } +    return -1; +} + + +/* + * Local variables: + *  c-indent-level: 4 + *  c-basic-offset: 4 + *  tab-width: 8 + * End: + */ diff --git a/util/bitmap.c b/util/bitmap.c new file mode 100644 index 00000000..300a68e3 --- /dev/null +++ b/util/bitmap.c @@ -0,0 +1,339 @@ +/* + * Bitmap Module + * + * Stolen from linux/src/lib/bitmap.c + * + * Copyright (C) 2010 Corentin Chary + * + * This source code is licensed under the GNU General Public License, + * Version 2. + */ + +#include "qemu/bitops.h" +#include "qemu/bitmap.h" +#include "qemu/atomic.h" + +/* + * bitmaps provide an array of bits, implemented using an + * array of unsigned longs.  The number of valid bits in a + * given bitmap does _not_ need to be an exact multiple of + * BITS_PER_LONG. + * + * The possible unused bits in the last, partially used word + * of a bitmap are 'don't care'.  The implementation makes + * no particular effort to keep them zero.  It ensures that + * their value will not affect the results of any operation. + * The bitmap operations that return Boolean (bitmap_empty, + * for example) or scalar (bitmap_weight, for example) results + * carefully filter out these unused bits from impacting their + * results. + * + * These operations actually hold to a slightly stronger rule: + * if you don't input any bitmaps to these ops that have some + * unused bits set, then they won't output any set unused bits + * in output bitmaps. + * + * The byte ordering of bitmaps is more natural on little + * endian architectures. 
+ */ + +int slow_bitmap_empty(const unsigned long *bitmap, long bits) +{ +    long k, lim = bits/BITS_PER_LONG; + +    for (k = 0; k < lim; ++k) { +        if (bitmap[k]) { +            return 0; +        } +    } +    if (bits % BITS_PER_LONG) { +        if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) { +            return 0; +        } +    } + +    return 1; +} + +int slow_bitmap_full(const unsigned long *bitmap, long bits) +{ +    long k, lim = bits/BITS_PER_LONG; + +    for (k = 0; k < lim; ++k) { +        if (~bitmap[k]) { +            return 0; +        } +    } + +    if (bits % BITS_PER_LONG) { +        if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) { +            return 0; +        } +    } + +    return 1; +} + +int slow_bitmap_equal(const unsigned long *bitmap1, +                      const unsigned long *bitmap2, long bits) +{ +    long k, lim = bits/BITS_PER_LONG; + +    for (k = 0; k < lim; ++k) { +        if (bitmap1[k] != bitmap2[k]) { +            return 0; +        } +    } + +    if (bits % BITS_PER_LONG) { +        if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) { +            return 0; +        } +    } + +    return 1; +} + +void slow_bitmap_complement(unsigned long *dst, const unsigned long *src, +                            long bits) +{ +    long k, lim = bits/BITS_PER_LONG; + +    for (k = 0; k < lim; ++k) { +        dst[k] = ~src[k]; +    } + +    if (bits % BITS_PER_LONG) { +        dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); +    } +} + +int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +                    const unsigned long *bitmap2, long bits) +{ +    long k; +    long nr = BITS_TO_LONGS(bits); +    unsigned long result = 0; + +    for (k = 0; k < nr; k++) { +        result |= (dst[k] = bitmap1[k] & bitmap2[k]); +    } +    return result != 0; +} + +void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1, +                    const unsigned long *bitmap2, long bits) +{ +    long k; +    
long nr = BITS_TO_LONGS(bits); + +    for (k = 0; k < nr; k++) { +        dst[k] = bitmap1[k] | bitmap2[k]; +    } +} + +void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, +                     const unsigned long *bitmap2, long bits) +{ +    long k; +    long nr = BITS_TO_LONGS(bits); + +    for (k = 0; k < nr; k++) { +        dst[k] = bitmap1[k] ^ bitmap2[k]; +    } +} + +int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +                       const unsigned long *bitmap2, long bits) +{ +    long k; +    long nr = BITS_TO_LONGS(bits); +    unsigned long result = 0; + +    for (k = 0; k < nr; k++) { +        result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); +    } +    return result != 0; +} + +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) + +void bitmap_set(unsigned long *map, long start, long nr) +{ +    unsigned long *p = map + BIT_WORD(start); +    const long size = start + nr; +    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); +    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + +    while (nr - bits_to_set >= 0) { +        *p |= mask_to_set; +        nr -= bits_to_set; +        bits_to_set = BITS_PER_LONG; +        mask_to_set = ~0UL; +        p++; +    } +    if (nr) { +        mask_to_set &= BITMAP_LAST_WORD_MASK(size); +        *p |= mask_to_set; +    } +} + +void bitmap_set_atomic(unsigned long *map, long start, long nr) +{ +    unsigned long *p = map + BIT_WORD(start); +    const long size = start + nr; +    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); +    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + +    /* First word */ +    if (nr - bits_to_set > 0) { +        atomic_or(p, mask_to_set); +        nr -= bits_to_set; +        bits_to_set = BITS_PER_LONG; +        mask_to_set = ~0UL; +        p++; +    } + +    /* Full words */ +    if (bits_to_set == BITS_PER_LONG) { +        while (nr >= BITS_PER_LONG) { +            *p = ~0UL; 
+            nr -= BITS_PER_LONG; +            p++; +        } +    } + +    /* Last word */ +    if (nr) { +        mask_to_set &= BITMAP_LAST_WORD_MASK(size); +        atomic_or(p, mask_to_set); +    } else { +        /* If we avoided the full barrier in atomic_or(), issue a +         * barrier to account for the assignments in the while loop. +         */ +        smp_mb(); +    } +} + +void bitmap_clear(unsigned long *map, long start, long nr) +{ +    unsigned long *p = map + BIT_WORD(start); +    const long size = start + nr; +    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); +    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + +    while (nr - bits_to_clear >= 0) { +        *p &= ~mask_to_clear; +        nr -= bits_to_clear; +        bits_to_clear = BITS_PER_LONG; +        mask_to_clear = ~0UL; +        p++; +    } +    if (nr) { +        mask_to_clear &= BITMAP_LAST_WORD_MASK(size); +        *p &= ~mask_to_clear; +    } +} + +bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr) +{ +    unsigned long *p = map + BIT_WORD(start); +    const long size = start + nr; +    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); +    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); +    unsigned long dirty = 0; +    unsigned long old_bits; + +    /* First word */ +    if (nr - bits_to_clear > 0) { +        old_bits = atomic_fetch_and(p, ~mask_to_clear); +        dirty |= old_bits & mask_to_clear; +        nr -= bits_to_clear; +        bits_to_clear = BITS_PER_LONG; +        mask_to_clear = ~0UL; +        p++; +    } + +    /* Full words */ +    if (bits_to_clear == BITS_PER_LONG) { +        while (nr >= BITS_PER_LONG) { +            if (*p) { +                old_bits = atomic_xchg(p, 0); +                dirty |= old_bits; +            } +            nr -= BITS_PER_LONG; +            p++; +        } +    } + +    /* Last word */ +    if (nr) { +        mask_to_clear &= BITMAP_LAST_WORD_MASK(size); 
+        old_bits = atomic_fetch_and(p, ~mask_to_clear); +        dirty |= old_bits & mask_to_clear; +    } else { +        if (!dirty) { +            smp_mb(); +        } +    } + +    return dirty != 0; +} + +#define ALIGN_MASK(x,mask)      (((x)+(mask))&~(mask)) + +/** + * bitmap_find_next_zero_area - find a contiguous aligned zero area + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @align_mask: Alignment mask for zero area + * + * The @align_mask should be one less than a power of 2; the effect is that + * the bit offset of all zero areas this function finds is multiples of that + * power of 2. A @align_mask of 0 means no alignment is required. + */ +unsigned long bitmap_find_next_zero_area(unsigned long *map, +                                         unsigned long size, +                                         unsigned long start, +                                         unsigned long nr, +                                         unsigned long align_mask) +{ +    unsigned long index, end, i; +again: +    index = find_next_zero_bit(map, size, start); + +    /* Align allocation */ +    index = ALIGN_MASK(index, align_mask); + +    end = index + nr; +    if (end > size) { +        return end; +    } +    i = find_next_bit(map, end, index); +    if (i < end) { +        start = i + 1; +        goto again; +    } +    return index; +} + +int slow_bitmap_intersects(const unsigned long *bitmap1, +                           const unsigned long *bitmap2, long bits) +{ +    long k, lim = bits/BITS_PER_LONG; + +    for (k = 0; k < lim; ++k) { +        if (bitmap1[k] & bitmap2[k]) { +            return 1; +        } +    } + +    if (bits % BITS_PER_LONG) { +        if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) { +            return 1; +        } +    } +    return 0; +} diff --git a/util/bitops.c b/util/bitops.c new file 
mode 100644 index 00000000..227c38b8 --- /dev/null +++ b/util/bitops.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * Copyright (C) 2008 IBM Corporation + * Written by Rusty Russell <rusty@rustcorp.com.au> + * (Inspired by David Howell's find_next_bit implementation) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "qemu/bitops.h" + +#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG) + +/* + * Find the next set bit in a memory region. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, +			    unsigned long offset) +{ +    const unsigned long *p = addr + BITOP_WORD(offset); +    unsigned long result = offset & ~(BITS_PER_LONG-1); +    unsigned long tmp; + +    if (offset >= size) { +        return size; +    } +    size -= result; +    offset %= BITS_PER_LONG; +    if (offset) { +        tmp = *(p++); +        tmp &= (~0UL << offset); +        if (size < BITS_PER_LONG) { +            goto found_first; +        } +        if (tmp) { +            goto found_middle; +        } +        size -= BITS_PER_LONG; +        result += BITS_PER_LONG; +    } +    while (size >= 4*BITS_PER_LONG) { +        unsigned long d1, d2, d3; +        tmp = *p; +        d1 = *(p+1); +        d2 = *(p+2); +        d3 = *(p+3); +        if (tmp) { +            goto found_middle; +        } +        if (d1 | d2 | d3) { +            break; +        } +        p += 4; +        result += 4*BITS_PER_LONG; +        size -= 4*BITS_PER_LONG; +    } +    while (size >= BITS_PER_LONG) { +        if ((tmp = *(p++))) { +            goto found_middle; +        } +        result += BITS_PER_LONG; +        size -= BITS_PER_LONG; +    } +    if (!size) { +        return 
result; +    } +    tmp = *p; + +found_first: +    tmp &= (~0UL >> (BITS_PER_LONG - size)); +    if (tmp == 0UL) {		/* Are any bits set? */ +        return result + size;	/* Nope. */ +    } +found_middle: +    return result + ctzl(tmp); +} + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h. + */ +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, +				 unsigned long offset) +{ +    const unsigned long *p = addr + BITOP_WORD(offset); +    unsigned long result = offset & ~(BITS_PER_LONG-1); +    unsigned long tmp; + +    if (offset >= size) { +        return size; +    } +    size -= result; +    offset %= BITS_PER_LONG; +    if (offset) { +        tmp = *(p++); +        tmp |= ~0UL >> (BITS_PER_LONG - offset); +        if (size < BITS_PER_LONG) { +            goto found_first; +        } +        if (~tmp) { +            goto found_middle; +        } +        size -= BITS_PER_LONG; +        result += BITS_PER_LONG; +    } +    while (size & ~(BITS_PER_LONG-1)) { +        if (~(tmp = *(p++))) { +            goto found_middle; +        } +        result += BITS_PER_LONG; +        size -= BITS_PER_LONG; +    } +    if (!size) { +        return result; +    } +    tmp = *p; + +found_first: +    tmp |= ~0UL << size; +    if (tmp == ~0UL) {	/* Are any bits zero? */ +        return result + size;	/* Nope. */ +    } +found_middle: +    return result + ctzl(~tmp); +} + +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ +    unsigned long words; +    unsigned long tmp; + +    /* Start at final word. */ +    words = size / BITS_PER_LONG; + +    /* Partial final word? 
*/ +    if (size & (BITS_PER_LONG-1)) { +        tmp = (addr[words] & (~0UL >> (BITS_PER_LONG +                                       - (size & (BITS_PER_LONG-1))))); +        if (tmp) { +            goto found; +        } +    } + +    while (words) { +        tmp = addr[--words]; +        if (tmp) { +        found: +            return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp); +        } +    } + +    /* Not found */ +    return size; +} diff --git a/util/compatfd.c b/util/compatfd.c new file mode 100644 index 00000000..e8571502 --- /dev/null +++ b/util/compatfd.c @@ -0,0 +1,110 @@ +/* + * signalfd/eventfd compatibility + * + * Copyright IBM, Corp. 2008 + * + * Authors: + *  Anthony Liguori   <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2.  See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu-common.h" +#include "qemu/compatfd.h" +#include "qemu/thread.h" + +#include <sys/syscall.h> + +struct sigfd_compat_info +{ +    sigset_t mask; +    int fd; +}; + +static void *sigwait_compat(void *opaque) +{ +    struct sigfd_compat_info *info = opaque; + +    while (1) { +        int sig; +        int err; + +        err = sigwait(&info->mask, &sig); +        if (err != 0) { +            if (errno == EINTR) { +                continue; +            } else { +                return NULL; +            } +        } else { +            struct qemu_signalfd_siginfo buffer; +            size_t offset = 0; + +            memset(&buffer, 0, sizeof(buffer)); +            buffer.ssi_signo = sig; + +            while (offset < sizeof(buffer)) { +                ssize_t len; + +                len = write(info->fd, (char *)&buffer + offset, +                            sizeof(buffer) - offset); +                if (len == -1 && errno == EINTR) +                    continue; + +                if (len <= 0) { +                    return NULL; +                } + +                offset += len; +            } +        } +    } +} + +static int qemu_signalfd_compat(const sigset_t *mask) +{ +    struct sigfd_compat_info *info; +    QemuThread thread; +    int fds[2]; + +    info = malloc(sizeof(*info)); +    if (info == NULL) { +        errno = ENOMEM; +        return -1; +    } + +    if (pipe(fds) == -1) { +        free(info); +        return -1; +    } + +    qemu_set_cloexec(fds[0]); +    qemu_set_cloexec(fds[1]); + +    memcpy(&info->mask, mask, sizeof(*mask)); +    info->fd = fds[1]; + +    qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info, +                       QEMU_THREAD_DETACHED); + +    return fds[0]; +} + +int qemu_signalfd(const sigset_t *mask) +{ +#if defined(CONFIG_SIGNALFD) +    int ret; + +    ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8); +    if (ret != -1) { +        qemu_set_cloexec(ret); +        return ret; +   
 } +#endif + +    return qemu_signalfd_compat(mask); +} diff --git a/util/crc32c.c b/util/crc32c.c new file mode 100644 index 00000000..88663278 --- /dev/null +++ b/util/crc32c.c @@ -0,0 +1,115 @@ +/* + *  Castagnoli CRC32C Checksum Algorithm + * + *  Polynomial: 0x11EDC6F41 + * + *  Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman + *               "Optimization of Cyclic Redundancy-Check Codes with 24 + *                 and 32 Parity Bits",IEEE Transactions on Communication, + *                Volume 41, Number 6, June 1993 + * + *  Copyright (c) 2013 Red Hat, Inc., + * + *  Authors: + *   Jeff Cody <jcody@redhat.com> + * + *  Based on the Linux kernel cryptographic crc32c module, + * + *  Copyright (c) 2004 Cisco Systems, Inc. + *  Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ + +#include "qemu-common.h" +#include "qemu/crc32c.h" + +/* + * This is the CRC-32C table + * Generated with: + * width = 32 bits + * poly = 0x1EDC6F41 + * reflect input bytes = true + * reflect output bytes = true + */ + +static const uint32_t crc32c_table[256] = { +    0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, +    0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, +    0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, +    0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, +    0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, +    0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, +    0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, +    0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, +    0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, +    0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, +    0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, +    0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, +    0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, +    0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, +    0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, +    0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, +    0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, +    0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, +    0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, +    0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, +    0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, +    0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, +    0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, +    0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, +    0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, +    0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, +    0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, +    0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, +    0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, +    0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, +    
0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, +    0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, +    0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, +    0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, +    0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, +    0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, +    0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, +    0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, +    0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, +    0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, +    0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, +    0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, +    0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, +    0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, +    0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, +    0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, +    0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, +    0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, +    0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, +    0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, +    0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, +    0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, +    0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, +    0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, +    0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, +    0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, +    0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, +    0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, +    0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, +    0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, +    0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, +    0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, +    0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, +    0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L +}; + + +uint32_t crc32c(uint32_t crc, const uint8_t *data, 
unsigned int length) +{ +    while (length--) { +        crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); +    } +    return crc^0xffffffff; +} + diff --git a/util/cutils.c b/util/cutils.c new file mode 100644 index 00000000..5d1c9ebe --- /dev/null +++ b/util/cutils.c @@ -0,0 +1,568 @@ +/* + * Simple C functions to supplement the C library + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
/*
 * Copy str into buf with truncation.
 *
 * At most buf_size - 1 characters are copied and the result is always
 * NUL-terminated.  Nothing is written when buf_size is not positive.
 */
void pstrcpy(char *buf, int buf_size, const char *str)
{
    char *dst = buf;
    char *limit;

    if (buf_size <= 0) {
        return;
    }

    /* Last position that may hold a character; the NUL goes here at most. */
    limit = buf + buf_size - 1;
    while (*str != '\0' && dst < limit) {
        *dst++ = *str++;
    }
    *dst = '\0';
}
/*
 * Split the next token off *input.
 *
 * The token ends at the first character that occurs in delim; that character
 * is overwritten with NUL and *input is advanced just past it.  When the
 * token runs to the end of the string, *input is set to NULL.
 *
 * Returns the start of the token, or NULL if *input was already NULL.
 */
char *qemu_strsep(char **input, const char *delim)
{
    char *token = *input;
    char *cur;

    if (token == NULL) {
        return NULL;
    }

    /* Advance to the first delimiter or to the terminating NUL. */
    cur = token;
    while (*cur != '\0' && strchr(delim, *cur) == NULL) {
        cur++;
    }

    if (*cur != '\0') {
        *cur = '\0';
        *input = cur + 1;
    } else {
        *input = NULL;
    }
    return token;
}
+ * + * The return value is the offset of the non-zero area rounded + * down to a multiple of sizeof(VECTYPE) for the first + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * afterwards. + * + * If the buffer is all zero the return value is equal to len. + */ + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ +    const VECTYPE *p = buf; +    const VECTYPE zero = (VECTYPE){0}; +    size_t i; + +    assert(can_use_buffer_find_nonzero_offset(buf, len)); + +    if (!len) { +        return 0; +    } + +    for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { +        if (!ALL_EQ(p[i], zero)) { +            return i * sizeof(VECTYPE); +        } +    } + +    for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; +         i < len / sizeof(VECTYPE); +         i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { +        VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]); +        VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]); +        VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]); +        VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]); +        VECTYPE tmp01 = VEC_OR(tmp0, tmp1); +        VECTYPE tmp23 = VEC_OR(tmp2, tmp3); +        if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) { +            break; +        } +    } + +    return i * sizeof(VECTYPE); +} + +/* + * Checks if a buffer is all zeroes + * + * Attention! The len must be a multiple of 4 * sizeof(long) due to + * restriction of optimizations in this function. + */ +bool buffer_is_zero(const void *buf, size_t len) +{ +    /* +     * Use long as the biggest available internal data type that fits into the +     * CPU register and unroll the loop to smooth out the effect of memory +     * latency. 
+     */ + +    size_t i; +    long d0, d1, d2, d3; +    const long * const data = buf; + +    /* use vector optimized zero check if possible */ +    if (can_use_buffer_find_nonzero_offset(buf, len)) { +        return buffer_find_nonzero_offset(buf, len) == len; +    } + +    assert(len % (4 * sizeof(long)) == 0); +    len /= sizeof(long); + +    for (i = 0; i < len; i += 4) { +        d0 = data[i + 0]; +        d1 = data[i + 1]; +        d2 = data[i + 2]; +        d3 = data[i + 3]; + +        if (d0 || d1 || d2 || d3) { +            return false; +        } +    } + +    return true; +} + +#ifndef _WIN32 +/* Sets a specific flag */ +int fcntl_setfl(int fd, int flag) +{ +    int flags; + +    flags = fcntl(fd, F_GETFL); +    if (flags == -1) +        return -errno; + +    if (fcntl(fd, F_SETFL, flags | flag) == -1) +        return -errno; + +    return 0; +} +#endif + +static int64_t suffix_mul(char suffix, int64_t unit) +{ +    switch (qemu_toupper(suffix)) { +    case STRTOSZ_DEFSUFFIX_B: +        return 1; +    case STRTOSZ_DEFSUFFIX_KB: +        return unit; +    case STRTOSZ_DEFSUFFIX_MB: +        return unit * unit; +    case STRTOSZ_DEFSUFFIX_GB: +        return unit * unit * unit; +    case STRTOSZ_DEFSUFFIX_TB: +        return unit * unit * unit * unit; +    case STRTOSZ_DEFSUFFIX_PB: +        return unit * unit * unit * unit * unit; +    case STRTOSZ_DEFSUFFIX_EB: +        return unit * unit * unit * unit * unit * unit; +    } +    return -1; +} + +/* + * Convert string to bytes, allowing either B/b for bytes, K/k for KB, + * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned + * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on + * other error. 
+ */ +int64_t strtosz_suffix_unit(const char *nptr, char **end, +                            const char default_suffix, int64_t unit) +{ +    int64_t retval = -EINVAL; +    char *endptr; +    unsigned char c; +    int mul_required = 0; +    double val, mul, integral, fraction; + +    errno = 0; +    val = strtod(nptr, &endptr); +    if (isnan(val) || endptr == nptr || errno != 0) { +        goto fail; +    } +    fraction = modf(val, &integral); +    if (fraction != 0) { +        mul_required = 1; +    } +    c = *endptr; +    mul = suffix_mul(c, unit); +    if (mul >= 0) { +        endptr++; +    } else { +        mul = suffix_mul(default_suffix, unit); +        assert(mul >= 0); +    } +    if (mul == 1 && mul_required) { +        goto fail; +    } +    if ((val * mul >= INT64_MAX) || val < 0) { +        retval = -ERANGE; +        goto fail; +    } +    retval = val * mul; + +fail: +    if (end) { +        *end = endptr; +    } + +    return retval; +} + +int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix) +{ +    return strtosz_suffix_unit(nptr, end, default_suffix, 1024); +} + +int64_t strtosz(const char *nptr, char **end) +{ +    return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB); +} + +/** + * parse_uint: + * + * @s: String to parse + * @value: Destination for parsed integer value + * @endptr: Destination for pointer to first character not consumed + * @base: integer base, between 2 and 36 inclusive, or 0 + * + * Parse unsigned integer + * + * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional + * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits. + * + * If @s is null, or @base is invalid, or @s doesn't start with an + * integer in the syntax above, set *@value to 0, *@endptr to @s, and + * return -EINVAL. 
/**
 * parse_uint:
 *
 * @s: String to parse
 * @value: Destination for parsed integer value
 * @endptr: Destination for pointer to first character not consumed
 * @base: integer base, passed unchanged to strtoull()
 *
 * Parse an unsigned integer using strtoull() syntax.
 *
 * *@value and *@endptr are always written:
 *  - @s is NULL: *@value = 0, *@endptr = @s, returns -EINVAL.
 *  - strtoull() sets errno: *@value is whatever strtoull() returned,
 *    returns -errno.
 *  - no characters were consumed: *@value = 0, *@endptr = @s,
 *    returns -EINVAL.
 *  - the first non-blank character is '-': *@value = 0, *@endptr points
 *    past the parsed digits, returns -ERANGE.
 *  - otherwise: *@value is the parsed number, returns 0.
 */
int parse_uint(const char *s, unsigned long long *value, char **endptr,
               int base)
{
    char *end = (char *)s;
    unsigned long long parsed = 0;
    int ret = 0;

    if (s == NULL) {
        ret = -EINVAL;
    } else {
        errno = 0;
        parsed = strtoull(s, &end, base);
        if (errno != 0) {
            ret = -errno;
        } else if (end == s) {
            ret = -EINVAL;
        } else {
            /* strtoull() accepts a leading '-'; reject it explicitly. */
            const char *p = s;

            while (isspace((unsigned char)*p)) {
                p++;
            }
            if (*p == '-') {
                parsed = 0;
                ret = -ERANGE;
            }
        }
    }

    *value = parsed;
    *endptr = end;
    return ret;
}
/*
 * Round value down to the nearest power of 2.
 *
 * Returns 0 when value is 0, value itself when is_power_of_2() accepts it,
 * and otherwise the single bit selected by shifting the top bit right by
 * clz64(value).
 */
int64_t pow2floor(int64_t value)
{
    if (value == 0) {
        /*
         * NOTE(review): clz64(0) presumably reports 64 leading zeros (confirm
         * in qemu/host-utils.h); shifting a 64-bit operand by 64 is undefined
         * behaviour, so zero is answered here without shifting.
         */
        return 0;
    }
    if (!is_power_of_2(value)) {
        value = 0x8000000000000000ULL >> clz64(value);
    }
    return value;
}
   } +} + +/* + * helper to parse debug environment variables + */ +int parse_debug_env(const char *name, int max, int initial) +{ +    char *debug_env = getenv(name); +    char *inv = NULL; +    long debug; + +    if (!debug_env) { +        return initial; +    } +    errno = 0; +    debug = strtol(debug_env, &inv, 10); +    if (inv == debug_env) { +        return initial; +    } +    if (debug < 0 || debug > max || errno != 0) { +        fprintf(stderr, "warning: %s not in [0, %d]", name, max); +        return initial; +    } +    return debug; +} + +/* + * Helper to print ethernet mac address + */ +const char *qemu_ether_ntoa(const MACAddr *mac) +{ +    static char ret[18]; + +    snprintf(ret, sizeof(ret), "%02x:%02x:%02x:%02x:%02x:%02x", +             mac->a[0], mac->a[1], mac->a[2], mac->a[3], mac->a[4], mac->a[5]); + +    return ret; +} diff --git a/util/envlist.c b/util/envlist.c new file mode 100644 index 00000000..099a544a --- /dev/null +++ b/util/envlist.c @@ -0,0 +1,241 @@ +#include "qemu-common.h" +#include "qemu/queue.h" +#include "qemu/envlist.h" + +struct envlist_entry { +	const char *ev_var;			/* actual env value */ +	QLIST_ENTRY(envlist_entry) ev_link; +}; + +struct envlist { +	QLIST_HEAD(, envlist_entry) el_entries;	/* actual entries */ +	size_t el_count;			/* number of entries */ +}; + +static int envlist_parse(envlist_t *envlist, +    const char *env, int (*)(envlist_t *, const char *)); + +/* + * Allocates new envlist and returns pointer to that or + * NULL in case of error. + */ +envlist_t * +envlist_create(void) +{ +	envlist_t *envlist; + +	if ((envlist = malloc(sizeof (*envlist))) == NULL) +		return (NULL); + +	QLIST_INIT(&envlist->el_entries); +	envlist->el_count = 0; + +	return (envlist); +} + +/* + * Releases given envlist and its entries. 
+ */ +void +envlist_free(envlist_t *envlist) +{ +	struct envlist_entry *entry; + +	assert(envlist != NULL); + +	while (envlist->el_entries.lh_first != NULL) { +		entry = envlist->el_entries.lh_first; +		QLIST_REMOVE(entry, ev_link); + +		free((char *)entry->ev_var); +		free(entry); +	} +	free(envlist); +} + +/* + * Parses comma separated list of set/modify environment + * variable entries and updates given enlist accordingly. + * + * For example: + *     envlist_parse(el, "HOME=foo,SHELL=/bin/sh"); + * + * inserts/sets environment variables HOME and SHELL. + * + * Returns 0 on success, errno otherwise. + */ +int +envlist_parse_set(envlist_t *envlist, const char *env) +{ +	return (envlist_parse(envlist, env, &envlist_setenv)); +} + +/* + * Parses comma separated list of unset environment variable + * entries and removes given variables from given envlist. + * + * Returns 0 on success, errno otherwise. + */ +int +envlist_parse_unset(envlist_t *envlist, const char *env) +{ +	return (envlist_parse(envlist, env, &envlist_unsetenv)); +} + +/* + * Parses comma separated list of set, modify or unset entries + * and calls given callback for each entry. + * + * Returns 0 in case of success, errno otherwise. + */ +static int +envlist_parse(envlist_t *envlist, const char *env, +    int (*callback)(envlist_t *, const char *)) +{ +	char *tmpenv, *envvar; +	char *envsave = NULL; +    int ret = 0; +    assert(callback != NULL); + +	if ((envlist == NULL) || (env == NULL)) +		return (EINVAL); + +	if ((tmpenv = strdup(env)) == NULL) +		return (errno); +    envsave = tmpenv; + +    do { +        envvar = strchr(tmpenv, ','); +        if (envvar != NULL) { +            *envvar = '\0'; +        } +        if ((*callback)(envlist, tmpenv) != 0) { +            ret = errno; +            break; +		} +        tmpenv = envvar + 1; +    } while (envvar != NULL); + +    free(envsave); +    return ret; +} + +/* + * Sets environment value to envlist in similar manner + * than putenv(3). 
+ * + * Returns 0 in success, errno otherwise. + */ +int +envlist_setenv(envlist_t *envlist, const char *env) +{ +	struct envlist_entry *entry = NULL; +	const char *eq_sign; +	size_t envname_len; + +	if ((envlist == NULL) || (env == NULL)) +		return (EINVAL); + +	/* find out first equals sign in given env */ +	if ((eq_sign = strchr(env, '=')) == NULL) +		return (EINVAL); +	envname_len = eq_sign - env + 1; + +	/* +	 * If there already exists variable with given name +	 * we remove and release it before allocating a whole +	 * new entry. +	 */ +	for (entry = envlist->el_entries.lh_first; entry != NULL; +	    entry = entry->ev_link.le_next) { +		if (strncmp(entry->ev_var, env, envname_len) == 0) +			break; +	} + +	if (entry != NULL) { +		QLIST_REMOVE(entry, ev_link); +		free((char *)entry->ev_var); +		free(entry); +	} else { +		envlist->el_count++; +	} + +	if ((entry = malloc(sizeof (*entry))) == NULL) +		return (errno); +	if ((entry->ev_var = strdup(env)) == NULL) { +		free(entry); +		return (errno); +	} +	QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link); + +	return (0); +} + +/* + * Removes given env value from envlist in similar manner + * than unsetenv(3).  Returns 0 in success, errno otherwise. + */ +int +envlist_unsetenv(envlist_t *envlist, const char *env) +{ +	struct envlist_entry *entry; +	size_t envname_len; + +	if ((envlist == NULL) || (env == NULL)) +		return (EINVAL); + +	/* env is not allowed to contain '=' */ +	if (strchr(env, '=') != NULL) +		return (EINVAL); + +	/* +	 * Find out the requested entry and remove +	 * it from the list. 
+	 */ +	envname_len = strlen(env); +	for (entry = envlist->el_entries.lh_first; entry != NULL; +	    entry = entry->ev_link.le_next) { +		if (strncmp(entry->ev_var, env, envname_len) == 0) +			break; +	} +	if (entry != NULL) { +		QLIST_REMOVE(entry, ev_link); +		free((char *)entry->ev_var); +		free(entry); + +		envlist->el_count--; +	} +	return (0); +} + +/* + * Returns given envlist as array of strings (in same form that + * global variable environ is).  Caller must free returned memory + * by calling free(3) for each element and for the array.  Returned + * array and given envlist are not related (no common references). + * + * If caller provides count pointer, number of items in array is + * stored there.  In case of error, NULL is returned and no memory + * is allocated. + */ +char ** +envlist_to_environ(const envlist_t *envlist, size_t *count) +{ +	struct envlist_entry *entry; +	char **env, **penv; + +	penv = env = malloc((envlist->el_count + 1) * sizeof (char *)); +	if (env == NULL) +		return (NULL); + +	for (entry = envlist->el_entries.lh_first; entry != NULL; +	    entry = entry->ev_link.le_next) { +		*(penv++) = strdup(entry->ev_var); +	} +	*penv = NULL; /* NULL terminate the list */ + +	if (count != NULL) +		*count = envlist->el_count; + +	return (env); +} diff --git a/util/error.c b/util/error.c new file mode 100644 index 00000000..14f43518 --- /dev/null +++ b/util/error.c @@ -0,0 +1,179 @@ +/* + * QEMU Error Objects + * + * Copyright IBM, Corp. 2011 + * + * Authors: + *  Anthony Liguori   <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2.  See + * the COPYING.LIB file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +struct Error +{ +    char *msg; +    ErrorClass err_class; +}; + +Error *error_abort; + +void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...) 
+{ +    Error *err; +    va_list ap; +    int saved_errno = errno; + +    if (errp == NULL) { +        return; +    } +    assert(*errp == NULL); + +    err = g_malloc0(sizeof(*err)); + +    va_start(ap, fmt); +    err->msg = g_strdup_vprintf(fmt, ap); +    va_end(ap); +    err->err_class = err_class; + +    if (errp == &error_abort) { +        error_report_err(err); +        abort(); +    } + +    *errp = err; + +    errno = saved_errno; +} + +void error_set_errno(Error **errp, int os_errno, ErrorClass err_class, +                     const char *fmt, ...) +{ +    Error *err; +    char *msg1; +    va_list ap; +    int saved_errno = errno; + +    if (errp == NULL) { +        return; +    } +    assert(*errp == NULL); + +    err = g_malloc0(sizeof(*err)); + +    va_start(ap, fmt); +    msg1 = g_strdup_vprintf(fmt, ap); +    if (os_errno != 0) { +        err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno)); +        g_free(msg1); +    } else { +        err->msg = msg1; +    } +    va_end(ap); +    err->err_class = err_class; + +    if (errp == &error_abort) { +        error_report_err(err); +        abort(); +    } + +    *errp = err; + +    errno = saved_errno; +} + +void error_setg_file_open(Error **errp, int os_errno, const char *filename) +{ +    error_setg_errno(errp, os_errno, "Could not open '%s'", filename); +} + +#ifdef _WIN32 + +void error_set_win32(Error **errp, int win32_err, ErrorClass err_class, +                     const char *fmt, ...) 
+{ +    Error *err; +    char *msg1; +    va_list ap; + +    if (errp == NULL) { +        return; +    } +    assert(*errp == NULL); + +    err = g_malloc0(sizeof(*err)); + +    va_start(ap, fmt); +    msg1 = g_strdup_vprintf(fmt, ap); +    if (win32_err != 0) { +        char *msg2 = g_win32_error_message(win32_err); +        err->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2, +                                   (unsigned)win32_err); +        g_free(msg2); +        g_free(msg1); +    } else { +        err->msg = msg1; +    } +    va_end(ap); +    err->err_class = err_class; + +    if (errp == &error_abort) { +        error_report_err(err); +        abort(); +    } + +    *errp = err; +} + +#endif + +Error *error_copy(const Error *err) +{ +    Error *err_new; + +    err_new = g_malloc0(sizeof(*err)); +    err_new->msg = g_strdup(err->msg); +    err_new->err_class = err->err_class; + +    return err_new; +} + +ErrorClass error_get_class(const Error *err) +{ +    return err->err_class; +} + +const char *error_get_pretty(Error *err) +{ +    return err->msg; +} + +void error_report_err(Error *err) +{ +    error_report("%s", error_get_pretty(err)); +    error_free(err); +} + +void error_free(Error *err) +{ +    if (err) { +        g_free(err->msg); +        g_free(err); +    } +} + +void error_propagate(Error **dst_errp, Error *local_err) +{ +    if (local_err && dst_errp == &error_abort) { +        error_report_err(local_err); +        abort(); +    } else if (dst_errp && !*dst_errp) { +        *dst_errp = local_err; +    } else if (local_err) { +        error_free(local_err); +    } +} diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c new file mode 100644 index 00000000..ed4ca2b0 --- /dev/null +++ b/util/event_notifier-posix.c @@ -0,0 +1,122 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + *  Michael S. 
Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qemu/event_notifier.h" +#include "sysemu/char.h" +#include "qemu/main-loop.h" + +#ifdef CONFIG_EVENTFD +#include <sys/eventfd.h> +#endif + +void event_notifier_init_fd(EventNotifier *e, int fd) +{ +    e->rfd = fd; +    e->wfd = fd; +} + +int event_notifier_init(EventNotifier *e, int active) +{ +    int fds[2]; +    int ret; + +#ifdef CONFIG_EVENTFD +    ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); +#else +    ret = -1; +    errno = ENOSYS; +#endif +    if (ret >= 0) { +        e->rfd = e->wfd = ret; +    } else { +        if (errno != ENOSYS) { +            return -errno; +        } +        if (qemu_pipe(fds) < 0) { +            return -errno; +        } +        ret = fcntl_setfl(fds[0], O_NONBLOCK); +        if (ret < 0) { +            ret = -errno; +            goto fail; +        } +        ret = fcntl_setfl(fds[1], O_NONBLOCK); +        if (ret < 0) { +            ret = -errno; +            goto fail; +        } +        e->rfd = fds[0]; +        e->wfd = fds[1]; +    } +    if (active) { +        event_notifier_set(e); +    } +    return 0; + +fail: +    close(fds[0]); +    close(fds[1]); +    return ret; +} + +void event_notifier_cleanup(EventNotifier *e) +{ +    if (e->rfd != e->wfd) { +        close(e->rfd); +    } +    close(e->wfd); +} + +int event_notifier_get_fd(EventNotifier *e) +{ +    return e->rfd; +} + +int event_notifier_set_handler(EventNotifier *e, +                               EventNotifierHandler *handler) +{ +    qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e); +    return 0; +} + +int event_notifier_set(EventNotifier *e) +{ +    static const uint64_t value = 1; +    ssize_t ret; + +    do { +        ret = write(e->wfd, &value, sizeof(value)); +    } while (ret < 0 && errno == EINTR); + +    /* EAGAIN is fine, a read must be 
pending.  */ +    if (ret < 0 && errno != EAGAIN) { +        return -errno; +    } +    return 0; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ +    int value; +    ssize_t len; +    char buffer[512]; + +    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */ +    value = 0; +    do { +        len = read(e->rfd, buffer, sizeof(buffer)); +        value |= (len > 0); +    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); + +    return value; +} diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c new file mode 100644 index 00000000..6dbb530c --- /dev/null +++ b/util/event_notifier-win32.c @@ -0,0 +1,59 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + *  Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qemu/event_notifier.h" +#include "qemu/main-loop.h" + +int event_notifier_init(EventNotifier *e, int active) +{ +    e->event = CreateEvent(NULL, TRUE, FALSE, NULL); +    assert(e->event); +    return 0; +} + +void event_notifier_cleanup(EventNotifier *e) +{ +    CloseHandle(e->event); +} + +HANDLE event_notifier_get_handle(EventNotifier *e) +{ +    return e->event; +} + +int event_notifier_set_handler(EventNotifier *e, +                               EventNotifierHandler *handler) +{ +    if (handler) { +        return qemu_add_wait_object(e->event, (IOHandler *)handler, e); +    } else { +        qemu_del_wait_object(e->event, (IOHandler *)handler, e); +        return 0; +    } +} + +int event_notifier_set(EventNotifier *e) +{ +    SetEvent(e->event); +    return 0; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ +    int ret = WaitForSingleObject(e->event, 0); +    if (ret == WAIT_OBJECT_0) { +        ResetEvent(e->event); +        return true; +    } +    return false; +} diff --git 
a/util/fifo8.c b/util/fifo8.c new file mode 100644 index 00000000..0ea5ad98 --- /dev/null +++ b/util/fifo8.c @@ -0,0 +1,125 @@ +/* + * Generic FIFO component, implemented as a circular buffer. + * + * Copyright (c) 2012 Peter A. G. Crosthwaite + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu-common.h" +#include "qemu/fifo8.h" + +void fifo8_create(Fifo8 *fifo, uint32_t capacity) +{ +    fifo->data = g_new(uint8_t, capacity); +    fifo->capacity = capacity; +    fifo->head = 0; +    fifo->num = 0; +} + +void fifo8_destroy(Fifo8 *fifo) +{ +    g_free(fifo->data); +} + +void fifo8_push(Fifo8 *fifo, uint8_t data) +{ +    if (fifo->num == fifo->capacity) { +        abort(); +    } +    fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data; +    fifo->num++; +} + +void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num) +{ +    uint32_t start, avail; + +    if (fifo->num + num > fifo->capacity) { +        abort(); +    } + +    start = (fifo->head + fifo->num) % fifo->capacity; + +    if (start + num <= fifo->capacity) { +        memcpy(&fifo->data[start], data, num); +    } else { +        avail = fifo->capacity - start; +        memcpy(&fifo->data[start], data, avail); +        memcpy(&fifo->data[0], &data[avail], num - avail); +    } + +    fifo->num += num; +} + +uint8_t fifo8_pop(Fifo8 *fifo) +{ +    uint8_t ret; + +    if (fifo->num == 0) { +        abort(); +    } +    ret = fifo->data[fifo->head++]; +    fifo->head %= fifo->capacity; +    fifo->num--; +    return ret; +} + +const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num) +{ +    uint8_t *ret; + +    
if (max == 0 || max > fifo->num) { +        abort(); +    } +    *num = MIN(fifo->capacity - fifo->head, max); +    ret = &fifo->data[fifo->head]; +    fifo->head += *num; +    fifo->head %= fifo->capacity; +    fifo->num -= *num; +    return ret; +} + +void fifo8_reset(Fifo8 *fifo) +{ +    fifo->num = 0; +    fifo->head = 0; +} + +bool fifo8_is_empty(Fifo8 *fifo) +{ +    return (fifo->num == 0); +} + +bool fifo8_is_full(Fifo8 *fifo) +{ +    return (fifo->num == fifo->capacity); +} + +uint32_t fifo8_num_free(Fifo8 *fifo) +{ +    return fifo->capacity - fifo->num; +} + +uint32_t fifo8_num_used(Fifo8 *fifo) +{ +    return fifo->num; +} + +const VMStateDescription vmstate_fifo8 = { +    .name = "Fifo8", +    .version_id = 1, +    .minimum_version_id = 1, +    .fields = (VMStateField[]) { +        VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity), +        VMSTATE_UINT32(head, Fifo8), +        VMSTATE_UINT32(num, Fifo8), +        VMSTATE_END_OF_LIST() +    } +}; diff --git a/util/getauxval.c b/util/getauxval.c new file mode 100644 index 00000000..1732ace2 --- /dev/null +++ b/util/getauxval.c @@ -0,0 +1,109 @@ +/* + * QEMU access to the auxiliary vector + * + * Copyright (C) 2013 Red Hat, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/osdep.h" + +#ifdef CONFIG_GETAUXVAL +/* Don't inline this in qemu/osdep.h, because pulling in <sys/auxv.h> for +   the system declaration of getauxval pulls in the system <elf.h>, which +   conflicts with qemu's version.  */ + +#include <sys/auxv.h> + +unsigned long qemu_getauxval(unsigned long key) +{ +    return getauxval(key); +} +#elif defined(__linux__) +#include "elf.h" + +/* Our elf.h doesn't contain Elf32_auxv_t and Elf64_auxv_t, which is ok because +   that just makes it easier to define it properly for the host here.  */ +typedef struct { +    unsigned long a_type; +    unsigned long a_val; +} ElfW_auxv_t; + +static const ElfW_auxv_t *auxv; + +static const ElfW_auxv_t *qemu_init_auxval(void) +{ +    ElfW_auxv_t *a; +    ssize_t size = 512, r, ofs; +    int fd; + +    /* Allocate some initial storage.  Make sure the first entry is set +       to end-of-list, so that we've got a valid list in case of error.  */ +    auxv = a = g_malloc(size); +    a[0].a_type = 0; +    a[0].a_val = 0; + +    fd = open("/proc/self/auxv", O_RDONLY); +    if (fd < 0) { +        return a; +    } + +    /* Read the first SIZE bytes.  Hopefully, this covers everything.  */ +    r = read(fd, a, size); + +    if (r == size) { +        /* Continue to expand until we do get a partial read.  
*/ +        do { +            ofs = size; +            size *= 2; +            auxv = a = g_realloc(a, size); +            r = read(fd, (char *)a + ofs, ofs); +        } while (r == ofs); +    } + +    close(fd); +    return a; +} + +unsigned long qemu_getauxval(unsigned long type) +{ +    const ElfW_auxv_t *a = auxv; + +    if (unlikely(a == NULL)) { +        a = qemu_init_auxval(); +    } + +    for (; a->a_type != 0; a++) { +        if (a->a_type == type) { +            return a->a_val; +        } +    } + +    return 0; +} + +#else + +unsigned long qemu_getauxval(unsigned long type) +{ +    return 0; +} + +#endif diff --git a/util/hbitmap.c b/util/hbitmap.c new file mode 100644 index 00000000..50b888fd --- /dev/null +++ b/util/hbitmap.c @@ -0,0 +1,495 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later.  See the COPYING file in the top-level directory. + */ + +#include <string.h> +#include <glib.h> +#include <assert.h> +#include "qemu/osdep.h" +#include "qemu/hbitmap.h" +#include "qemu/host-utils.h" +#include "trace.h" + +/* HBitmaps provides an array of bits.  The bits are stored as usual in an + * array of unsigned longs, but HBitmap is also optimized to provide fast + * iteration over set bits; going from one bit to the next is O(logB n) + * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough + * that the number of levels is in fact fixed. + * + * In order to do this, it stacks multiple bitmaps with progressively coarser + * granularity; in all levels except the last, bit N is set iff the N-th + * unsigned long is nonzero in the immediately next level.  When iteration + * completes on the last level it can examine the 2nd-last level to quickly + * skip entire words, and even do so recursively to skip blocks of 64 words or + * powers thereof (32 on 32-bit machines). 
+ * + * Given an index in the bitmap, it can be split in group of bits like + * this (for the 64-bit case): + * + *   bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word + *   bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word + *   bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word + * + * So it is easy to move up simply by shifting the index right by + * log2(BITS_PER_LONG) bits.  To move down, you shift the index left + * similarly, and add the word index within the group.  Iteration uses + * ffs (find first set bit) to find the next word to examine; this + * operation can be done in constant time in most current architectures. + * + * Setting or clearing a range of m bits on all levels, the work to perform + * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap. + * + * When iterating on a bitmap, each bit (on any level) is only visited + * once.  Hence, The total cost of visiting a bitmap with m bits in it is + * the number of bits that are set in all bitmaps.  Unless the bitmap is + * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized + * cost of advancing from one bit to the next is usually constant (worst case + * O(logB n) as in the non-amortized complexity). + */ + +struct HBitmap { +    /* Number of total bits in the bottom level.  */ +    uint64_t size; + +    /* Number of set bits in the bottom level.  */ +    uint64_t count; + +    /* A scaling factor.  Given a granularity of G, each bit in the bitmap will +     * will actually represent a group of 2^G elements.  Each operation on a +     * range of bits first rounds the bits to determine which group they land +     * in, and then affect the entire page; iteration will only visit the first +     * bit of each group.  
Here is an example of operations in a size-16, +     * granularity-1 HBitmap: +     * +     *    initial state            00000000 +     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8) +     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8) +     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10) +     *    reset(start=5, count=5)  00000000 +     * +     * From an implementation point of view, when setting or resetting bits, +     * the bitmap will scale bit numbers right by this amount of bits.  When +     * iterating, the bitmap will scale bit numbers left by this amount of +     * bits. +     */ +    int granularity; + +    /* A number of progressively less coarse bitmaps (i.e. level 0 is the +     * coarsest).  Each bit in level N represents a word in level N+1 that +     * has a set bit, except the last level where each bit represents the +     * actual bitmap. +     * +     * Note that all bitmaps have the same number of levels.  Even a 1-bit +     * bitmap will still allocate HBITMAP_LEVELS arrays. +     */ +    unsigned long *levels[HBITMAP_LEVELS]; + +    /* The length of each levels[] array. */ +    uint64_t sizes[HBITMAP_LEVELS]; +}; + +/* Advance hbi to the next nonzero word and return it.  hbi->pos + * is updated.  Returns zero if we reach the end of the bitmap. + */ +unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi) +{ +    size_t pos = hbi->pos; +    const HBitmap *hb = hbi->hb; +    unsigned i = HBITMAP_LEVELS - 1; + +    unsigned long cur; +    do { +        cur = hbi->cur[--i]; +        pos >>= BITS_PER_LEVEL; +    } while (cur == 0); + +    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG +     * bits in the level 0 bitmap; thus we can repurpose the most significant +     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures +     * that the above loop ends even without an explicit check on i. 
+     */ + +    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) { +        return 0; +    } +    for (; i < HBITMAP_LEVELS - 1; i++) { +        /* Shift back pos to the left, matching the right shifts above. +         * The index of this word's least significant set bit provides +         * the low-order bits. +         */ +        assert(cur); +        pos = (pos << BITS_PER_LEVEL) + ctzl(cur); +        hbi->cur[i] = cur & (cur - 1); + +        /* Set up next level for iteration.  */ +        cur = hb->levels[i + 1][pos]; +    } + +    hbi->pos = pos; +    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur); + +    assert(cur); +    return cur; +} + +void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first) +{ +    unsigned i, bit; +    uint64_t pos; + +    hbi->hb = hb; +    pos = first >> hb->granularity; +    assert(pos < hb->size); +    hbi->pos = pos >> BITS_PER_LEVEL; +    hbi->granularity = hb->granularity; + +    for (i = HBITMAP_LEVELS; i-- > 0; ) { +        bit = pos & (BITS_PER_LONG - 1); +        pos >>= BITS_PER_LEVEL; + +        /* Drop bits representing items before first.  */ +        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1); + +        /* We have already added level i+1, so the lowest set bit has +         * been processed.  Clear it. +         */ +        if (i != HBITMAP_LEVELS - 1) { +            hbi->cur[i] &= ~(1UL << bit); +        } +    } +} + +bool hbitmap_empty(const HBitmap *hb) +{ +    return hb->count == 0; +} + +int hbitmap_granularity(const HBitmap *hb) +{ +    return hb->granularity; +} + +uint64_t hbitmap_count(const HBitmap *hb) +{ +    return hb->count << hb->granularity; +} + +/* Count the number of set bits between start and end, not accounting for + * the granularity.  Also an example of how to use hbitmap_iter_next_word. 
+ */ +static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last) +{ +    HBitmapIter hbi; +    uint64_t count = 0; +    uint64_t end = last + 1; +    unsigned long cur; +    size_t pos; + +    hbitmap_iter_init(&hbi, hb, start << hb->granularity); +    for (;;) { +        pos = hbitmap_iter_next_word(&hbi, &cur); +        if (pos >= (end >> BITS_PER_LEVEL)) { +            break; +        } +        count += ctpopl(cur); +    } + +    if (pos == (end >> BITS_PER_LEVEL)) { +        /* Drop bits representing the END-th and subsequent items.  */ +        int bit = end & (BITS_PER_LONG - 1); +        cur &= (1UL << bit) - 1; +        count += ctpopl(cur); +    } + +    return count; +} + +/* Setting starts at the last layer and propagates up if an element + * changes from zero to non-zero. + */ +static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ +    unsigned long mask; +    bool changed; + +    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); +    assert(start <= last); + +    mask = 2UL << (last & (BITS_PER_LONG - 1)); +    mask -= 1UL << (start & (BITS_PER_LONG - 1)); +    changed = (*elem == 0); +    *elem |= mask; +    return changed; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... 
*/ +static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ +    size_t pos = start >> BITS_PER_LEVEL; +    size_t lastpos = last >> BITS_PER_LEVEL; +    bool changed = false; +    size_t i; + +    i = pos; +    if (i < lastpos) { +        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; +        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1); +        for (;;) { +            start = next; +            next += BITS_PER_LONG; +            if (++i == lastpos) { +                break; +            } +            changed |= (hb->levels[level][i] == 0); +            hb->levels[level][i] = ~0UL; +        } +    } +    changed |= hb_set_elem(&hb->levels[level][i], start, last); + +    /* If there was any change in this layer, we may have to update +     * the one above. +     */ +    if (level > 0 && changed) { +        hb_set_between(hb, level - 1, pos, lastpos); +    } +} + +void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) +{ +    /* Compute range in the last layer.  */ +    uint64_t last = start + count - 1; + +    trace_hbitmap_set(hb, start, count, +                      start >> hb->granularity, last >> hb->granularity); + +    start >>= hb->granularity; +    last >>= hb->granularity; +    count = last - start + 1; + +    hb->count += count - hb_count_between(hb, start, last); +    hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +/* Resetting works the other way round: propagate up if the new + * value is zero. 
+ */ +static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ +    unsigned long mask; +    bool blanked; + +    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); +    assert(start <= last); + +    mask = 2UL << (last & (BITS_PER_LONG - 1)); +    mask -= 1UL << (start & (BITS_PER_LONG - 1)); +    blanked = *elem != 0 && ((*elem & ~mask) == 0); +    *elem &= ~mask; +    return blanked; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ +static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ +    size_t pos = start >> BITS_PER_LEVEL; +    size_t lastpos = last >> BITS_PER_LEVEL; +    bool changed = false; +    size_t i; + +    i = pos; +    if (i < lastpos) { +        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; + +        /* Here we need a more complex test than when setting bits.  Even if +         * something was changed, we must not blank bits in the upper level +         * unless the lower-level word became entirely zero.  So, remove pos +         * from the upper-level range if bits remain set. +         */ +        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) { +            changed = true; +        } else { +            pos++; +        } + +        for (;;) { +            start = next; +            next += BITS_PER_LONG; +            if (++i == lastpos) { +                break; +            } +            changed |= (hb->levels[level][i] != 0); +            hb->levels[level][i] = 0UL; +        } +    } + +    /* Same as above, this time for lastpos.  */ +    if (hb_reset_elem(&hb->levels[level][i], start, last)) { +        changed = true; +    } else { +        lastpos--; +    } + +    if (level > 0 && changed) { +        hb_reset_between(hb, level - 1, pos, lastpos); +    } +} + +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) +{ +    /* Compute range in the last layer.  
*/ +    uint64_t last = start + count - 1; + +    trace_hbitmap_reset(hb, start, count, +                        start >> hb->granularity, last >> hb->granularity); + +    start >>= hb->granularity; +    last >>= hb->granularity; + +    hb->count -= hb_count_between(hb, start, last); +    hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +void hbitmap_reset_all(HBitmap *hb) +{ +    unsigned int i; + +    /* Same as hbitmap_alloc() except for memset() instead of malloc() */ +    for (i = HBITMAP_LEVELS; --i >= 1; ) { +        memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long)); +    } + +    hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1); +    hb->count = 0; +} + +bool hbitmap_get(const HBitmap *hb, uint64_t item) +{ +    /* Compute position and bit in the last layer.  */ +    uint64_t pos = item >> hb->granularity; +    unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1)); + +    return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; +} + +void hbitmap_free(HBitmap *hb) +{ +    unsigned i; +    for (i = HBITMAP_LEVELS; i-- > 0; ) { +        g_free(hb->levels[i]); +    } +    g_free(hb); +} + +HBitmap *hbitmap_alloc(uint64_t size, int granularity) +{ +    HBitmap *hb = g_new0(struct HBitmap, 1); +    unsigned i; + +    assert(granularity >= 0 && granularity < 64); +    size = (size + (1ULL << granularity) - 1) >> granularity; +    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE)); + +    hb->size = size; +    hb->granularity = granularity; +    for (i = HBITMAP_LEVELS; i-- > 0; ) { +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); +        hb->sizes[i] = size; +        hb->levels[i] = g_new0(unsigned long, size); +    } + +    /* We necessarily have free bits in level 0 due to the definition +     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up +     * hbitmap_iter_skip_words. 
+     */ +    assert(size == 1); +    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); +    return hb; +} + +void hbitmap_truncate(HBitmap *hb, uint64_t size) +{ +    bool shrink; +    unsigned i; +    uint64_t num_elements = size; +    uint64_t old; + +    /* Size comes in as logical elements, adjust for granularity. */ +    size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity; +    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE)); +    shrink = size < hb->size; + +    /* bit sizes are identical; nothing to do. */ +    if (size == hb->size) { +        return; +    } + +    /* If we're losing bits, let's clear those bits before we invalidate all of +     * our invariants. This helps keep the bitcount consistent, and will prevent +     * us from carrying around garbage bits beyond the end of the map. +     */ +    if (shrink) { +        /* Don't clear partial granularity groups; +         * start at the first full one. */ +        uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity); +        uint64_t fix_count = (hb->size << hb->granularity) - start; + +        assert(fix_count); +        hbitmap_reset(hb, start, fix_count); +    } + +    hb->size = size; +    for (i = HBITMAP_LEVELS; i-- > 0; ) { +        size = MAX(BITS_TO_LONGS(size), 1); +        if (hb->sizes[i] == size) { +            break; +        } +        old = hb->sizes[i]; +        hb->sizes[i] = size; +        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long)); +        if (!shrink) { +            memset(&hb->levels[i][old], 0x00, +                   (size - old) * sizeof(*hb->levels[i])); +        } +    } +} + + +/** + * Given HBitmaps A and B, let A := A (BITOR) B. + * Bitmap B will not be modified. + * + * @return true if the merge was successful, + *         false if it was not attempted. 
+ */ +bool hbitmap_merge(HBitmap *a, const HBitmap *b) +{ +    int i; +    uint64_t j; + +    if ((a->size != b->size) || (a->granularity != b->granularity)) { +        return false; +    } + +    if (hbitmap_count(b) == 0) { +        return true; +    } + +    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant. +     * It may be possible to improve running times for sparsely populated maps +     * by using hbitmap_iter_next, but this is suboptimal for dense maps. +     */ +    for (i = HBITMAP_LEVELS - 1; i >= 0; i--) { +        for (j = 0; j < a->sizes[i]; j++) { +            a->levels[i][j] |= b->levels[i][j]; +        } +    } + +    return true; +} diff --git a/util/hexdump.c b/util/hexdump.c new file mode 100644 index 00000000..969b3406 --- /dev/null +++ b/util/hexdump.c @@ -0,0 +1,37 @@ +/* + * Helper to hexdump a buffer + * + * Copyright (c) 2013 Red Hat, Inc. + * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com> + * Copyright (c) 2013 Peter Crosthwaite <peter.crosthwaite@xilinx.com> + * Copyright (c) 2013 Xilinx, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2.  See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
/*
 * Write a hex dump of the @size bytes at @buf to @fp.
 *
 * Output is organised in rows of 16 bytes.  Each row starts with
 * "<prefix>: <offset>:" (offset in hex, at least four digits), an extra
 * space separates every group of four bytes, and each byte is printed as
 * two lowercase hex digits.  A final, partially filled row is terminated
 * with a newline as well; nothing at all is printed when @size is 0.
 */
void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
{
    /* The index has the same type as @size: a narrower index would wrap
     * before reaching a size above UINT_MAX and the loop would never end.
     */
    size_t b;

    for (b = 0; b < size; b++) {
        if ((b % 16) == 0) {
            fprintf(fp, "%s: %04zx:", prefix, b);
        }
        if ((b % 4) == 0) {
            fprintf(fp, " ");
        }
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        fprintf(fp, " %02x", (unsigned char)buf[b]);
        if ((b % 16) == 15) {
            fprintf(fp, "\n");
        }
    }
    if ((b % 16) != 0) {
        fprintf(fp, "\n");
    }
}
/*
 * Full 64x64 -> 128 bit unsigned multiplication, schoolbook style on 32-bit
 * halves.  The low and high 64 bits of the product are stored in *plow and
 * *phigh.
 */
static inline void mul64(uint64_t *plow, uint64_t *phigh,
                         uint64_t a, uint64_t b)
{
    const uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
    const uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;

    /* The four 32x32 -> 64 partial products. */
    const uint64_t lo_lo = a_lo * b_lo;
    const uint64_t lo_hi = a_lo * b_hi;
    const uint64_t hi_lo = a_hi * b_lo;
    const uint64_t hi_hi = a_hi * b_hi;

    /* Bits 32..63 of the result plus the carry into bit 64. */
    const uint64_t mid = (lo_lo >> 32) + (uint32_t)lo_hi + (uint32_t)hi_lo;
    /* Bits 64..95 of the result plus the carry into bit 96. */
    const uint64_t top = (mid >> 32) + (lo_hi >> 32) + (hi_lo >> 32)
                         + (uint32_t)hi_hi;

    *plow = (mid << 32) | (uint32_t)lo_lo;
    *phigh = (((hi_hi >> 32) + (top >> 32)) << 32) | (uint32_t)top;
}

/* Unsigned 64x64 -> 128 multiplication */
void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
{
    mul64(plow, phigh, a, b);
}

/* Signed 64x64 -> 128 multiplication */
void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
{
    uint64_t high;

    /* Multiply the raw bit patterns as unsigned numbers first ... */
    mul64(plow, &high, (uint64_t)a, (uint64_t)b);

    /* ... then correct the high half: a negative operand was read as
     * value + 2^64, which added the other operand to the high word.
     */
    high -= (b < 0) ? (uint64_t)a : 0;
    high -= (a < 0) ? (uint64_t)b : 0;

    *phigh = high;
}
/*
 * Unsigned 128 / 64 bit division.
 *
 * On entry *phigh:*plow hold the 128-bit dividend.  On success the function
 * returns 0 with the 64-bit quotient in *plow and the remainder in *phigh.
 * It returns 1, leaving *plow and *phigh untouched, when the result cannot
 * be represented: division by zero, or a quotient that needs more than 64
 * bits, which is exactly the case when the high word is >= the divisor.
 */
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    unsigned i;

    if (divisor == 0) {
        return 1;
    }
    if (dhi == 0) {
        /* Plain 64-bit division is enough. */
        *plow  = dlo / divisor;
        *phigh = dlo % divisor;
        return 0;
    }
    if (dhi >= divisor) {
        /* dividend >= divisor * 2^64, so the quotient is >= 2^64.  Note
         * that equality overflows too: (d * 2^64) / d == 2^64.
         */
        return 1;
    }

    /* Bit-by-bit restoring division.  dhi is the running remainder and dlo
     * shifts out dividend bits at the top while collecting quotient bits
     * at the bottom.
     */
    for (i = 0; i < 64; i++) {
        /* Bit shifted out of dhi: the remainder is really 65 bits wide. */
        uint64_t carry = dhi >> 63;

        dhi = (dhi << 1) | (dlo >> 63);
        if (carry || (dhi >= divisor)) {
            dhi -= divisor;
            carry = 1;
        } else {
            carry = 0;
        }
        dlo = (dlo << 1) | carry;
    }

    *plow = dlo;
    *phigh = dhi;
    return 0;
}

/*
 * Signed 128 / 64 bit division built on top of divu128().
 *
 * On entry *phigh:*plow hold the two's complement 128-bit dividend.
 * Returns 0 with the signed quotient in *plow when it fits in 64 bits, and
 * 1 on overflow (division by zero or quotient out of the int64_t range);
 * the outputs are not meaningful in the overflow case.
 *
 * NOTE: as before, *phigh receives the remainder of the division of the
 * absolute values; it is not adjusted to the sign of the dividend.
 */
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
{
    const int sgn_dvdnd = *phigh < 0;
    const int sgn_divsr = divisor < 0;
    const int sgn_quot = sgn_dvdnd ^ sgn_divsr;
    /* Magnitudes are handled as unsigned values so that negating the most
     * negative number is well defined.
     */
    uint64_t lo = (uint64_t)*plow;
    uint64_t hi = (uint64_t)*phigh;
    uint64_t udivisor = (uint64_t)divisor;
    int overflow;

    if (sgn_dvdnd) {
        /* 128-bit two's complement negation: invert, add one, carry. */
        lo = ~lo + 1;
        hi = ~hi + (lo == 0);
    }

    if (sgn_divsr) {
        udivisor = 0 - udivisor;
    }

    overflow = divu128(&lo, &hi, udivisor);

    if (sgn_quot) {
        lo = 0 - lo;
    }

    /* The quotient fits iff its sign bit agrees with the expected sign.
     * Zero is exempt: it has no sign, so e.g. -1 / 2 == 0 is not an
     * overflow even though a negative result was "expected".
     */
    if (!overflow && lo != 0 && (((int64_t)lo < 0) != sgn_quot)) {
        overflow = 1;
    }

    *plow = (int64_t)lo;
    *phigh = (int64_t)hi;
    return overflow;
}
/*
 * Check whether @id is a well-formed identifier: the first character must
 * satisfy qemu_isalpha(), and each following character must either satisfy
 * qemu_isalnum() or be one of '-', '.' and '_'.  The empty string is
 * rejected because its first character is the terminating NUL.
 */
bool id_wellformed(const char *id)
{
    const char *p;

    if (!qemu_isalpha(id[0])) {
        return false;
    }

    for (p = id + 1; *p != '\0'; p++) {
        if (qemu_isalnum(*p)) {
            continue;
        }
        if (strchr("-._", *p) == NULL) {
            return false;
        }
    }

    return true;
}
+ */ + +#include "qemu/iov.h" +#include "qemu/sockets.h" + +size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, +                    size_t offset, const void *buf, size_t bytes) +{ +    size_t done; +    unsigned int i; +    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { +        if (offset < iov[i].iov_len) { +            size_t len = MIN(iov[i].iov_len - offset, bytes - done); +            memcpy(iov[i].iov_base + offset, buf + done, len); +            done += len; +            offset = 0; +        } else { +            offset -= iov[i].iov_len; +        } +    } +    assert(offset == 0); +    return done; +} + +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, +                  size_t offset, void *buf, size_t bytes) +{ +    size_t done; +    unsigned int i; +    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { +        if (offset < iov[i].iov_len) { +            size_t len = MIN(iov[i].iov_len - offset, bytes - done); +            memcpy(buf + done, iov[i].iov_base + offset, len); +            done += len; +            offset = 0; +        } else { +            offset -= iov[i].iov_len; +        } +    } +    assert(offset == 0); +    return done; +} + +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, +                  size_t offset, int fillc, size_t bytes) +{ +    size_t done; +    unsigned int i; +    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { +        if (offset < iov[i].iov_len) { +            size_t len = MIN(iov[i].iov_len - offset, bytes - done); +            memset(iov[i].iov_base + offset, fillc, len); +            done += len; +            offset = 0; +        } else { +            offset -= iov[i].iov_len; +        } +    } +    assert(offset == 0); +    return done; +} + +size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) +{ +    size_t len; +    unsigned int i; + +    len = 0; +    for (i = 0; i < 
iov_cnt; i++) { +        len += iov[i].iov_len; +    } +    return len; +} + +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#ifdef CONFIG_POSIX +    ssize_t ret; +    struct msghdr msg; +    memset(&msg, 0, sizeof(msg)); +    msg.msg_iov = iov; +    msg.msg_iovlen = iov_cnt; +    do { +        ret = do_send +            ? sendmsg(sockfd, &msg, 0) +            : recvmsg(sockfd, &msg, 0); +    } while (ret < 0 && errno == EINTR); +    return ret; +#else +    /* else send piece-by-piece */ +    /*XXX Note: windows has WSASend() and WSARecv() */ +    unsigned i = 0; +    ssize_t ret = 0; +    while (i < iov_cnt) { +        ssize_t r = do_send +            ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) +            : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); +        if (r > 0) { +            ret += r; +        } else if (!r) { +            break; +        } else if (errno == EINTR) { +            continue; +        } else { +            /* else it is some "other" error, +             * only return if there was no data processed. 
*/ +            if (ret == 0) { +                ret = -1; +            } +            break; +        } +        i++; +    } +    return ret; +#endif +} + +ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt, +                      size_t offset, size_t bytes, +                      bool do_send) +{ +    ssize_t total = 0; +    ssize_t ret; +    size_t orig_len, tail; +    unsigned niov; +    struct iovec *local_iov, *iov; + +    if (bytes <= 0) { +        return 0; +    } + +    local_iov = g_new0(struct iovec, iov_cnt); +    iov_copy(local_iov, iov_cnt, _iov, iov_cnt, offset, bytes); +    offset = 0; +    iov = local_iov; + +    while (bytes > 0) { +        /* Find the start position, skipping `offset' bytes: +         * first, skip all full-sized vector elements, */ +        for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) { +            offset -= iov[niov].iov_len; +        } + +        /* niov == iov_cnt would only be valid if bytes == 0, which +         * we already ruled out in the loop condition.  
*/ +        assert(niov < iov_cnt); +        iov += niov; +        iov_cnt -= niov; + +        if (offset) { +            /* second, skip `offset' bytes from the (now) first element, +             * undo it on exit */ +            iov[0].iov_base += offset; +            iov[0].iov_len -= offset; +        } +        /* Find the end position skipping `bytes' bytes: */ +        /* first, skip all full-sized elements */ +        tail = bytes; +        for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) { +            tail -= iov[niov].iov_len; +        } +        if (tail) { +            /* second, fixup the last element, and remember the original +             * length */ +            assert(niov < iov_cnt); +            assert(iov[niov].iov_len > tail); +            orig_len = iov[niov].iov_len; +            iov[niov++].iov_len = tail; +            ret = do_send_recv(sockfd, iov, niov, do_send); +            /* Undo the changes above before checking for errors */ +            iov[niov-1].iov_len = orig_len; +        } else { +            ret = do_send_recv(sockfd, iov, niov, do_send); +        } +        if (offset) { +            iov[0].iov_base -= offset; +            iov[0].iov_len += offset; +        } + +        if (ret < 0) { +            assert(errno != EINTR); +            g_free(local_iov); +            if (errno == EAGAIN && total > 0) { +                return total; +            } +            return -1; +        } + +        if (ret == 0 && !do_send) { +            /* recv returns 0 when the peer has performed an orderly +             * shutdown. 
*/ +            break; +        } + +        /* Prepare for the next iteration */ +        offset += ret; +        total += ret; +        bytes -= ret; +    } + +    g_free(local_iov); +    return total; +} + + +void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, +                 FILE *fp, const char *prefix, size_t limit) +{ +    int v; +    size_t size = 0; +    char *buf; + +    for (v = 0; v < iov_cnt; v++) { +        size += iov[v].iov_len; +    } +    size = size > limit ? limit : size; +    buf = g_malloc(size); +    iov_to_buf(iov, iov_cnt, 0, buf, size); +    qemu_hexdump(buf, fp, prefix, size); +    g_free(buf); +} + +unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt, +                 const struct iovec *iov, unsigned int iov_cnt, +                 size_t offset, size_t bytes) +{ +    size_t len; +    unsigned int i, j; +    for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) { +        if (offset >= iov[i].iov_len) { +            offset -= iov[i].iov_len; +            continue; +        } +        len = MIN(bytes, iov[i].iov_len - offset); + +        dst_iov[j].iov_base = iov[i].iov_base + offset; +        dst_iov[j].iov_len = len; +        j++; +        bytes -= len; +        offset = 0; +    } +    assert(offset == 0); +    return j; +} + +/* io vectors */ + +void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint) +{ +    qiov->iov = g_new(struct iovec, alloc_hint); +    qiov->niov = 0; +    qiov->nalloc = alloc_hint; +    qiov->size = 0; +} + +void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov) +{ +    int i; + +    qiov->iov = iov; +    qiov->niov = niov; +    qiov->nalloc = -1; +    qiov->size = 0; +    for (i = 0; i < niov; i++) +        qiov->size += iov[i].iov_len; +} + +void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) +{ +    assert(qiov->nalloc != -1); + +    if (qiov->niov == qiov->nalloc) { +        qiov->nalloc = 2 * qiov->nalloc + 1; +        
qiov->iov = g_renew(struct iovec, qiov->iov, qiov->nalloc); +    } +    qiov->iov[qiov->niov].iov_base = base; +    qiov->iov[qiov->niov].iov_len = len; +    qiov->size += len; +    ++qiov->niov; +} + +/* + * Concatenates (partial) iovecs from src_iov to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src_iov if it comes first.  This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. + */ +size_t qemu_iovec_concat_iov(QEMUIOVector *dst, +                             struct iovec *src_iov, unsigned int src_cnt, +                             size_t soffset, size_t sbytes) +{ +    int i; +    size_t done; + +    if (!sbytes) { +        return 0; +    } +    assert(dst->nalloc != -1); +    for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) { +        if (soffset < src_iov[i].iov_len) { +            size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done); +            qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len); +            done += len; +            soffset = 0; +        } else { +            soffset -= src_iov[i].iov_len; +        } +    } +    assert(soffset == 0); /* offset beyond end of src */ + +    return done; +} + +/* + * Concatenates (partial) iovecs from src to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src if it comes first.  This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. 
+ */ +void qemu_iovec_concat(QEMUIOVector *dst, +                       QEMUIOVector *src, size_t soffset, size_t sbytes) +{ +    qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); +} + +/* + * Check if the contents of the iovecs are all zero + */ +bool qemu_iovec_is_zero(QEMUIOVector *qiov) +{ +    int i; +    for (i = 0; i < qiov->niov; i++) { +        size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); +        uint8_t *ptr = qiov->iov[i].iov_base; +        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { +            return false; +        } +        for (; offs < qiov->iov[i].iov_len; offs++) { +            if (ptr[offs]) { +                return false; +            } +        } +    } +    return true; +} + +void qemu_iovec_destroy(QEMUIOVector *qiov) +{ +    assert(qiov->nalloc != -1); + +    qemu_iovec_reset(qiov); +    g_free(qiov->iov); +    qiov->nalloc = 0; +    qiov->iov = NULL; +} + +void qemu_iovec_reset(QEMUIOVector *qiov) +{ +    assert(qiov->nalloc != -1); + +    qiov->niov = 0; +    qiov->size = 0; +} + +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, +                         void *buf, size_t bytes) +{ +    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); +} + +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, +                           const void *buf, size_t bytes) +{ +    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); +} + +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, +                         int fillc, size_t bytes) +{ +    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); +} + +/** + * Check that I/O vector contents are identical + * + * The IO vectors must have the same structure (same length of all parts). + * A typical usage is to compare vectors created with qemu_iovec_clone(). 
/*
 * Bookkeeping record used by qemu_iovec_clone() while it sorts the source
 * elements by address and back again.
 */
typedef struct {
    int src_index;          /* position of the element in the source vector */
    struct iovec *src_iov;  /* the source element itself */
    void *dest_base;        /* base address chosen for the cloned element */
} IOVectorSortElem;

/* qsort() comparator: order records by the base address of their source
 * element.  Returns -1, 0 or 1. */
static int sortelem_cmp_src_base(const void *a, const void *b)
{
    const void *lhs = ((const IOVectorSortElem *)a)->src_iov->iov_base;
    const void *rhs = ((const IOVectorSortElem *)b)->src_iov->iov_base;

    /* compare instead of subtracting: an address difference need not fit
     * in an int */
    return (lhs > rhs) - (lhs < rhs);
}

/* qsort() comparator: order records by their index in the source vector.
 * Returns the difference of the two indices. */
static int sortelem_cmp_src_index(const void *a, const void *b)
{
    const IOVectorSortElem *lhs = a;
    const IOVectorSortElem *rhs = b;

    return lhs->src_index - rhs->src_index;
}
/*
 * Drop up to `bytes' bytes from the front of an iovec array.
 *
 * Elements that are consumed completely are stepped over: *iov is advanced
 * past them and *iov_cnt is decremented for each.  If the next element is
 * longer than what remains to be dropped, its base is moved forward and its
 * length shortened in place.
 *
 * Returns the number of bytes actually dropped, which is less than `bytes'
 * when the array holds fewer bytes than requested.
 */
size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
                         size_t bytes)
{
    struct iovec *elem = *iov;
    size_t discarded = 0;

    /* step over every element that is swallowed whole */
    while (*iov_cnt > 0 && elem->iov_len <= bytes) {
        discarded += elem->iov_len;
        bytes -= elem->iov_len;
        elem++;
        --*iov_cnt;
    }

    /* trim the first surviving element, if any */
    if (*iov_cnt > 0) {
        elem->iov_base = (char *)elem->iov_base + bytes;
        elem->iov_len -= bytes;
        discarded += bytes;
    }

    *iov = elem;
    return discarded;
}
+ +    while (*iov_cnt > 0) { +        if (cur->iov_len > bytes) { +            cur->iov_len -= bytes; +            total += bytes; +            break; +        } + +        bytes -= cur->iov_len; +        total += cur->iov_len; +        cur--; +        *iov_cnt -= 1; +    } + +    return total; +} + +void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes) +{ +    size_t total; +    unsigned int niov = qiov->niov; + +    assert(qiov->size >= bytes); +    total = iov_discard_back(qiov->iov, &niov, bytes); +    assert(total == bytes); + +    qiov->niov = niov; +    qiov->size -= bytes; +} diff --git a/util/module.c b/util/module.c new file mode 100644 index 00000000..4bd4a94d --- /dev/null +++ b/util/module.c @@ -0,0 +1,219 @@ +/* + * QEMU Module Infrastructure + * + * Copyright IBM, Corp. 2009 + * + * Authors: + *  Anthony Liguori   <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2.  See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include <stdlib.h> +#include "qemu-common.h" +#ifdef CONFIG_MODULES +#include <gmodule.h> +#endif +#include "qemu/queue.h" +#include "qemu/module.h" + +typedef struct ModuleEntry +{ +    void (*init)(void); +    QTAILQ_ENTRY(ModuleEntry) node; +    module_init_type type; +} ModuleEntry; + +typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList; + +static ModuleTypeList init_type_list[MODULE_INIT_MAX]; + +static ModuleTypeList dso_init_list; + +static void init_lists(void) +{ +    static int inited; +    int i; + +    if (inited) { +        return; +    } + +    for (i = 0; i < MODULE_INIT_MAX; i++) { +        QTAILQ_INIT(&init_type_list[i]); +    } + +    QTAILQ_INIT(&dso_init_list); + +    inited = 1; +} + + +static ModuleTypeList *find_type(module_init_type type) +{ +    ModuleTypeList *l; + +    init_lists(); + +    l = &init_type_list[type]; + +    return l; +} + +void register_module_init(void (*fn)(void), module_init_type type) +{ +    ModuleEntry *e; +    ModuleTypeList *l; + +    e = g_malloc0(sizeof(*e)); +    e->init = fn; +    e->type = type; + +    l = find_type(type); + +    QTAILQ_INSERT_TAIL(l, e, node); +} + +void register_dso_module_init(void (*fn)(void), module_init_type type) +{ +    ModuleEntry *e; + +    init_lists(); + +    e = g_malloc0(sizeof(*e)); +    e->init = fn; +    e->type = type; + +    QTAILQ_INSERT_TAIL(&dso_init_list, e, node); +} + +static void module_load(module_init_type type); + +void module_call_init(module_init_type type) +{ +    ModuleTypeList *l; +    ModuleEntry *e; + +    module_load(type); +    l = find_type(type); + +    QTAILQ_FOREACH(e, l, node) { +        e->init(); +    } +} + +#ifdef CONFIG_MODULES +static int module_load_file(const char *fname) +{ +    GModule *g_module; +    void (*sym)(void); +    const char *dsosuf = HOST_DSOSUF; +    int len = strlen(fname); +    int suf_len = strlen(dsosuf); +    ModuleEntry *e, *next; +    int ret; + +    if (len <= suf_len || strcmp(&fname[len - suf_len], dsosuf)) { +  
      /* wrong suffix */ +        ret = -EINVAL; +        goto out; +    } +    if (access(fname, F_OK)) { +        ret = -ENOENT; +        goto out; +    } + +    assert(QTAILQ_EMPTY(&dso_init_list)); + +    g_module = g_module_open(fname, G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL); +    if (!g_module) { +        fprintf(stderr, "Failed to open module: %s\n", +                g_module_error()); +        ret = -EINVAL; +        goto out; +    } +    if (!g_module_symbol(g_module, DSO_STAMP_FUN_STR, (gpointer *)&sym)) { +        fprintf(stderr, "Failed to initialize module: %s\n", +                fname); +        /* Print some info if this is a QEMU module (but from different build), +         * this will make debugging user problems easier. */ +        if (g_module_symbol(g_module, "qemu_module_dummy", (gpointer *)&sym)) { +            fprintf(stderr, +                    "Note: only modules from the same build can be loaded.\n"); +        } +        g_module_close(g_module); +        ret = -EINVAL; +    } else { +        QTAILQ_FOREACH(e, &dso_init_list, node) { +            register_module_init(e->init, e->type); +        } +        ret = 0; +    } + +    QTAILQ_FOREACH_SAFE(e, &dso_init_list, node, next) { +        QTAILQ_REMOVE(&dso_init_list, e, node); +        g_free(e); +    } +out: +    return ret; +} +#endif + +static void module_load(module_init_type type) +{ +#ifdef CONFIG_MODULES +    char *fname = NULL; +    const char **mp; +    static const char *block_modules[] = { +        CONFIG_BLOCK_MODULES +    }; +    char *exec_dir; +    char *dirs[3]; +    int i = 0; +    int ret; + +    if (!g_module_supported()) { +        fprintf(stderr, "Module is not supported by system.\n"); +        return; +    } + +    switch (type) { +    case MODULE_INIT_BLOCK: +        mp = block_modules; +        break; +    default: +        /* no other types have dynamic modules for now*/ +        return; +    } + +    exec_dir = qemu_get_exec_dir(); +    dirs[i++] = 
g_strdup_printf("%s", CONFIG_QEMU_MODDIR); +    dirs[i++] = g_strdup_printf("%s/..", exec_dir ? : ""); +    dirs[i++] = g_strdup_printf("%s", exec_dir ? : ""); +    assert(i == ARRAY_SIZE(dirs)); +    g_free(exec_dir); +    exec_dir = NULL; + +    for ( ; *mp; mp++) { +        for (i = 0; i < ARRAY_SIZE(dirs); i++) { +            fname = g_strdup_printf("%s/%s%s", dirs[i], *mp, HOST_DSOSUF); +            ret = module_load_file(fname); +            g_free(fname); +            fname = NULL; +            /* Try loading until loaded a module file */ +            if (!ret) { +                break; +            } +        } +    } + +    for (i = 0; i < ARRAY_SIZE(dirs); i++) { +        g_free(dirs[i]); +    } + +#endif +} diff --git a/util/notify.c b/util/notify.c new file mode 100644 index 00000000..f215dfc2 --- /dev/null +++ b/util/notify.c @@ -0,0 +1,71 @@ +/* + * Notifier lists + * + * Copyright IBM, Corp. 2010 + * + * Authors: + *  Anthony Liguori   <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2.  See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu-common.h" +#include "qemu/notify.h" + +void notifier_list_init(NotifierList *list) +{ +    QLIST_INIT(&list->notifiers); +} + +void notifier_list_add(NotifierList *list, Notifier *notifier) +{ +    QLIST_INSERT_HEAD(&list->notifiers, notifier, node); +} + +void notifier_remove(Notifier *notifier) +{ +    QLIST_REMOVE(notifier, node); +} + +void notifier_list_notify(NotifierList *list, void *data) +{ +    Notifier *notifier, *next; + +    QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) { +        notifier->notify(notifier, data); +    } +} + +void notifier_with_return_list_init(NotifierWithReturnList *list) +{ +    QLIST_INIT(&list->notifiers); +} + +void notifier_with_return_list_add(NotifierWithReturnList *list, +                                   NotifierWithReturn *notifier) +{ +    QLIST_INSERT_HEAD(&list->notifiers, notifier, node); +} + +void notifier_with_return_remove(NotifierWithReturn *notifier) +{ +    QLIST_REMOVE(notifier, node); +} + +int notifier_with_return_list_notify(NotifierWithReturnList *list, void *data) +{ +    NotifierWithReturn *notifier, *next; +    int ret = 0; + +    QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) { +        ret = notifier->notify(notifier, data); +        if (ret != 0) { +            break; +        } +    } +    return ret; +} diff --git a/util/osdep.c b/util/osdep.c new file mode 100644 index 00000000..0092bb61 --- /dev/null +++ b/util/osdep.c @@ -0,0 +1,430 @@ +/* + * QEMU low level functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> + +/* Needed early for CONFIG_BSD etc. */ +#include "config-host.h" + +#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) +#include <sys/mman.h> +#endif + +#ifdef CONFIG_SOLARIS +#include <sys/types.h> +#include <sys/statvfs.h> +/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for +   discussion about Solaris header problems */ +extern int madvise(caddr_t, size_t, int); +#endif + +#include "qemu-common.h" +#include "qemu/sockets.h" +#include "qemu/error-report.h" +#include "monitor/monitor.h" + +static bool fips_enabled = false; + +static const char *qemu_version = QEMU_VERSION; + +int socket_set_cork(int fd, int v) +{ +#if defined(SOL_TCP) && defined(TCP_CORK) +    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v)); +#else +    return 0; +#endif +} + +int socket_set_nodelay(int fd) +{ +    int v = 1; +    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); +} + +int qemu_madvise(void *addr, size_t len, int advice) +{ +    if (advice == QEMU_MADV_INVALID) { +        errno = EINVAL; +        return -1; +    } +#if defined(CONFIG_MADVISE) +    return madvise(addr, len, advice); +#elif 
defined(CONFIG_POSIX_MADVISE) +    return posix_madvise(addr, len, advice); +#else +    errno = EINVAL; +    return -1; +#endif +} + +#ifndef _WIN32 +/* + * Dups an fd and sets the flags + */ +static int qemu_dup_flags(int fd, int flags) +{ +    int ret; +    int serrno; +    int dup_flags; + +#ifdef F_DUPFD_CLOEXEC +    ret = fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else +    ret = dup(fd); +    if (ret != -1) { +        qemu_set_cloexec(ret); +    } +#endif +    if (ret == -1) { +        goto fail; +    } + +    dup_flags = fcntl(ret, F_GETFL); +    if (dup_flags == -1) { +        goto fail; +    } + +    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) { +        errno = EINVAL; +        goto fail; +    } + +    /* Set/unset flags that we can with fcntl */ +    if (fcntl(ret, F_SETFL, flags) == -1) { +        goto fail; +    } + +    /* Truncate the file in the cases that open() would truncate it */ +    if (flags & O_TRUNC || +            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) { +        if (ftruncate(ret, 0) == -1) { +            goto fail; +        } +    } + +    return ret; + +fail: +    serrno = errno; +    if (ret != -1) { +        close(ret); +    } +    errno = serrno; +    return -1; +} + +static int qemu_parse_fdset(const char *param) +{ +    return qemu_parse_fd(param); +} +#endif + +/* + * Opens a file with FD_CLOEXEC set + */ +int qemu_open(const char *name, int flags, ...) 
/*
 * Close a file descriptor.
 *
 * The descriptor is first looked up with monitor_fdset_dup_fd_find().  If it
 * is found there (result other than -1) and close() succeeds, it is also
 * dropped via monitor_fdset_dup_fd_remove().  Returns the result of close().
 */
int qemu_close(int fd)
{
    /* the lookup has to happen while fd is still open */
    int tracked = monitor_fdset_dup_fd_find(fd) != -1;
    int rc = close(fd);

    if (tracked && rc == 0) {
        monitor_fdset_dup_fd_remove(fd);
    }

    return rc;
}
/*
 * write(2) wrapper that keeps going after short writes.
 *
 * write() is called repeatedly until `count' bytes have been written or it
 * fails; a failure with errno == EINTR is retried.  Returns the number of
 * bytes written so far, so a result smaller than `count' means write()
 * failed and errno holds its error code.
 *
 * Not meant for non-blocking descriptors: EAGAIN is treated like any other
 * error and ends the loop.
 */
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
{
    const char *cursor = buf;
    ssize_t written = 0;

    while (count > 0) {
        ssize_t n = write(fd, cursor, count);

        if (n >= 0) {
            cursor += n;
            count -= n;
            written += n;
        } else if (errno != EINTR) {
            /* genuine error: report what was written so far */
            break;
        }
    }

    return written;
}
"enabled" : "disabled")); +#endif +} + +bool fips_get_state(void) +{ +    return fips_enabled; +} + +#ifdef _WIN32 +static void socket_cleanup(void) +{ +    WSACleanup(); +} +#endif + +int socket_init(void) +{ +#ifdef _WIN32 +    WSADATA Data; +    int ret, err; + +    ret = WSAStartup(MAKEWORD(2, 2), &Data); +    if (ret != 0) { +        err = WSAGetLastError(); +        fprintf(stderr, "WSAStartup: %d\n", err); +        return -1; +    } +    atexit(socket_cleanup); +#endif +    return 0; +} + +#if !GLIB_CHECK_VERSION(2, 31, 0) +/* Ensure that glib is running in multi-threaded mode + * Old versions of glib require explicit initialization.  Failure to do + * this results in the single-threaded code paths being taken inside + * glib.  For example, the g_slice allocator will not be thread-safe + * and cause crashes. + */ +static void __attribute__((constructor)) thread_init(void) +{ +    if (!g_thread_supported()) { +       g_thread_init(NULL); +    } +} +#endif + +#ifndef CONFIG_IOVEC +/* helper function for iov_send_recv() */ +static ssize_t +readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write) +{ +    unsigned i = 0; +    ssize_t ret = 0; +    while (i < iov_cnt) { +        ssize_t r = do_write +            ? write(fd, iov[i].iov_base, iov[i].iov_len) +            : read(fd, iov[i].iov_base, iov[i].iov_len); +        if (r > 0) { +            ret += r; +        } else if (!r) { +            break; +        } else if (errno == EINTR) { +            continue; +        } else { +            /* else it is some "other" error, +             * only return if there was no data processed. 
*/ +            if (ret == 0) { +                ret = -1; +            } +            break; +        } +        i++; +    } +    return ret; +} + +ssize_t +readv(int fd, const struct iovec *iov, int iov_cnt) +{ +    return readv_writev(fd, iov, iov_cnt, false); +} + +ssize_t +writev(int fd, const struct iovec *iov, int iov_cnt) +{ +    return readv_writev(fd, iov, iov_cnt, true); +} +#endif diff --git a/util/oslib-posix.c b/util/oslib-posix.c new file mode 100644 index 00000000..3ae4987b --- /dev/null +++ b/util/oslib-posix.c @@ -0,0 +1,484 @@ +/* + * os-posix-lib.c + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * QEMU library functions on POSIX which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
/*
 * Pass an allocation result through, aborting on failure.
 *
 * Returns ptr unchanged when it is non-NULL.  For NULL, a message that
 * includes strerror(errno) is written to stderr and the process aborts.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
  if (ret != 0) { +        errno = ret; +        ptr = NULL; +    } +#elif defined(CONFIG_BSD) +    ptr = valloc(size); +#else +    ptr = memalign(alignment, size); +#endif +    trace_qemu_memalign(alignment, size, ptr); +    return ptr; +} + +void *qemu_memalign(size_t alignment, size_t size) +{ +    return qemu_oom_check(qemu_try_memalign(alignment, size)); +} + +/* alloc shared memory pages */ +void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) +{ +    size_t align = QEMU_VMALLOC_ALIGN; +    size_t total = size + align - getpagesize(); +    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE, +                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + +    if (ptr == MAP_FAILED) { +        return NULL; +    } + +    if (alignment) { +        *alignment = align; +    } +    ptr += offset; +    total -= offset; + +    if (offset > 0) { +        munmap(ptr - offset, offset); +    } +    if (total > size) { +        munmap(ptr + size, total - size); +    } + +    trace_qemu_anon_ram_alloc(size, ptr); +    return ptr; +} + +void qemu_vfree(void *ptr) +{ +    trace_qemu_vfree(ptr); +    free(ptr); +} + +void qemu_anon_ram_free(void *ptr, size_t size) +{ +    trace_qemu_anon_ram_free(ptr, size); +    if (ptr) { +        munmap(ptr, size); +    } +} + +void qemu_set_block(int fd) +{ +    int f; +    f = fcntl(fd, F_GETFL); +    fcntl(fd, F_SETFL, f & ~O_NONBLOCK); +} + +void qemu_set_nonblock(int fd) +{ +    int f; +    f = fcntl(fd, F_GETFL); +    fcntl(fd, F_SETFL, f | O_NONBLOCK); +} + +int socket_set_fast_reuse(int fd) +{ +    int val = 1, ret; + +    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, +                     (const char *)&val, sizeof(val)); + +    assert(ret == 0); + +    return ret; +} + +void qemu_set_cloexec(int fd) +{ +    int f; +    f = fcntl(fd, F_GETFD); +    fcntl(fd, F_SETFD, f | FD_CLOEXEC); +} + +/* + * Creates a pipe with FD_CLOEXEC set on both file 
descriptors + */ +int qemu_pipe(int pipefd[2]) +{ +    int ret; + +#ifdef CONFIG_PIPE2 +    ret = pipe2(pipefd, O_CLOEXEC); +    if (ret != -1 || errno != ENOSYS) { +        return ret; +    } +#endif +    ret = pipe(pipefd); +    if (ret == 0) { +        qemu_set_cloexec(pipefd[0]); +        qemu_set_cloexec(pipefd[1]); +    } + +    return ret; +} + +int qemu_utimens(const char *path, const struct timespec *times) +{ +    struct timeval tv[2], tv_now; +    struct stat st; +    int i; +#ifdef CONFIG_UTIMENSAT +    int ret; + +    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); +    if (ret != -1 || errno != ENOSYS) { +        return ret; +    } +#endif +    /* Fallback: use utimes() instead of utimensat() */ + +    /* happy if special cases */ +    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { +        return 0; +    } +    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { +        return utimes(path, NULL); +    } + +    /* prepare for hard cases */ +    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { +        gettimeofday(&tv_now, NULL); +    } +    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { +        stat(path, &st); +    } + +    for (i = 0; i < 2; i++) { +        if (times[i].tv_nsec == UTIME_NOW) { +            tv[i].tv_sec = tv_now.tv_sec; +            tv[i].tv_usec = tv_now.tv_usec; +        } else if (times[i].tv_nsec == UTIME_OMIT) { +            tv[i].tv_sec = (i == 0) ? 
st.st_atime : st.st_mtime; +            tv[i].tv_usec = 0; +        } else { +            tv[i].tv_sec = times[i].tv_sec; +            tv[i].tv_usec = times[i].tv_nsec / 1000; +        } +    } + +    return utimes(path, &tv[0]); +} + +char * +qemu_get_local_state_pathname(const char *relative_pathname) +{ +    return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, +                           relative_pathname); +} + +void qemu_set_tty_echo(int fd, bool echo) +{ +    struct termios tty; + +    tcgetattr(fd, &tty); + +    if (echo) { +        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; +    } else { +        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); +    } + +    tcsetattr(fd, TCSANOW, &tty); +} + +static char exec_dir[PATH_MAX]; + +void qemu_init_exec_dir(const char *argv0) +{ +    char *dir; +    char *p = NULL; +    char buf[PATH_MAX]; + +    assert(!exec_dir[0]); + +#if defined(__linux__) +    { +        int len; +        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); +        if (len > 0) { +            buf[len] = 0; +            p = buf; +        } +    } +#elif defined(__FreeBSD__) +    { +        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; +        size_t len = sizeof(buf) - 1; + +        *buf = '\0'; +        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && +            *buf) { +            buf[sizeof(buf) - 1] = '\0'; +            p = buf; +        } +    } +#endif +    /* If we don't have any way of figuring out the actual executable +       location then try argv[0].  
*/ +    if (!p) { +        if (!argv0) { +            return; +        } +        p = realpath(argv0, buf); +        if (!p) { +            return; +        } +    } +    dir = dirname(p); + +    pstrcpy(exec_dir, sizeof(exec_dir), dir); +} + +char *qemu_get_exec_dir(void) +{ +    return g_strdup(exec_dir); +} + +static sigjmp_buf sigjump; + +static void sigbus_handler(int signal) +{ +    siglongjmp(sigjump, 1); +} + +static size_t fd_getpagesize(int fd) +{ +#ifdef CONFIG_LINUX +    struct statfs fs; +    int ret; + +    if (fd != -1) { +        do { +            ret = fstatfs(fd, &fs); +        } while (ret != 0 && errno == EINTR); + +        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { +            return fs.f_bsize; +        } +    } +#endif + +    return getpagesize(); +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ +    int ret; +    struct sigaction act, oldact; +    sigset_t set, oldset; + +    memset(&act, 0, sizeof(act)); +    act.sa_handler = &sigbus_handler; +    act.sa_flags = 0; + +    ret = sigaction(SIGBUS, &act, &oldact); +    if (ret) { +        perror("os_mem_prealloc: failed to install signal handler"); +        exit(1); +    } + +    /* unblock SIGBUS */ +    sigemptyset(&set); +    sigaddset(&set, SIGBUS); +    pthread_sigmask(SIG_UNBLOCK, &set, &oldset); + +    if (sigsetjmp(sigjump, 1)) { +        fprintf(stderr, "os_mem_prealloc: Insufficient free host memory " +                        "pages available to allocate guest RAM\n"); +        exit(1); +    } else { +        int i; +        size_t hpagesize = fd_getpagesize(fd); +        size_t numpages = DIV_ROUND_UP(memory, hpagesize); + +        /* MAP_POPULATE silently ignores failures */ +        for (i = 0; i < numpages; i++) { +            memset(area + (hpagesize * i), 0, 1); +        } + +        ret = sigaction(SIGBUS, &oldact, NULL); +        if (ret) { +            perror("os_mem_prealloc: failed to reinstall signal handler"); +            exit(1); +        } + +    
    pthread_sigmask(SIG_SETMASK, &oldset, NULL); +    } +} + + +static struct termios oldtty; + +static void term_exit(void) +{ +    tcsetattr(0, TCSANOW, &oldtty); +} + +static void term_init(void) +{ +    struct termios tty; + +    tcgetattr(0, &tty); +    oldtty = tty; + +    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP +                          |INLCR|IGNCR|ICRNL|IXON); +    tty.c_oflag |= OPOST; +    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN); +    tty.c_cflag &= ~(CSIZE|PARENB); +    tty.c_cflag |= CS8; +    tty.c_cc[VMIN] = 1; +    tty.c_cc[VTIME] = 0; + +    tcsetattr(0, TCSANOW, &tty); + +    atexit(term_exit); +} + +int qemu_read_password(char *buf, int buf_size) +{ +    uint8_t ch; +    int i, ret; + +    printf("password: "); +    fflush(stdout); +    term_init(); +    i = 0; +    for (;;) { +        ret = read(0, &ch, 1); +        if (ret == -1) { +            if (errno == EAGAIN || errno == EINTR) { +                continue; +            } else { +                break; +            } +        } else if (ret == 0) { +            ret = -1; +            break; +        } else { +            if (ch == '\r' || +                ch == '\n') { +                ret = 0; +                break; +            } +            if (i < (buf_size - 1)) { +                buf[i++] = ch; +            } +        } +    } +    term_exit(); +    buf[i] = '\0'; +    printf("\n"); +    return ret; +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c new file mode 100644 index 00000000..730a6707 --- /dev/null +++ b/util/oslib-win32.c @@ -0,0 +1,496 @@ +/* + * os-win32.c + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * QEMU library functions for win32 which are shared between QEMU and + * the QEMU tools. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * The implementation of g_poll (functions poll_rest, g_poll) at the end of + * this file are based on code from GNOME glib-2 and use a different license, + * see the license comment there. 
+ */ +#include <windows.h> +#include <glib.h> +#include <stdlib.h> +#include "config-host.h" +#include "sysemu/sysemu.h" +#include "qemu/main-loop.h" +#include "trace.h" +#include "qemu/sockets.h" + +/* this must come after including "trace.h" */ +#include <shlobj.h> + +void *qemu_oom_check(void *ptr) +{ +    if (ptr == NULL) { +        fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError()); +        abort(); +    } +    return ptr; +} + +void *qemu_try_memalign(size_t alignment, size_t size) +{ +    void *ptr; + +    if (!size) { +        abort(); +    } +    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +    trace_qemu_memalign(alignment, size, ptr); +    return ptr; +} + +void *qemu_memalign(size_t alignment, size_t size) +{ +    return qemu_oom_check(qemu_try_memalign(alignment, size)); +} + +void *qemu_anon_ram_alloc(size_t size, uint64_t *align) +{ +    void *ptr; + +    /* FIXME: this is not exactly optimal solution since VirtualAlloc +       has 64Kb granularity, but at least it guarantees us that the +       memory is page aligned. 
*/ +    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); +    trace_qemu_anon_ram_alloc(size, ptr); +    return ptr; +} + +void qemu_vfree(void *ptr) +{ +    trace_qemu_vfree(ptr); +    if (ptr) { +        VirtualFree(ptr, 0, MEM_RELEASE); +    } +} + +void qemu_anon_ram_free(void *ptr, size_t size) +{ +    trace_qemu_anon_ram_free(ptr, size); +    if (ptr) { +        VirtualFree(ptr, 0, MEM_RELEASE); +    } +} + +/* FIXME: add proper locking */ +struct tm *gmtime_r(const time_t *timep, struct tm *result) +{ +    struct tm *p = gmtime(timep); +    memset(result, 0, sizeof(*result)); +    if (p) { +        *result = *p; +        p = result; +    } +    return p; +} + +/* FIXME: add proper locking */ +struct tm *localtime_r(const time_t *timep, struct tm *result) +{ +    struct tm *p = localtime(timep); +    memset(result, 0, sizeof(*result)); +    if (p) { +        *result = *p; +        p = result; +    } +    return p; +} + +void qemu_set_block(int fd) +{ +    unsigned long opt = 0; +    WSAEventSelect(fd, NULL, 0); +    ioctlsocket(fd, FIONBIO, &opt); +} + +void qemu_set_nonblock(int fd) +{ +    unsigned long opt = 1; +    ioctlsocket(fd, FIONBIO, &opt); +    qemu_fd_register(fd); +} + +int socket_set_fast_reuse(int fd) +{ +    /* Enabling the reuse of an endpoint that was used by a socket still in +     * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows +     * fast reuse is the default and SO_REUSEADDR does strange things. So we +     * don't have to do anything here. 
More info can be found at: +     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ +    return 0; +} + +int inet_aton(const char *cp, struct in_addr *ia) +{ +    uint32_t addr = inet_addr(cp); +    if (addr == 0xffffffff) { +        return 0; +    } +    ia->s_addr = addr; +    return 1; +} + +void qemu_set_cloexec(int fd) +{ +} + +/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */ +#define _W32_FT_OFFSET (116444736000000000ULL) + +int qemu_gettimeofday(qemu_timeval *tp) +{ +  union { +    unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */ +    FILETIME ft; +  }  _now; + +  if(tp) { +      GetSystemTimeAsFileTime (&_now.ft); +      tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL ); +      tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL); +  } +  /* Always return 0 as per Open Group Base Specifications Issue 6. +     Do not set errno on error.  */ +  return 0; +} + +int qemu_get_thread_id(void) +{ +    return GetCurrentThreadId(); +} + +char * +qemu_get_local_state_pathname(const char *relative_pathname) +{ +    HRESULT result; +    char base_path[MAX_PATH+1] = ""; + +    result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL, +                             /* SHGFP_TYPE_CURRENT */ 0, base_path); +    if (result != S_OK) { +        /* misconfigured environment */ +        g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result); +        abort(); +    } +    return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path, +                           relative_pathname); +} + +void qemu_set_tty_echo(int fd, bool echo) +{ +    HANDLE handle = (HANDLE)_get_osfhandle(fd); +    DWORD dwMode = 0; + +    if (handle == INVALID_HANDLE_VALUE) { +        return; +    } + +    GetConsoleMode(handle, &dwMode); + +    if (echo) { +        SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT); +    } else { +        SetConsoleMode(handle, +                       dwMode & 
~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT)); +    } +} + +static char exec_dir[PATH_MAX]; + +void qemu_init_exec_dir(const char *argv0) +{ + +    char *p; +    char buf[MAX_PATH]; +    DWORD len; + +    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1); +    if (len == 0) { +        return; +    } + +    buf[len] = 0; +    p = buf + len - 1; +    while (p != buf && *p != '\\') { +        p--; +    } +    *p = 0; +    if (access(buf, R_OK) == 0) { +        pstrcpy(exec_dir, sizeof(exec_dir), buf); +    } +} + +char *qemu_get_exec_dir(void) +{ +    return g_strdup(exec_dir); +} + +/* + * The original implementation of g_poll from glib has a problem on Windows + * when using timeouts < 10 ms. + * + * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead + * of wait. This causes significant performance degradation of QEMU. + * + * The following code is a copy of the original code from glib/gpoll.c + * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19). + * Some debug code was removed and the code was reformatted. + * All other code modifications are marked with 'QEMU'. + */ + +/* + * gpoll.c: poll(2) abstraction + * Copyright 1998 Owen Taylor + * Copyright 2008 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles, +                     GPollFD *fds, guint nfds, gint timeout) +{ +    DWORD ready; +    GPollFD *f; +    int recursed_result; + +    if (poll_msgs) { +        /* Wait for either messages or handles +         * -> Use MsgWaitForMultipleObjectsEx +         */ +        ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout, +                                            QS_ALLINPUT, MWMO_ALERTABLE); + +        if (ready == WAIT_FAILED) { +            gchar *emsg = g_win32_error_message(GetLastError()); +            g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg); +            g_free(emsg); +        } +    } else if (nhandles == 0) { +        /* No handles to wait for, just the timeout */ +        if (timeout == INFINITE) { +            ready = WAIT_FAILED; +        } else { +            SleepEx(timeout, TRUE); +            ready = WAIT_TIMEOUT; +        } +    } else { +        /* Wait for just handles +         * -> Use WaitForMultipleObjectsEx +         */ +        ready = +            WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE); +        if (ready == WAIT_FAILED) { +            gchar *emsg = g_win32_error_message(GetLastError()); +            g_warning("WaitForMultipleObjectsEx failed: %s", emsg); +            g_free(emsg); +        } +    } + +    if (ready == WAIT_FAILED) { +        return -1; +    } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) { +        return 0; +    } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) { +        for (f = fds; f < &fds[nfds]; ++f) { +            if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) { +                f->revents |= G_IO_IN; +            } +        } + +        /* If we have a timeout, or no handles to poll, be satisfied +         * with just noticing we have messages waiting. 
+         */ +        if (timeout != 0 || nhandles == 0) { +            return 1; +        } + +        /* If no timeout and handles to poll, recurse to poll them, +         * too. +         */ +        recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0); +        return (recursed_result == -1) ? -1 : 1 + recursed_result; +    } else if (/* QEMU: removed the following unneeded statement which causes +                * a compiler warning: ready >= WAIT_OBJECT_0 && */ +               ready < WAIT_OBJECT_0 + nhandles) { +        for (f = fds; f < &fds[nfds]; ++f) { +            if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) { +                f->revents = f->events; +            } +        } + +        /* If no timeout and polling several handles, recurse to poll +         * the rest of them. +         */ +        if (timeout == 0 && nhandles > 1) { +            /* Remove the handle that fired */ +            int i; +            if (ready < nhandles - 1) { +                for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) { +                    handles[i-1] = handles[i]; +                } +            } +            nhandles--; +            recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0); +            return (recursed_result == -1) ? -1 : 1 + recursed_result; +        } +        return 1; +    } + +    return 0; +} + +gint g_poll(GPollFD *fds, guint nfds, gint timeout) +{ +    HANDLE handles[MAXIMUM_WAIT_OBJECTS]; +    gboolean poll_msgs = FALSE; +    GPollFD *f; +    gint nhandles = 0; +    int retval; + +    for (f = fds; f < &fds[nfds]; ++f) { +        if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) { +            poll_msgs = TRUE; +        } else if (f->fd > 0) { +            /* Don't add the same handle several times into the array, as +             * docs say that is not allowed, even if it actually does seem +             * to work. 
+             */ +            gint i; + +            for (i = 0; i < nhandles; i++) { +                if (handles[i] == (HANDLE) f->fd) { +                    break; +                } +            } + +            if (i == nhandles) { +                if (nhandles == MAXIMUM_WAIT_OBJECTS) { +                    g_warning("Too many handles to wait for!\n"); +                    break; +                } else { +                    handles[nhandles++] = (HANDLE) f->fd; +                } +            } +        } +    } + +    for (f = fds; f < &fds[nfds]; ++f) { +        f->revents = 0; +    } + +    if (timeout == -1) { +        timeout = INFINITE; +    } + +    /* Polling for several things? */ +    if (nhandles > 1 || (nhandles > 0 && poll_msgs)) { +        /* First check if one or several of them are immediately +         * available +         */ +        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0); + +        /* If not, and we have a significant timeout, poll again with +         * timeout then. Note that this will return indication for only +         * one event, or only for messages. We ignore timeouts less than +         * ten milliseconds as they are mostly pointless on Windows, the +         * MsgWaitForMultipleObjectsEx() call will timeout right away +         * anyway. +         * +         * Modification for QEMU: replaced timeout >= 10 by timeout > 0. 
+         */ +        if (retval == 0 && (timeout == INFINITE || timeout > 0)) { +            retval = poll_rest(poll_msgs, handles, nhandles, +                               fds, nfds, timeout); +        } +    } else { +        /* Just polling for one thing, so no need to check first if +         * available immediately +         */ +        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout); +    } + +    if (retval == -1) { +        for (f = fds; f < &fds[nfds]; ++f) { +            f->revents = 0; +        } +    } + +    return retval; +} + +size_t getpagesize(void) +{ +    SYSTEM_INFO system_info; + +    GetSystemInfo(&system_info); +    return system_info.dwPageSize; +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ +    int i; +    size_t pagesize = getpagesize(); + +    memory = (memory + pagesize - 1) & -pagesize; +    for (i = 0; i < memory / pagesize; i++) { +        memset(area + pagesize * i, 0, 1); +    } +} + + +/* XXX: put correct support for win32 */ +int qemu_read_password(char *buf, int buf_size) +{ +    int c, i; + +    printf("Password: "); +    fflush(stdout); +    i = 0; +    for (;;) { +        c = getchar(); +        if (c < 0) { +            buf[i] = '\0'; +            return -1; +        } else if (c == '\n') { +            break; +        } else if (i < (buf_size - 1)) { +            buf[i++] = c; +        } +    } +    buf[i] = '\0'; +    return 0; +} diff --git a/util/path.c b/util/path.c new file mode 100644 index 00000000..4e4877e8 --- /dev/null +++ b/util/path.c @@ -0,0 +1,181 @@ +/* Code to mangle pathnames into those matching a given prefix. +   eg. open("/lib/foo.so") => open("/usr/gnemul/i386-linux/lib/foo.so"); + +   The assumption is that this area does not change. 
+*/ +#include <sys/types.h> +#include <sys/param.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include "qemu-common.h" + +struct pathelem +{ +    /* Name of this, eg. lib */ +    char *name; +    /* Full path name, eg. /usr/gnemul/x86-linux/lib. */ +    char *pathname; +    struct pathelem *parent; +    /* Children */ +    unsigned int num_entries; +    struct pathelem *entries[0]; +}; + +static struct pathelem *base; + +/* First N chars of S1 match S2, and S2 is N chars long. */ +static int strneq(const char *s1, unsigned int n, const char *s2) +{ +    unsigned int i; + +    for (i = 0; i < n; i++) +        if (s1[i] != s2[i]) +            return 0; +    return s2[i] == 0; +} + +static struct pathelem *add_entry(struct pathelem *root, const char *name, +                                  unsigned type); + +static struct pathelem *new_entry(const char *root, +                                  struct pathelem *parent, +                                  const char *name) +{ +    struct pathelem *new = g_malloc(sizeof(*new)); +    new->name = g_strdup(name); +    new->pathname = g_strdup_printf("%s/%s", root, name); +    new->num_entries = 0; +    return new; +} + +#define streq(a,b) (strcmp((a), (b)) == 0) + +/* Not all systems provide this feature */ +#if defined(DT_DIR) && defined(DT_UNKNOWN) && defined(DT_LNK) +# define dirent_type(dirent) ((dirent)->d_type) +# define is_dir_maybe(type) \ +    ((type) == DT_DIR || (type) == DT_UNKNOWN || (type) == DT_LNK) +#else +# define dirent_type(dirent) (1) +# define is_dir_maybe(type)  (type) +#endif + +static struct pathelem *add_dir_maybe(struct pathelem *path) +{ +    DIR *dir; + +    if ((dir = opendir(path->pathname)) != NULL) { +        struct dirent *dirent; + +        while ((dirent = readdir(dir)) != NULL) { +            if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")){ +                path = add_entry(path, 
dirent->d_name, dirent_type(dirent)); +            } +        } +        closedir(dir); +    } +    return path; +} + +static struct pathelem *add_entry(struct pathelem *root, const char *name, +                                  unsigned type) +{ +    struct pathelem **e; + +    root->num_entries++; + +    root = g_realloc(root, sizeof(*root) +                   + sizeof(root->entries[0])*root->num_entries); +    e = &root->entries[root->num_entries-1]; + +    *e = new_entry(root->pathname, root, name); +    if (is_dir_maybe(type)) { +        *e = add_dir_maybe(*e); +    } + +    return root; +} + +/* This needs to be done after tree is stabilized (ie. no more reallocs!). */ +static void set_parents(struct pathelem *child, struct pathelem *parent) +{ +    unsigned int i; + +    child->parent = parent; +    for (i = 0; i < child->num_entries; i++) +        set_parents(child->entries[i], child); +} + +/* FIXME: Doesn't handle DIR/.. where DIR is not in emulated dir. */ +static const char * +follow_path(const struct pathelem *cursor, const char *name) +{ +    unsigned int i, namelen; + +    name += strspn(name, "/"); +    namelen = strcspn(name, "/"); + +    if (namelen == 0) +        return cursor->pathname; + +    if (strneq(name, namelen, "..")) +        return follow_path(cursor->parent, name + namelen); + +    if (strneq(name, namelen, ".")) +        return follow_path(cursor, name + namelen); + +    for (i = 0; i < cursor->num_entries; i++) +        if (strneq(name, namelen, cursor->entries[i]->name)) +            return follow_path(cursor->entries[i], name + namelen); + +    /* Not found */ +    return NULL; +} + +void init_paths(const char *prefix) +{ +    char pref_buf[PATH_MAX]; + +    if (prefix[0] == '\0' || +        !strcmp(prefix, "/")) +        return; + +    if (prefix[0] != '/') { +        char *cwd = getcwd(NULL, 0); +        size_t pref_buf_len = sizeof(pref_buf); + +        if (!cwd) +            abort(); +        pstrcpy(pref_buf, 
sizeof(pref_buf), cwd); +        pstrcat(pref_buf, pref_buf_len, "/"); +        pstrcat(pref_buf, pref_buf_len, prefix); +        free(cwd); +    } else +        pstrcpy(pref_buf, sizeof(pref_buf), prefix + 1); + +    base = new_entry("", NULL, pref_buf); +    base = add_dir_maybe(base); +    if (base->num_entries == 0) { +        g_free(base->pathname); +        g_free(base->name); +        g_free(base); +        base = NULL; +    } else { +        set_parents(base, base); +    } +} + +/* Look for path in emulation dir, otherwise return name. */ +const char *path(const char *name) +{ +    /* Only do absolute paths: quick and dirty, but should mostly be OK. +       Could do relative by tracking cwd. */ +    if (!base || !name || name[0] != '/') +        return name; + +    return follow_path(base, name) ?: name; +} diff --git a/util/qemu-config.c b/util/qemu-config.c new file mode 100644 index 00000000..687fd34c --- /dev/null +++ b/util/qemu-config.c @@ -0,0 +1,573 @@ +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "qapi/error.h" +#include "qmp-commands.h" + +static QemuOptsList *vm_config_groups[48]; +static QemuOptsList *drive_config_groups[4]; + +static QemuOptsList *find_list(QemuOptsList **lists, const char *group, +                               Error **errp) +{ +    int i; + +    for (i = 0; lists[i] != NULL; i++) { +        if (strcmp(lists[i]->name, group) == 0) +            break; +    } +    if (lists[i] == NULL) { +        error_setg(errp, "There is no option group '%s'", group); +    } +    return lists[i]; +} + +QemuOptsList *qemu_find_opts(const char *group) +{ +    QemuOptsList *ret; +    Error *local_err = NULL; + +    ret = find_list(vm_config_groups, group, &local_err); +    if (local_err) { +        error_report_err(local_err); +    } + +    return ret; +} + +QemuOpts *qemu_find_opts_singleton(const char *group) +{ +    QemuOptsList *list; +    QemuOpts *opts; + +    
list = qemu_find_opts(group); +    assert(list); +    opts = qemu_opts_find(list, NULL); +    if (!opts) { +        opts = qemu_opts_create(list, NULL, 0, &error_abort); +    } +    return opts; +} + +static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc) +{ +    CommandLineParameterInfoList *param_list = NULL, *entry; +    CommandLineParameterInfo *info; +    int i; + +    for (i = 0; desc[i].name != NULL; i++) { +        info = g_malloc0(sizeof(*info)); +        info->name = g_strdup(desc[i].name); + +        switch (desc[i].type) { +        case QEMU_OPT_STRING: +            info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; +            break; +        case QEMU_OPT_BOOL: +            info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN; +            break; +        case QEMU_OPT_NUMBER: +            info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER; +            break; +        case QEMU_OPT_SIZE: +            info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE; +            break; +        } + +        if (desc[i].help) { +            info->has_help = true; +            info->help = g_strdup(desc[i].help); +        } +        if (desc[i].def_value_str) { +            info->has_q_default = true; +            info->q_default = g_strdup(desc[i].def_value_str); +        } + +        entry = g_malloc0(sizeof(*entry)); +        entry->value = info; +        entry->next = param_list; +        param_list = entry; +    } + +    return param_list; +} + +/* remove repeated entry from the info list */ +static void cleanup_infolist(CommandLineParameterInfoList *head) +{ +    CommandLineParameterInfoList *pre_entry, *cur, *del_entry; + +    cur = head; +    while (cur->next) { +        pre_entry = head; +        while (pre_entry != cur->next) { +            if (!strcmp(pre_entry->value->name, cur->next->value->name)) { +                del_entry = cur->next; +                cur->next = cur->next->next; +                g_free(del_entry); +                
break; +            } +            pre_entry = pre_entry->next; +        } +        cur = cur->next; +    } +} + +/* merge the description items of two parameter infolists */ +static void connect_infolist(CommandLineParameterInfoList *head, +                             CommandLineParameterInfoList *new) +{ +    CommandLineParameterInfoList *cur; + +    cur = head; +    while (cur->next) { +        cur = cur->next; +    } +    cur->next = new; +} + +/* access all the local QemuOptsLists for drive option */ +static CommandLineParameterInfoList *get_drive_infolist(void) +{ +    CommandLineParameterInfoList *head = NULL, *cur; +    int i; + +    for (i = 0; drive_config_groups[i] != NULL; i++) { +        if (!head) { +            head = query_option_descs(drive_config_groups[i]->desc); +        } else { +            cur = query_option_descs(drive_config_groups[i]->desc); +            connect_infolist(head, cur); +        } +    } +    cleanup_infolist(head); + +    return head; +} + +/* restore machine options that are now machine's properties */ +static QemuOptsList machine_opts = { +    .merge_lists = true, +    .head = QTAILQ_HEAD_INITIALIZER(machine_opts.head), +    .desc = { +        { +            .name = "type", +            .type = QEMU_OPT_STRING, +            .help = "emulated machine" +        },{ +            .name = "accel", +            .type = QEMU_OPT_STRING, +            .help = "accelerator list", +        },{ +            .name = "kernel_irqchip", +            .type = QEMU_OPT_BOOL, +            .help = "use KVM in-kernel irqchip", +        },{ +            .name = "kvm_shadow_mem", +            .type = QEMU_OPT_SIZE, +            .help = "KVM shadow MMU size", +        },{ +            .name = "kernel", +            .type = QEMU_OPT_STRING, +            .help = "Linux kernel image file", +        },{ +            .name = "initrd", +            .type = QEMU_OPT_STRING, +            .help = "Linux initial ramdisk file", +        },{ +            
.name = "append", +            .type = QEMU_OPT_STRING, +            .help = "Linux kernel command line", +        },{ +            .name = "dtb", +            .type = QEMU_OPT_STRING, +            .help = "Linux kernel device tree file", +        },{ +            .name = "dumpdtb", +            .type = QEMU_OPT_STRING, +            .help = "Dump current dtb to a file and quit", +        },{ +            .name = "phandle_start", +            .type = QEMU_OPT_NUMBER, +            .help = "The first phandle ID we may generate dynamically", +        },{ +            .name = "dt_compatible", +            .type = QEMU_OPT_STRING, +            .help = "Overrides the \"compatible\" property of the dt root node", +        },{ +            .name = "dump-guest-core", +            .type = QEMU_OPT_BOOL, +            .help = "Include guest memory in  a core dump", +        },{ +            .name = "mem-merge", +            .type = QEMU_OPT_BOOL, +            .help = "enable/disable memory merge support", +        },{ +            .name = "usb", +            .type = QEMU_OPT_BOOL, +            .help = "Set on/off to enable/disable usb", +        },{ +            .name = "firmware", +            .type = QEMU_OPT_STRING, +            .help = "firmware image", +        },{ +            .name = "iommu", +            .type = QEMU_OPT_BOOL, +            .help = "Set on/off to enable/disable Intel IOMMU (VT-d)", +        },{ +            .name = "suppress-vmdesc", +            .type = QEMU_OPT_BOOL, +            .help = "Set on to disable self-describing migration", +        },{ +            .name = "aes-key-wrap", +            .type = QEMU_OPT_BOOL, +            .help = "enable/disable AES key wrapping using the CPACF wrapping key", +        },{ +            .name = "dea-key-wrap", +            .type = QEMU_OPT_BOOL, +            .help = "enable/disable DEA key wrapping using the CPACF wrapping key", +        }, +        { /* End of list */ } +    } +}; + +CommandLineOptionInfoList 
*qmp_query_command_line_options(bool has_option, +                                                          const char *option, +                                                          Error **errp) +{ +    CommandLineOptionInfoList *conf_list = NULL, *entry; +    CommandLineOptionInfo *info; +    int i; + +    for (i = 0; vm_config_groups[i] != NULL; i++) { +        if (!has_option || !strcmp(option, vm_config_groups[i]->name)) { +            info = g_malloc0(sizeof(*info)); +            info->option = g_strdup(vm_config_groups[i]->name); +            if (!strcmp("drive", vm_config_groups[i]->name)) { +                info->parameters = get_drive_infolist(); +            } else if (!strcmp("machine", vm_config_groups[i]->name)) { +                info->parameters = query_option_descs(machine_opts.desc); +            } else { +                info->parameters = +                    query_option_descs(vm_config_groups[i]->desc); +            } +            entry = g_malloc0(sizeof(*entry)); +            entry->value = info; +            entry->next = conf_list; +            conf_list = entry; +        } +    } + +    if (conf_list == NULL) { +        error_setg(errp, "invalid option name: %s", option); +    } + +    return conf_list; +} + +QemuOptsList *qemu_find_opts_err(const char *group, Error **errp) +{ +    return find_list(vm_config_groups, group, errp); +} + +void qemu_add_drive_opts(QemuOptsList *list) +{ +    int entries, i; + +    entries = ARRAY_SIZE(drive_config_groups); +    entries--; /* keep list NULL terminated */ +    for (i = 0; i < entries; i++) { +        if (drive_config_groups[i] == NULL) { +            drive_config_groups[i] = list; +            return; +        } +    } +    fprintf(stderr, "ran out of space in drive_config_groups"); +    abort(); +} + +void qemu_add_opts(QemuOptsList *list) +{ +    int entries, i; + +    entries = ARRAY_SIZE(vm_config_groups); +    entries--; /* keep list NULL terminated */ +    for (i = 0; i < entries; 
i++) { +        if (vm_config_groups[i] == NULL) { +            vm_config_groups[i] = list; +            return; +        } +    } +    fprintf(stderr, "ran out of space in vm_config_groups"); +    abort(); +} + +int qemu_set_option(const char *str) +{ +    Error *local_err = NULL; +    char group[64], id[64], arg[64]; +    QemuOptsList *list; +    QemuOpts *opts; +    int rc, offset; + +    rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset); +    if (rc < 3 || str[offset] != '=') { +        error_report("can't parse: \"%s\"", str); +        return -1; +    } + +    list = qemu_find_opts(group); +    if (list == NULL) { +        return -1; +    } + +    opts = qemu_opts_find(list, id); +    if (!opts) { +        error_report("there is no %s \"%s\" defined", +                     list->name, id); +        return -1; +    } + +    qemu_opt_set(opts, arg, str + offset + 1, &local_err); +    if (local_err) { +        error_report_err(local_err); +        return -1; +    } +    return 0; +} + +struct ConfigWriteData { +    QemuOptsList *list; +    FILE *fp; +}; + +static int config_write_opt(void *opaque, const char *name, const char *value, +                            Error **errp) +{ +    struct ConfigWriteData *data = opaque; + +    fprintf(data->fp, "  %s = \"%s\"\n", name, value); +    return 0; +} + +static int config_write_opts(void *opaque, QemuOpts *opts, Error **errp) +{ +    struct ConfigWriteData *data = opaque; +    const char *id = qemu_opts_id(opts); + +    if (id) { +        fprintf(data->fp, "[%s \"%s\"]\n", data->list->name, id); +    } else { +        fprintf(data->fp, "[%s]\n", data->list->name); +    } +    qemu_opt_foreach(opts, config_write_opt, data, NULL); +    fprintf(data->fp, "\n"); +    return 0; +} + +void qemu_config_write(FILE *fp) +{ +    struct ConfigWriteData data = { .fp = fp }; +    QemuOptsList **lists = vm_config_groups; +    int i; + +    fprintf(fp, "# qemu config file\n\n"); +    for (i = 0; lists[i] != 
NULL; i++) { +        data.list = lists[i]; +        qemu_opts_foreach(data.list, config_write_opts, &data, NULL); +    } +} + +int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname) +{ +    char line[1024], group[64], id[64], arg[64], value[1024]; +    Location loc; +    QemuOptsList *list = NULL; +    Error *local_err = NULL; +    QemuOpts *opts = NULL; +    int res = -1, lno = 0; + +    loc_push_none(&loc); +    while (fgets(line, sizeof(line), fp) != NULL) { +        loc_set_file(fname, ++lno); +        if (line[0] == '\n') { +            /* skip empty lines */ +            continue; +        } +        if (line[0] == '#') { +            /* comment */ +            continue; +        } +        if (sscanf(line, "[%63s \"%63[^\"]\"]", group, id) == 2) { +            /* group with id */ +            list = find_list(lists, group, &local_err); +            if (local_err) { +                error_report_err(local_err); +                goto out; +            } +            opts = qemu_opts_create(list, id, 1, NULL); +            continue; +        } +        if (sscanf(line, "[%63[^]]]", group) == 1) { +            /* group without id */ +            list = find_list(lists, group, &local_err); +            if (local_err) { +                error_report_err(local_err); +                goto out; +            } +            opts = qemu_opts_create(list, NULL, 0, &error_abort); +            continue; +        } +        value[0] = '\0'; +        if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 || +            sscanf(line, " %63s = \"\"", arg) == 1) { +            /* arg = value */ +            if (opts == NULL) { +                error_report("no group defined"); +                goto out; +            } +            qemu_opt_set(opts, arg, value, &local_err); +            if (local_err) { +                error_report_err(local_err); +                goto out; +            } +            continue; +        } +        error_report("parse 
error"); +        goto out; +    } +    if (ferror(fp)) { +        error_report("error reading file"); +        goto out; +    } +    res = 0; +out: +    loc_pop(&loc); +    return res; +} + +int qemu_read_config_file(const char *filename) +{ +    FILE *f = fopen(filename, "r"); +    int ret; + +    if (f == NULL) { +        return -errno; +    } + +    ret = qemu_config_parse(f, vm_config_groups, filename); +    fclose(f); + +    if (ret == 0) { +        return 0; +    } else { +        return -EINVAL; +    } +} + +static void config_parse_qdict_section(QDict *options, QemuOptsList *opts, +                                       Error **errp) +{ +    QemuOpts *subopts; +    QDict *subqdict; +    QList *list = NULL; +    Error *local_err = NULL; +    size_t orig_size, enum_size; +    char *prefix; + +    prefix = g_strdup_printf("%s.", opts->name); +    qdict_extract_subqdict(options, &subqdict, prefix); +    g_free(prefix); +    orig_size = qdict_size(subqdict); +    if (!orig_size) { +        goto out; +    } + +    subopts = qemu_opts_create(opts, NULL, 0, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        goto out; +    } + +    qemu_opts_absorb_qdict(subopts, subqdict, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        goto out; +    } + +    enum_size = qdict_size(subqdict); +    if (enum_size < orig_size && enum_size) { +        error_setg(errp, "Unknown option '%s' for [%s]", +                   qdict_first(subqdict)->key, opts->name); +        goto out; +    } + +    if (enum_size) { +        /* Multiple, enumerated sections */ +        QListEntry *list_entry; +        unsigned i = 0; + +        /* Not required anymore */ +        qemu_opts_del(subopts); + +        qdict_array_split(subqdict, &list); +        if (qdict_size(subqdict)) { +            error_setg(errp, "Unused option '%s' for [%s]", +                       qdict_first(subqdict)->key, opts->name); +            goto out; +   
     } + +        QLIST_FOREACH_ENTRY(list, list_entry) { +            QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry)); +            char *opt_name; + +            if (!section) { +                error_setg(errp, "[%s] section (index %u) does not consist of " +                           "keys", opts->name, i); +                goto out; +            } + +            opt_name = g_strdup_printf("%s.%u", opts->name, i++); +            subopts = qemu_opts_create(opts, opt_name, 1, &local_err); +            g_free(opt_name); +            if (local_err) { +                error_propagate(errp, local_err); +                goto out; +            } + +            qemu_opts_absorb_qdict(subopts, section, &local_err); +            if (local_err) { +                error_propagate(errp, local_err); +                qemu_opts_del(subopts); +                goto out; +            } + +            if (qdict_size(section)) { +                error_setg(errp, "[%s] section doesn't support the option '%s'", +                           opts->name, qdict_first(section)->key); +                qemu_opts_del(subopts); +                goto out; +            } +        } +    } + +out: +    QDECREF(subqdict); +    QDECREF(list); +} + +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, +                             Error **errp) +{ +    int i; +    Error *local_err = NULL; + +    for (i = 0; lists[i]; i++) { +        config_parse_qdict_section(options, lists[i], &local_err); +        if (local_err) { +            error_propagate(errp, local_err); +            return; +        } +    } +} diff --git a/util/qemu-error.c b/util/qemu-error.c new file mode 100644 index 00000000..77ea6c61 --- /dev/null +++ b/util/qemu-error.c @@ -0,0 +1,239 @@ +/* + * Error reporting + * + * Copyright (C) 2010 Red Hat Inc. + * + * Authors: + *  Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + */ + +#include <stdio.h> +#include "monitor/monitor.h" +#include "qemu/error-report.h" + +/* + * Print to current monitor if we have one, else to stderr. + * TODO should return int, so callers can calculate width, but that + * requires surgery to monitor_vprintf().  Left for another day. + */ +void error_vprintf(const char *fmt, va_list ap) +{ +    if (cur_mon && !monitor_cur_is_qmp()) { +        monitor_vprintf(cur_mon, fmt, ap); +    } else { +        vfprintf(stderr, fmt, ap); +    } +} + +/* + * Print to current monitor if we have one, else to stderr. + * TODO just like error_vprintf() + */ +void error_printf(const char *fmt, ...) +{ +    va_list ap; + +    va_start(ap, fmt); +    error_vprintf(fmt, ap); +    va_end(ap); +} + +void error_printf_unless_qmp(const char *fmt, ...) +{ +    va_list ap; + +    if (!monitor_cur_is_qmp()) { +        va_start(ap, fmt); +        error_vprintf(fmt, ap); +        va_end(ap); +    } +} + +static Location std_loc = { +    .kind = LOC_NONE +}; +static Location *cur_loc = &std_loc; + +/* + * Push location saved in LOC onto the location stack, return it. + * The top of that stack is the current location. + * Needs a matching loc_pop(). + */ +Location *loc_push_restore(Location *loc) +{ +    assert(!loc->prev); +    loc->prev = cur_loc; +    cur_loc = loc; +    return loc; +} + +/* + * Initialize *LOC to "nowhere", push it onto the location stack. + * The top of that stack is the current location. + * Needs a matching loc_pop(). + * Return LOC. + */ +Location *loc_push_none(Location *loc) +{ +    loc->kind = LOC_NONE; +    loc->prev = NULL; +    return loc_push_restore(loc); +} + +/* + * Pop the location stack. + * LOC must be the current location, i.e. the top of the stack. 
+ */ +Location *loc_pop(Location *loc) +{ +    assert(cur_loc == loc && loc->prev); +    cur_loc = loc->prev; +    loc->prev = NULL; +    return loc; +} + +/* + * Save the current location in LOC, return LOC. + */ +Location *loc_save(Location *loc) +{ +    *loc = *cur_loc; +    loc->prev = NULL; +    return loc; +} + +/* + * Change the current location to the one saved in LOC. + */ +void loc_restore(Location *loc) +{ +    Location *prev = cur_loc->prev; +    assert(!loc->prev); +    *cur_loc = *loc; +    cur_loc->prev = prev; +} + +/* + * Change the current location to "nowhere in particular". + */ +void loc_set_none(void) +{ +    cur_loc->kind = LOC_NONE; +} + +/* + * Change the current location to argument ARGV[IDX..IDX+CNT-1]. + */ +void loc_set_cmdline(char **argv, int idx, int cnt) +{ +    cur_loc->kind = LOC_CMDLINE; +    cur_loc->num = cnt; +    cur_loc->ptr = argv + idx; +} + +/* + * Change the current location to file FNAME, line LNO. + */ +void loc_set_file(const char *fname, int lno) +{ +    assert (fname || cur_loc->kind == LOC_FILE); +    cur_loc->kind = LOC_FILE; +    cur_loc->num = lno; +    if (fname) { +        cur_loc->ptr = fname; +    } +} + +static const char *progname; + +/* + * Set the program name for error_print_loc(). + */ +void error_set_progname(const char *argv0) +{ +    const char *p = strrchr(argv0, '/'); +    progname = p ? p + 1 : argv0; +} + +const char *error_get_progname(void) +{ +    return progname; +} + +/* + * Print current location to current monitor if we have one, else to stderr. 
+ */ +static void error_print_loc(void) +{ +    const char *sep = ""; +    int i; +    const char *const *argp; + +    if (!cur_mon && progname) { +        fprintf(stderr, "%s:", progname); +        sep = " "; +    } +    switch (cur_loc->kind) { +    case LOC_CMDLINE: +        argp = cur_loc->ptr; +        for (i = 0; i < cur_loc->num; i++) { +            error_printf("%s%s", sep, argp[i]); +            sep = " "; +        } +        error_printf(": "); +        break; +    case LOC_FILE: +        error_printf("%s:", (const char *)cur_loc->ptr); +        if (cur_loc->num) { +            error_printf("%d:", cur_loc->num); +        } +        error_printf(" "); +        break; +    default: +        error_printf("%s", sep); +    } +} + +bool enable_timestamp_msg; +/* + * Print an error message to current monitor if we have one, else to stderr. + * Format arguments like vsprintf().  The result should not contain + * newlines. + * Prepend the current location and append a newline. + * It's wrong to call this in a QMP monitor.  Use error_setg() there. + */ +void error_vreport(const char *fmt, va_list ap) +{ +    GTimeVal tv; +    gchar *timestr; + +    if (enable_timestamp_msg) { +        g_get_current_time(&tv); +        timestr = g_time_val_to_iso8601(&tv); +        error_printf("%s ", timestr); +        g_free(timestr); +    } + +    error_print_loc(); +    error_vprintf(fmt, ap); +    error_printf("\n"); +} + +/* + * Print an error message to current monitor if we have one, else to stderr. + * Format arguments like sprintf().  The result should not contain + * newlines. + * Prepend the current location and append a newline. + * It's wrong to call this in a QMP monitor.  Use error_setg() there. + */ +void error_report(const char *fmt, ...) 
+{ +    va_list ap; + +    va_start(ap, fmt); +    error_vreport(fmt, ap); +    va_end(ap); +} diff --git a/util/qemu-openpty.c b/util/qemu-openpty.c new file mode 100644 index 00000000..4c532111 --- /dev/null +++ b/util/qemu-openpty.c @@ -0,0 +1,137 @@ +/* + * qemu-openpty.c + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * Wrapper function qemu_openpty() implementation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * This is not part of oslib-posix.c because this function + * uses openpty() which often in -lutil, and if we add this + * dependency to oslib-posix.o, every app will have to be + * linked with -lutil. 
+ */ + +#include "config-host.h" +#include "qemu-common.h" + +#if defined(__GLIBC__) +# include <pty.h> +#elif defined CONFIG_BSD +# include <termios.h> +# if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) +#  include <libutil.h> +# else +#  include <util.h> +# endif +#elif defined CONFIG_SOLARIS +# include <termios.h> +# include <stropts.h> +#else +# include <termios.h> +#endif + +#ifdef __sun__ +/* Once Solaris has openpty(), this is going to be removed. */ +static int openpty(int *amaster, int *aslave, char *name, +                   struct termios *termp, struct winsize *winp) +{ +        const char *slave; +        int mfd = -1, sfd = -1; + +        *amaster = *aslave = -1; + +        mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY); +        if (mfd < 0) +                goto err; + +        if (grantpt(mfd) == -1 || unlockpt(mfd) == -1) +                goto err; + +        if ((slave = ptsname(mfd)) == NULL) +                goto err; + +        if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1) +                goto err; + +        if (ioctl(sfd, I_PUSH, "ptem") == -1 || +            (termp != NULL && tcgetattr(sfd, termp) < 0)) +                goto err; + +        if (amaster) +                *amaster = mfd; +        if (aslave) +                *aslave = sfd; +        if (winp) +                ioctl(sfd, TIOCSWINSZ, winp); + +        return 0; + +err: +        if (sfd != -1) +                close(sfd); +        close(mfd); +        return -1; +} + +static void cfmakeraw (struct termios *termios_p) +{ +        termios_p->c_iflag &= +                ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON); +        termios_p->c_oflag &= ~OPOST; +        termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN); +        termios_p->c_cflag &= ~(CSIZE|PARENB); +        termios_p->c_cflag |= CS8; + +        termios_p->c_cc[VMIN] = 0; +        termios_p->c_cc[VTIME] = 0; +} +#endif + +int qemu_openpty_raw(int *aslave, char *pty_name) 
+{ +    int amaster; +    struct termios tty; +#if defined(__OpenBSD__) || defined(__DragonFly__) +    char pty_buf[PATH_MAX]; +#define q_ptsname(x) pty_buf +#else +    char *pty_buf = NULL; +#define q_ptsname(x) ptsname(x) +#endif + +    if (openpty(&amaster, aslave, pty_buf, NULL, NULL) < 0) { +        return -1; +    } + +    /* Set raw attributes on the pty. */ +    tcgetattr(*aslave, &tty); +    cfmakeraw(&tty); +    tcsetattr(*aslave, TCSAFLUSH, &tty); + +    if (pty_name) { +        strcpy(pty_name, q_ptsname(amaster)); +    } + +    return amaster; +} diff --git a/util/qemu-option.c b/util/qemu-option.c new file mode 100644 index 00000000..efe9d279 --- /dev/null +++ b/util/qemu-option.c @@ -0,0 +1,1180 @@ +/* + * Commandline option parsing functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <string.h> + +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qapi/qmp/types.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/option_int.h" + +/* + * Extracts the name of an option from the parameter string (p points at the + * first byte of the option name) + * + * The option name is delimited by delim (usually , or =) or the string end + * and is copied into buf. If the option name is longer than buf_size, it is + * truncated. buf is always zero terminated. + * + * The return value is the position of the delimiter/zero byte after the option + * name in p. + */ +const char *get_opt_name(char *buf, int buf_size, const char *p, char delim) +{ +    char *q; + +    q = buf; +    while (*p != '\0' && *p != delim) { +        if (q && (q - buf) < buf_size - 1) +            *q++ = *p; +        p++; +    } +    if (q) +        *q = '\0'; + +    return p; +} + +/* + * Extracts the value of an option from the parameter string p (p points at the + * first byte of the option value) + * + * This function is comparable to get_opt_name with the difference that the + * delimiter is fixed to be comma which starts a new option. To specify an + * option value that contains commas, double each comma. 
+ */ +const char *get_opt_value(char *buf, int buf_size, const char *p) +{ +    char *q; + +    q = buf; +    while (*p != '\0') { +        if (*p == ',') { +            if (*(p + 1) != ',') +                break; +            p++; +        } +        if (q && (q - buf) < buf_size - 1) +            *q++ = *p; +        p++; +    } +    if (q) +        *q = '\0'; + +    return p; +} + +int get_next_param_value(char *buf, int buf_size, +                         const char *tag, const char **pstr) +{ +    const char *p; +    char option[128]; + +    p = *pstr; +    for(;;) { +        p = get_opt_name(option, sizeof(option), p, '='); +        if (*p != '=') +            break; +        p++; +        if (!strcmp(tag, option)) { +            *pstr = get_opt_value(buf, buf_size, p); +            if (**pstr == ',') { +                (*pstr)++; +            } +            return strlen(buf); +        } else { +            p = get_opt_value(NULL, 0, p); +        } +        if (*p != ',') +            break; +        p++; +    } +    return 0; +} + +int get_param_value(char *buf, int buf_size, +                    const char *tag, const char *str) +{ +    return get_next_param_value(buf, buf_size, tag, &str); +} + +static void parse_option_bool(const char *name, const char *value, bool *ret, +                              Error **errp) +{ +    if (value != NULL) { +        if (!strcmp(value, "on")) { +            *ret = 1; +        } else if (!strcmp(value, "off")) { +            *ret = 0; +        } else { +            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, +                       name, "'on' or 'off'"); +        } +    } else { +        *ret = 1; +    } +} + +static void parse_option_number(const char *name, const char *value, +                                uint64_t *ret, Error **errp) +{ +    char *postfix; +    uint64_t number; + +    if (value != NULL) { +        number = strtoull(value, &postfix, 0); +        if (*postfix != '\0') { +            
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number"); +            return; +        } +        *ret = number; +    } else { +        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number"); +    } +} + +static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc, +                                            const char *name) +{ +    int i; + +    for (i = 0; desc[i].name != NULL; i++) { +        if (strcmp(desc[i].name, name) == 0) { +            return &desc[i]; +        } +    } + +    return NULL; +} + +void parse_option_size(const char *name, const char *value, +                       uint64_t *ret, Error **errp) +{ +    char *postfix; +    double sizef; + +    if (value != NULL) { +        sizef = strtod(value, &postfix); +        switch (*postfix) { +        case 'T': +            sizef *= 1024; +            /* fall through */ +        case 'G': +            sizef *= 1024; +            /* fall through */ +        case 'M': +            sizef *= 1024; +            /* fall through */ +        case 'K': +        case 'k': +            sizef *= 1024; +            /* fall through */ +        case 'b': +        case '\0': +            *ret = (uint64_t) sizef; +            break; +        default: +            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); +#if 0 /* conversion from qerror_report() to error_set() broke this: */ +            error_printf_unless_qmp("You may use k, M, G or T suffixes for " +                    "kilobytes, megabytes, gigabytes and terabytes.\n"); +#endif +            return; +        } +    } else { +        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); +    } +} + +bool has_help_option(const char *param) +{ +    size_t buflen = strlen(param) + 1; +    char *buf = g_malloc(buflen); +    const char *p = param; +    bool result = false; + +    while (*p) { +        p = get_opt_value(buf, buflen, p); +        if (*p) { +            p++; +        } + +        if 
(is_help_option(buf)) { +            result = true; +            goto out; +        } +    } + +out: +    g_free(buf); +    return result; +} + +bool is_valid_option_list(const char *param) +{ +    size_t buflen = strlen(param) + 1; +    char *buf = g_malloc(buflen); +    const char *p = param; +    bool result = true; + +    while (*p) { +        p = get_opt_value(buf, buflen, p); +        if (*p && !*++p) { +            result = false; +            goto out; +        } + +        if (!*buf || *buf == ',') { +            result = false; +            goto out; +        } +    } + +out: +    g_free(buf); +    return result; +} + +void qemu_opts_print_help(QemuOptsList *list) +{ +    QemuOptDesc *desc; + +    assert(list); +    desc = list->desc; +    printf("Supported options:\n"); +    while (desc && desc->name) { +        printf("%-16s %s\n", desc->name, +               desc->help ? desc->help : "No description available"); +        desc++; +    } +} +/* ------------------------------------------------------------------ */ + +QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name) +{ +    QemuOpt *opt; + +    QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) { +        if (strcmp(opt->name, name) != 0) +            continue; +        return opt; +    } +    return NULL; +} + +static void qemu_opt_del(QemuOpt *opt) +{ +    QTAILQ_REMOVE(&opt->opts->head, opt, next); +    g_free(opt->name); +    g_free(opt->str); +    g_free(opt); +} + +/* qemu_opt_set allows many settings for the same option. + * This function deletes all settings for an option. 
+ */ +static void qemu_opt_del_all(QemuOpts *opts, const char *name) +{ +    QemuOpt *opt, *next_opt; + +    QTAILQ_FOREACH_SAFE(opt, &opts->head, next, next_opt) { +        if (!strcmp(opt->name, name)) { +            qemu_opt_del(opt); +        } +    } +} + +const char *qemu_opt_get(QemuOpts *opts, const char *name) +{ +    QemuOpt *opt; + +    if (opts == NULL) { +        return NULL; +    } + +    opt = qemu_opt_find(opts, name); +    if (!opt) { +        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); +        if (desc && desc->def_value_str) { +            return desc->def_value_str; +        } +    } +    return opt ? opt->str : NULL; +} + +/* Get a known option (or its default) and remove it from the list + * all in one action. Return a malloced string of the option value. + * Result must be freed by caller with g_free(). + */ +char *qemu_opt_get_del(QemuOpts *opts, const char *name) +{ +    QemuOpt *opt; +    const QemuOptDesc *desc; +    char *str = NULL; + +    if (opts == NULL) { +        return NULL; +    } + +    opt = qemu_opt_find(opts, name); +    if (!opt) { +        desc = find_desc_by_name(opts->list->desc, name); +        if (desc && desc->def_value_str) { +            str = g_strdup(desc->def_value_str); +        } +        return str; +    } +    str = opt->str; +    opt->str = NULL; +    qemu_opt_del_all(opts, name); +    return str; +} + +bool qemu_opt_has_help_opt(QemuOpts *opts) +{ +    QemuOpt *opt; + +    QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) { +        if (is_help_option(opt->name)) { +            return true; +        } +    } +    return false; +} + +static bool qemu_opt_get_bool_helper(QemuOpts *opts, const char *name, +                                     bool defval, bool del) +{ +    QemuOpt *opt; +    bool ret = defval; + +    if (opts == NULL) { +        return ret; +    } + +    opt = qemu_opt_find(opts, name); +    if (opt == NULL) { +        const QemuOptDesc *desc = 
find_desc_by_name(opts->list->desc, name); +        if (desc && desc->def_value_str) { +            parse_option_bool(name, desc->def_value_str, &ret, &error_abort); +        } +        return ret; +    } +    assert(opt->desc && opt->desc->type == QEMU_OPT_BOOL); +    ret = opt->value.boolean; +    if (del) { +        qemu_opt_del_all(opts, name); +    } +    return ret; +} + +bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval) +{ +    return qemu_opt_get_bool_helper(opts, name, defval, false); +} + +bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval) +{ +    return qemu_opt_get_bool_helper(opts, name, defval, true); +} + +static uint64_t qemu_opt_get_number_helper(QemuOpts *opts, const char *name, +                                           uint64_t defval, bool del) +{ +    QemuOpt *opt; +    uint64_t ret = defval; + +    if (opts == NULL) { +        return ret; +    } + +    opt = qemu_opt_find(opts, name); +    if (opt == NULL) { +        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); +        if (desc && desc->def_value_str) { +            parse_option_number(name, desc->def_value_str, &ret, &error_abort); +        } +        return ret; +    } +    assert(opt->desc && opt->desc->type == QEMU_OPT_NUMBER); +    ret = opt->value.uint; +    if (del) { +        qemu_opt_del_all(opts, name); +    } +    return ret; +} + +uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval) +{ +    return qemu_opt_get_number_helper(opts, name, defval, false); +} + +uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name, +                                 uint64_t defval) +{ +    return qemu_opt_get_number_helper(opts, name, defval, true); +} + +static uint64_t qemu_opt_get_size_helper(QemuOpts *opts, const char *name, +                                         uint64_t defval, bool del) +{ +    QemuOpt *opt; +    uint64_t ret = defval; + +    if (opts == NULL) { +        return ret; +  
  } + +    opt = qemu_opt_find(opts, name); +    if (opt == NULL) { +        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); +        if (desc && desc->def_value_str) { +            parse_option_size(name, desc->def_value_str, &ret, &error_abort); +        } +        return ret; +    } +    assert(opt->desc && opt->desc->type == QEMU_OPT_SIZE); +    ret = opt->value.uint; +    if (del) { +        qemu_opt_del_all(opts, name); +    } +    return ret; +} + +uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval) +{ +    return qemu_opt_get_size_helper(opts, name, defval, false); +} + +uint64_t qemu_opt_get_size_del(QemuOpts *opts, const char *name, +                               uint64_t defval) +{ +    return qemu_opt_get_size_helper(opts, name, defval, true); +} + +static void qemu_opt_parse(QemuOpt *opt, Error **errp) +{ +    if (opt->desc == NULL) +        return; + +    switch (opt->desc->type) { +    case QEMU_OPT_STRING: +        /* nothing */ +        return; +    case QEMU_OPT_BOOL: +        parse_option_bool(opt->name, opt->str, &opt->value.boolean, errp); +        break; +    case QEMU_OPT_NUMBER: +        parse_option_number(opt->name, opt->str, &opt->value.uint, errp); +        break; +    case QEMU_OPT_SIZE: +        parse_option_size(opt->name, opt->str, &opt->value.uint, errp); +        break; +    default: +        abort(); +    } +} + +static bool opts_accepts_any(const QemuOpts *opts) +{ +    return opts->list->desc[0].name == NULL; +} + +int qemu_opt_unset(QemuOpts *opts, const char *name) +{ +    QemuOpt *opt = qemu_opt_find(opts, name); + +    assert(opts_accepts_any(opts)); + +    if (opt == NULL) { +        return -1; +    } else { +        qemu_opt_del(opt); +        return 0; +    } +} + +static void opt_set(QemuOpts *opts, const char *name, const char *value, +                    bool prepend, Error **errp) +{ +    QemuOpt *opt; +    const QemuOptDesc *desc; +    Error *local_err = NULL; + +    
desc = find_desc_by_name(opts->list->desc, name); +    if (!desc && !opts_accepts_any(opts)) { +        error_setg(errp, QERR_INVALID_PARAMETER, name); +        return; +    } + +    opt = g_malloc0(sizeof(*opt)); +    opt->name = g_strdup(name); +    opt->opts = opts; +    if (prepend) { +        QTAILQ_INSERT_HEAD(&opts->head, opt, next); +    } else { +        QTAILQ_INSERT_TAIL(&opts->head, opt, next); +    } +    opt->desc = desc; +    opt->str = g_strdup(value); +    qemu_opt_parse(opt, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        qemu_opt_del(opt); +    } +} + +void qemu_opt_set(QemuOpts *opts, const char *name, const char *value, +                  Error **errp) +{ +    opt_set(opts, name, value, false, errp); +} + +void qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val, +                       Error **errp) +{ +    QemuOpt *opt; +    const QemuOptDesc *desc = opts->list->desc; + +    opt = g_malloc0(sizeof(*opt)); +    opt->desc = find_desc_by_name(desc, name); +    if (!opt->desc && !opts_accepts_any(opts)) { +        error_setg(errp, QERR_INVALID_PARAMETER, name); +        g_free(opt); +        return; +    } + +    opt->name = g_strdup(name); +    opt->opts = opts; +    opt->value.boolean = !!val; +    opt->str = g_strdup(val ? 
"on" : "off"); +    QTAILQ_INSERT_TAIL(&opts->head, opt, next); +} + +void qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val, +                         Error **errp) +{ +    QemuOpt *opt; +    const QemuOptDesc *desc = opts->list->desc; + +    opt = g_malloc0(sizeof(*opt)); +    opt->desc = find_desc_by_name(desc, name); +    if (!opt->desc && !opts_accepts_any(opts)) { +        error_setg(errp, QERR_INVALID_PARAMETER, name); +        g_free(opt); +        return; +    } + +    opt->name = g_strdup(name); +    opt->opts = opts; +    opt->value.uint = val; +    opt->str = g_strdup_printf("%" PRId64, val); +    QTAILQ_INSERT_TAIL(&opts->head, opt, next); +} + +/** + * For each member of @opts, call @func(@opaque, name, value, @errp). + * @func() may store an Error through @errp, but must return non-zero then. + * When @func() returns non-zero, break the loop and return that value. + * Return zero when the loop completes. + */ +int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, +                     Error **errp) +{ +    QemuOpt *opt; +    int rc; + +    QTAILQ_FOREACH(opt, &opts->head, next) { +        rc = func(opaque, opt->name, opt->str, errp); +        if (rc) { +            return rc; +        } +        assert(!errp || !*errp); +    } +    return 0; +} + +QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id) +{ +    QemuOpts *opts; + +    QTAILQ_FOREACH(opts, &list->head, next) { +        if (!opts->id && !id) { +            return opts; +        } +        if (opts->id && id && !strcmp(opts->id, id)) { +            return opts; +        } +    } +    return NULL; +} + +QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, +                           int fail_if_exists, Error **errp) +{ +    QemuOpts *opts = NULL; + +    if (id) { +        if (!id_wellformed(id)) { +            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id", +                       "an identifier"); +#if 0 /* conversion from 
qerror_report() to error_set() broke this: */ +            error_printf_unless_qmp("Identifiers consist of letters, digits, '-', '.', '_', starting with a letter.\n"); +#endif +            return NULL; +        } +        opts = qemu_opts_find(list, id); +        if (opts != NULL) { +            if (fail_if_exists && !list->merge_lists) { +                error_setg(errp, "Duplicate ID '%s' for %s", id, list->name); +                return NULL; +            } else { +                return opts; +            } +        } +    } else if (list->merge_lists) { +        opts = qemu_opts_find(list, NULL); +        if (opts) { +            return opts; +        } +    } +    opts = g_malloc0(sizeof(*opts)); +    opts->id = g_strdup(id); +    opts->list = list; +    loc_save(&opts->loc); +    QTAILQ_INIT(&opts->head); +    QTAILQ_INSERT_TAIL(&list->head, opts, next); +    return opts; +} + +void qemu_opts_reset(QemuOptsList *list) +{ +    QemuOpts *opts, *next_opts; + +    QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) { +        qemu_opts_del(opts); +    } +} + +void qemu_opts_loc_restore(QemuOpts *opts) +{ +    loc_restore(&opts->loc); +} + +void qemu_opts_set(QemuOptsList *list, const char *id, +                   const char *name, const char *value, Error **errp) +{ +    QemuOpts *opts; +    Error *local_err = NULL; + +    opts = qemu_opts_create(list, id, 1, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        return; +    } +    qemu_opt_set(opts, name, value, errp); +} + +const char *qemu_opts_id(QemuOpts *opts) +{ +    return opts->id; +} + +/* The id string will be g_free()d by qemu_opts_del */ +void qemu_opts_set_id(QemuOpts *opts, char *id) +{ +    opts->id = id; +} + +void qemu_opts_del(QemuOpts *opts) +{ +    QemuOpt *opt; + +    if (opts == NULL) { +        return; +    } + +    for (;;) { +        opt = QTAILQ_FIRST(&opts->head); +        if (opt == NULL) +            break; +        qemu_opt_del(opt); +    } +    
QTAILQ_REMOVE(&opts->list->head, opts, next); +    g_free(opts->id); +    g_free(opts); +} + +void qemu_opts_print(QemuOpts *opts, const char *sep) +{ +    QemuOpt *opt; +    QemuOptDesc *desc = opts->list->desc; + +    if (desc[0].name == NULL) { +        QTAILQ_FOREACH(opt, &opts->head, next) { +            printf("%s%s=\"%s\"", sep, opt->name, opt->str); +        } +        return; +    } +    for (; desc && desc->name; desc++) { +        const char *value; +        QemuOpt *opt = qemu_opt_find(opts, desc->name); + +        value = opt ? opt->str : desc->def_value_str; +        if (!value) { +            continue; +        } +        if (desc->type == QEMU_OPT_STRING) { +            printf("%s%s='%s'", sep, desc->name, value); +        } else if ((desc->type == QEMU_OPT_SIZE || +                    desc->type == QEMU_OPT_NUMBER) && opt) { +            printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint); +        } else { +            printf("%s%s=%s", sep, desc->name, value); +        } +    } +} + +static void opts_do_parse(QemuOpts *opts, const char *params, +                          const char *firstname, bool prepend, Error **errp) +{ +    char option[128], value[1024]; +    const char *p,*pe,*pc; +    Error *local_err = NULL; + +    for (p = params; *p != '\0'; p++) { +        pe = strchr(p, '='); +        pc = strchr(p, ','); +        if (!pe || (pc && pc < pe)) { +            /* found "foo,more" */ +            if (p == params && firstname) { +                /* implicitly named first option */ +                pstrcpy(option, sizeof(option), firstname); +                p = get_opt_value(value, sizeof(value), p); +            } else { +                /* option without value, probably a flag */ +                p = get_opt_name(option, sizeof(option), p, ','); +                if (strncmp(option, "no", 2) == 0) { +                    memmove(option, option+2, strlen(option+2)+1); +                    pstrcpy(value, sizeof(value), "off"); +         
       } else { +                    pstrcpy(value, sizeof(value), "on"); +                } +            } +        } else { +            /* found "foo=bar,more" */ +            p = get_opt_name(option, sizeof(option), p, '='); +            if (*p != '=') { +                break; +            } +            p++; +            p = get_opt_value(value, sizeof(value), p); +        } +        if (strcmp(option, "id") != 0) { +            /* store and parse */ +            opt_set(opts, option, value, prepend, &local_err); +            if (local_err) { +                error_propagate(errp, local_err); +                return; +            } +        } +        if (*p != ',') { +            break; +        } +    } +} + +/** + * Store options parsed from @params into @opts. + * If @firstname is non-null, the first key=value in @params may omit + * key=, and is treated as if key was @firstname. + * On error, store an error object through @errp if non-null. + */ +void qemu_opts_do_parse(QemuOpts *opts, const char *params, +                       const char *firstname, Error **errp) +{ +    opts_do_parse(opts, params, firstname, false, errp); +} + +static QemuOpts *opts_parse(QemuOptsList *list, const char *params, +                            bool permit_abbrev, bool defaults, Error **errp) +{ +    const char *firstname; +    char value[1024], *id = NULL; +    const char *p; +    QemuOpts *opts; +    Error *local_err = NULL; + +    assert(!permit_abbrev || list->implied_opt_name); +    firstname = permit_abbrev ? 
list->implied_opt_name : NULL; + +    if (strncmp(params, "id=", 3) == 0) { +        get_opt_value(value, sizeof(value), params+3); +        id = value; +    } else if ((p = strstr(params, ",id=")) != NULL) { +        get_opt_value(value, sizeof(value), p+4); +        id = value; +    } + +    /* +     * This code doesn't work for defaults && !list->merge_lists: when +     * params has no id=, and list has an element with !opts->id, it +     * appends a new element instead of returning the existing opts. +     * However, we got no use for this case.  Guard against possible +     * (if unlikely) future misuse: +     */ +    assert(!defaults || list->merge_lists); +    opts = qemu_opts_create(list, id, !defaults, &local_err); +    if (opts == NULL) { +        error_propagate(errp, local_err); +        return NULL; +    } + +    opts_do_parse(opts, params, firstname, defaults, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        qemu_opts_del(opts); +        return NULL; +    } + +    return opts; +} + +/** + * Create a QemuOpts in @list and with options parsed from @params. + * If @permit_abbrev, the first key=value in @params may omit key=, + * and is treated as if key was @list->implied_opt_name. + * On error, store an error object through @errp if non-null. + * Return the new QemuOpts on success, null pointer on error. + */ +QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, +                          bool permit_abbrev, Error **errp) +{ +    return opts_parse(list, params, permit_abbrev, false, errp); +} + +/** + * Create a QemuOpts in @list and with options parsed from @params. + * If @permit_abbrev, the first key=value in @params may omit key=, + * and is treated as if key was @list->implied_opt_name. + * Report errors with error_report_err().  This is inappropriate in + * QMP context.  Do not use this function there! + * Return the new QemuOpts on success, null pointer on error. 
+ */ +QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params, +                                  bool permit_abbrev) +{ +    Error *err = NULL; +    QemuOpts *opts; + +    opts = opts_parse(list, params, permit_abbrev, false, &err); +    if (err) { +        error_report_err(err); +    } +    return opts; +} + +void qemu_opts_set_defaults(QemuOptsList *list, const char *params, +                            int permit_abbrev) +{ +    QemuOpts *opts; + +    opts = opts_parse(list, params, permit_abbrev, true, NULL); +    assert(opts); +} + +typedef struct OptsFromQDictState { +    QemuOpts *opts; +    Error **errp; +} OptsFromQDictState; + +static void qemu_opts_from_qdict_1(const char *key, QObject *obj, void *opaque) +{ +    OptsFromQDictState *state = opaque; +    char buf[32]; +    const char *value; +    int n; + +    if (!strcmp(key, "id") || *state->errp) { +        return; +    } + +    switch (qobject_type(obj)) { +    case QTYPE_QSTRING: +        value = qstring_get_str(qobject_to_qstring(obj)); +        break; +    case QTYPE_QINT: +        n = snprintf(buf, sizeof(buf), "%" PRId64, +                     qint_get_int(qobject_to_qint(obj))); +        assert(n < sizeof(buf)); +        value = buf; +        break; +    case QTYPE_QFLOAT: +        n = snprintf(buf, sizeof(buf), "%.17g", +                     qfloat_get_double(qobject_to_qfloat(obj))); +        assert(n < sizeof(buf)); +        value = buf; +        break; +    case QTYPE_QBOOL: +        pstrcpy(buf, sizeof(buf), +                qbool_get_bool(qobject_to_qbool(obj)) ? "on" : "off"); +        value = buf; +        break; +    default: +        return; +    } + +    qemu_opt_set(state->opts, key, value, state->errp); +} + +/* + * Create QemuOpts from a QDict. + * Use value of key "id" as ID if it exists and is a QString. + * Only QStrings, QInts, QFloats and QBools are copied.  Entries with + * other types are silently ignored. 
+ */ +QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict, +                               Error **errp) +{ +    OptsFromQDictState state; +    Error *local_err = NULL; +    QemuOpts *opts; + +    opts = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1, +                            &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        return NULL; +    } + +    assert(opts != NULL); + +    state.errp = &local_err; +    state.opts = opts; +    qdict_iter(qdict, qemu_opts_from_qdict_1, &state); +    if (local_err) { +        error_propagate(errp, local_err); +        qemu_opts_del(opts); +        return NULL; +    } + +    return opts; +} + +/* + * Adds all QDict entries to the QemuOpts that can be added and removes them + * from the QDict. When this function returns, the QDict contains only those + * entries that couldn't be added to the QemuOpts. + */ +void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp) +{ +    const QDictEntry *entry, *next; + +    entry = qdict_first(qdict); + +    while (entry != NULL) { +        Error *local_err = NULL; +        OptsFromQDictState state = { +            .errp = &local_err, +            .opts = opts, +        }; + +        next = qdict_next(qdict, entry); + +        if (find_desc_by_name(opts->list->desc, entry->key)) { +            qemu_opts_from_qdict_1(entry->key, entry->value, &state); +            if (local_err) { +                error_propagate(errp, local_err); +                return; +            } else { +                qdict_del(qdict, entry->key); +            } +        } + +        entry = next; +    } +} + +/* + * Convert from QemuOpts to QDict. + * The QDict values are of type QString. + * TODO We'll want to use types appropriate for opt->desc->type, but + * this is enough for now. 
+ */ +QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict) +{ +    QemuOpt *opt; +    QObject *val; + +    if (!qdict) { +        qdict = qdict_new(); +    } +    if (opts->id) { +        qdict_put(qdict, "id", qstring_from_str(opts->id)); +    } +    QTAILQ_FOREACH(opt, &opts->head, next) { +        val = QOBJECT(qstring_from_str(opt->str)); +        qdict_put_obj(qdict, opt->name, val); +    } +    return qdict; +} + +/* Validate parsed opts against descriptions where no + * descriptions were provided in the QemuOptsList. + */ +void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp) +{ +    QemuOpt *opt; +    Error *local_err = NULL; + +    assert(opts_accepts_any(opts)); + +    QTAILQ_FOREACH(opt, &opts->head, next) { +        opt->desc = find_desc_by_name(desc, opt->name); +        if (!opt->desc) { +            error_setg(errp, QERR_INVALID_PARAMETER, opt->name); +            return; +        } + +        qemu_opt_parse(opt, &local_err); +        if (local_err) { +            error_propagate(errp, local_err); +            return; +        } +    } +} + +/** + * For each member of @list, call @func(@opaque, member, @errp). + * Call it with the current location temporarily set to the member's. + * @func() may store an Error through @errp, but must return non-zero then. + * When @func() returns non-zero, break the loop and return that value. + * Return zero when the loop completes. 
+ */ +int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, +                      void *opaque, Error **errp) +{ +    Location loc; +    QemuOpts *opts; +    int rc; + +    loc_push_none(&loc); +    QTAILQ_FOREACH(opts, &list->head, next) { +        loc_restore(&opts->loc); +        rc = func(opaque, opts, errp); +        if (rc) { +            return rc; +        } +        assert(!errp || !*errp); +    } +    loc_pop(&loc); +    return 0; +} + +static size_t count_opts_list(QemuOptsList *list) +{ +    QemuOptDesc *desc = NULL; +    size_t num_opts = 0; + +    if (!list) { +        return 0; +    } + +    desc = list->desc; +    while (desc && desc->name) { +        num_opts++; +        desc++; +    } + +    return num_opts; +} + +void qemu_opts_free(QemuOptsList *list) +{ +    g_free(list); +} + +/* Realloc dst option list and append options from an option list (list) + * to it. dst could be NULL or a malloced list. + * The lifetime of dst must be shorter than the input list because the + * QemuOptDesc->name, ->help, and ->def_value_str strings are shared. + */ +QemuOptsList *qemu_opts_append(QemuOptsList *dst, +                               QemuOptsList *list) +{ +    size_t num_opts, num_dst_opts; +    QemuOptDesc *desc; +    bool need_init = false; +    bool need_head_update; + +    if (!list) { +        return dst; +    } + +    /* If dst is NULL, after realloc, some area of dst should be initialized +     * before adding options to it. +     */ +    if (!dst) { +        need_init = true; +        need_head_update = true; +    } else { +        /* Moreover, even if dst is not NULL, the realloc may move it to a +         * different address in which case we may get a stale tail pointer +         * in dst->head. 
*/ +        need_head_update = QTAILQ_EMPTY(&dst->head); +    } + +    num_opts = count_opts_list(dst); +    num_dst_opts = num_opts; +    num_opts += count_opts_list(list); +    dst = g_realloc(dst, sizeof(QemuOptsList) + +                    (num_opts + 1) * sizeof(QemuOptDesc)); +    if (need_init) { +        dst->name = NULL; +        dst->implied_opt_name = NULL; +        dst->merge_lists = false; +    } +    if (need_head_update) { +        QTAILQ_INIT(&dst->head); +    } +    dst->desc[num_dst_opts].name = NULL; + +    /* append list->desc to dst->desc */ +    if (list) { +        desc = list->desc; +        while (desc && desc->name) { +            if (find_desc_by_name(dst->desc, desc->name) == NULL) { +                dst->desc[num_dst_opts++] = *desc; +                dst->desc[num_dst_opts].name = NULL; +            } +            desc++; +        } +    } + +    return dst; +} diff --git a/util/qemu-progress.c b/util/qemu-progress.c new file mode 100644 index 00000000..4ee5cd07 --- /dev/null +++ b/util/qemu-progress.c @@ -0,0 +1,159 @@ +/* + * QEMU progress printing utility functions + * + * Copyright (C) 2011 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/osdep.h" +#include <stdio.h> + +struct progress_state { +    float current; +    float last_print; +    float min_skip; +    void (*print)(void); +    void (*end)(void); +}; + +static struct progress_state state; +static volatile sig_atomic_t print_pending; + +/* + * Simple progress print function. + * @percent relative percent of current operation + * @max percent of total operation + */ +static void progress_simple_print(void) +{ +    printf("    (%3.2f/100%%)\r", state.current); +    fflush(stdout); +} + +static void progress_simple_end(void) +{ +    printf("\n"); +} + +static void progress_simple_init(void) +{ +    state.print = progress_simple_print; +    state.end = progress_simple_end; +} + +#ifdef CONFIG_POSIX +static void sigusr_print(int signal) +{ +    print_pending = 1; +} +#endif + +static void progress_dummy_print(void) +{ +    if (print_pending) { +        fprintf(stderr, "    (%3.2f/100%%)\n", state.current); +        print_pending = 0; +    } +} + +static void progress_dummy_end(void) +{ +} + +static void progress_dummy_init(void) +{ +#ifdef CONFIG_POSIX +    struct sigaction action; +    sigset_t set; + +    memset(&action, 0, sizeof(action)); +    sigfillset(&action.sa_mask); +    action.sa_handler = sigusr_print; +    action.sa_flags = 0; +    sigaction(SIGUSR1, &action, NULL); + +    /* +     * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the +     * tools that use the progress report SIGUSR1 isn't used in this meaning +     * and instead should print the progress, so reenable it. 
+     */ +    sigemptyset(&set); +    sigaddset(&set, SIGUSR1); +    pthread_sigmask(SIG_UNBLOCK, &set, NULL); +#endif + +    state.print = progress_dummy_print; +    state.end = progress_dummy_end; +} + +/* + * Initialize progress reporting. + * If @enabled is false, actual reporting is suppressed.  The user can + * still trigger a report by sending a SIGUSR1. + * Reports are also suppressed unless we've had at least @min_skip + * percent progress since the last report. + */ +void qemu_progress_init(int enabled, float min_skip) +{ +    state.min_skip = min_skip; +    if (enabled) { +        progress_simple_init(); +    } else { +        progress_dummy_init(); +    } +} + +void qemu_progress_end(void) +{ +    state.end(); +} + +/* + * Report progress. + * @delta is how much progress we made. + * If @max is zero, @delta is an absolut value of the total job done. + * Else, @delta is a progress delta since the last call, as a fraction + * of @max.  I.e. the delta is @delta * @max / 100. This allows + * relative accounting of functions which may be a different fraction of + * the full job, depending on the context they are called in. I.e. + * a function might be considered 40% of the full job if used from + * bdrv_img_create() but only 20% if called from img_convert(). 
+ */ +void qemu_progress_print(float delta, int max) +{ +    float current; + +    if (max == 0) { +        current = delta; +    } else { +        current = state.current + delta / 100 * max; +    } +    if (current > 100) { +        current = 100; +    } +    state.current = current; + +    if (current > (state.last_print + state.min_skip) || +        (current == 100) || (current == 0)) { +        state.last_print = state.current; +        state.print(); +    } +} diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c new file mode 100644 index 00000000..2add83a0 --- /dev/null +++ b/util/qemu-sockets.c @@ -0,0 +1,1017 @@ +/* + *  inet and unix socket functions for qemu + * + *  (c) 2008 Gerd Hoffmann <kraxel@redhat.com> + * + *  This program is free software; you can redistribute it and/or modify + *  it under the terms of the GNU General Public License as published by + *  the Free Software Foundation; under version 2 of the License. + * + *  This program is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + *  GNU General Public License for more details. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
/*
 * Return the port stored in the socket address of @e, in host byte order.
 * Only PF_INET and PF_INET6 entries carry a port; 0 is returned for any
 * other address family.
 */
static int inet_getport(struct addrinfo *e)
{
    if (e->ai_family == PF_INET6) {
        const struct sockaddr_in6 *sa6 = (const void *)e->ai_addr;

        return ntohs(sa6->sin6_port);
    }
    if (e->ai_family == PF_INET) {
        const struct sockaddr_in *sa4 = (const void *)e->ai_addr;

        return ntohs(sa4->sin_port);
    }
    return 0;
}
/*
 * Create a listening stream socket from the "host", "port", "to", "ipv4"
 * and "ipv6" options in @opts.
 *
 * @port_offset is added to the configured port before the address lookup
 * (and to "to" when computing the upper end of the port range), and is
 * subtracted again from the port written back to @opts.
 *
 * Every address returned by getaddrinfo() is tried in turn; for each one
 * the ports from the resolved port up to the upper bound are tried until
 * bind() succeeds.  On success the numeric host, the bound port and the
 * address family flags are stored back into @opts.
 *
 * Returns the listening socket, or -1 on failure with an error stored
 * through @errp.
 */
int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
{
    struct addrinfo ai,*res,*e;
    const char *addr;
    char port[33];
    char uaddr[INET6_ADDRSTRLEN+1];
    char uport[33];
    int slisten, rc, to, port_min, port_max, p;

    memset(&ai,0, sizeof(ai));
    ai.ai_flags = AI_PASSIVE;
    ai.ai_family = PF_UNSPEC;
    ai.ai_socktype = SOCK_STREAM;

    /* Both "host" and "port" are mandatory. */
    if ((qemu_opt_get(opts, "host") == NULL) ||
        (qemu_opt_get(opts, "port") == NULL)) {
        error_setg(errp, "host and/or port not specified");
        return -1;
    }
    pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
    addr = qemu_opt_get(opts, "host");

    to = qemu_opt_get_number(opts, "to", 0);
    /* "ipv6" is checked last, so it wins when both flags are set. */
    if (qemu_opt_get_bool(opts, "ipv4", 0))
        ai.ai_family = PF_INET;
    if (qemu_opt_get_bool(opts, "ipv6", 0))
        ai.ai_family = PF_INET6;

    /* lookup */
    if (port_offset) {
        /*
         * With an offset the port must be numeric: rewrite the port
         * string as base port + offset, rejecting results above 65535.
         */
        unsigned long long baseport;
        if (parse_uint_full(port, &baseport, 10) < 0) {
            error_setg(errp, "can't convert to a number: %s", port);
            return -1;
        }
        if (baseport > 65535 ||
            baseport + port_offset > 65535) {
            error_setg(errp, "port %s out of range", port);
            return -1;
        }
        snprintf(port, sizeof(port), "%d", (int)baseport + port_offset);
    }
    /* An empty host string is passed to getaddrinfo() as NULL. */
    rc = getaddrinfo(strlen(addr) ? addr : NULL, port, &ai, &res);
    if (rc != 0) {
        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
                   gai_strerror(rc));
        return -1;
    }

    /* create socket + bind */
    for (e = res; e != NULL; e = e->ai_next) {
        /*
         * Numeric form of the candidate address; uaddr is written back to
         * @opts on success, uport is not read again in this function.
         */
        getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
		        uaddr,INET6_ADDRSTRLEN,uport,32,
		        NI_NUMERICHOST | NI_NUMERICSERV);
        slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
        if (slisten < 0) {
            /* Only the failure on the last candidate is reported. */
            if (!e->ai_next) {
                error_setg_errno(errp, errno, "Failed to create socket");
            }
            continue;
        }

        socket_set_fast_reuse(slisten);
#ifdef IPV6_V6ONLY
        if (e->ai_family == PF_INET6) {
            /* listen on both ipv4 and ipv6 */
            const int off = 0;
            qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &off,
                            sizeof(off));
        }
#endif

        /* Try each port in [port_min, port_max] until bind() succeeds. */
        port_min = inet_getport(e);
        port_max = to ? to + port_offset : port_min;
        for (p = port_min; p <= port_max; p++) {
            inet_setport(e, p);
            if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
                goto listen;
            }
            if (p == port_max) {
                /* Range exhausted; report only for the last candidate. */
                if (!e->ai_next) {
                    error_setg_errno(errp, errno, "Failed to bind socket");
                }
            }
        }
        closesocket(slisten);
    }
    freeaddrinfo(res);
    return -1;

listen:
    /* Reached with slisten bound to the address in e. */
    if (listen(slisten,1) != 0) {
        error_setg_errno(errp, errno, "Failed to listen on socket");
        closesocket(slisten);
        freeaddrinfo(res);
        return -1;
    }
    /* Record what was actually bound; the port is stored without the offset. */
    qemu_opt_set(opts, "host", uaddr, &error_abort);
    qemu_opt_set_number(opts, "port", inet_getport(e) - port_offset,
                        &error_abort);
    qemu_opt_set_bool(opts, "ipv6", e->ai_family == PF_INET6,
                      &error_abort);
    qemu_opt_set_bool(opts, "ipv4", e->ai_family != PF_INET6,
                      &error_abort);
    freeaddrinfo(res);
    return slisten;
}
qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize); +    } while (rc == -1 && socket_error() == EINTR); + +    /* update rc to contain error */ +    if (!rc && val) { +        rc = -1; +        errno = val; +    } + +    /* connect error */ +    if (rc < 0) { +        error_setg_errno(&err, errno, "Error connecting to socket"); +        closesocket(s->fd); +        s->fd = rc; +    } + +    /* try to connect to the next address on the list */ +    if (s->current_addr) { +        while (s->current_addr->ai_next != NULL && s->fd < 0) { +            s->current_addr = s->current_addr->ai_next; +            s->fd = inet_connect_addr(s->current_addr, &in_progress, s, NULL); +            if (s->fd < 0) { +                error_free(err); +                err = NULL; +                error_setg_errno(&err, errno, "Unable to start socket connect"); +            } +            /* connect in progress */ +            if (in_progress) { +                goto out; +            } +        } + +        freeaddrinfo(s->addr_list); +    } + +    if (s->callback) { +        s->callback(s->fd, err, s->opaque); +    } +    g_free(s); +out: +    error_free(err); +} + +static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, +                             ConnectState *connect_state, Error **errp) +{ +    int sock, rc; + +    *in_progress = false; + +    sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); +    if (sock < 0) { +        error_setg_errno(errp, errno, "Failed to create socket"); +        return -1; +    } +    socket_set_fast_reuse(sock); +    if (connect_state != NULL) { +        qemu_set_nonblock(sock); +    } +    /* connect to peer */ +    do { +        rc = 0; +        if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) { +            rc = -socket_error(); +        } +    } while (rc == -EINTR); + +    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) { +        connect_state->fd = sock; +        
qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state); +        *in_progress = true; +    } else if (rc < 0) { +        error_setg_errno(errp, errno, "Failed to connect socket"); +        closesocket(sock); +        return -1; +    } +    return sock; +} + +static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp) +{ +    struct addrinfo ai, *res; +    int rc; +    const char *addr; +    const char *port; + +    memset(&ai, 0, sizeof(ai)); + +    ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; +    ai.ai_family = PF_UNSPEC; +    ai.ai_socktype = SOCK_STREAM; + +    addr = qemu_opt_get(opts, "host"); +    port = qemu_opt_get(opts, "port"); +    if (addr == NULL || port == NULL) { +        error_setg(errp, "host and/or port not specified"); +        return NULL; +    } + +    if (qemu_opt_get_bool(opts, "ipv4", 0)) { +        ai.ai_family = PF_INET; +    } +    if (qemu_opt_get_bool(opts, "ipv6", 0)) { +        ai.ai_family = PF_INET6; +    } + +    /* lookup */ +    rc = getaddrinfo(addr, port, &ai, &res); +    if (rc != 0) { +        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, +                   gai_strerror(rc)); +        return NULL; +    } +    return res; +} + +/** + * Create a socket and connect it to an address. + * + * @opts: QEMU options, recognized parameters strings "host" and "port", + *        bools "ipv4" and "ipv6". + * @errp: set on error + * @callback: callback function for non-blocking connect + * @opaque: opaque for callback function + * + * Returns: -1 on error, file descriptor on success. + * + * If @callback is non-null, the connect is non-blocking.  If this + * function succeeds, callback will be called when the connection + * completes, with the file descriptor on success, or -1 on error. 
+ */ +int inet_connect_opts(QemuOpts *opts, Error **errp, +                      NonBlockingConnectHandler *callback, void *opaque) +{ +    Error *local_err = NULL; +    struct addrinfo *res, *e; +    int sock = -1; +    bool in_progress; +    ConnectState *connect_state = NULL; + +    res = inet_parse_connect_opts(opts, errp); +    if (!res) { +        return -1; +    } + +    if (callback != NULL) { +        connect_state = g_malloc0(sizeof(*connect_state)); +        connect_state->addr_list = res; +        connect_state->callback = callback; +        connect_state->opaque = opaque; +    } + +    for (e = res; e != NULL; e = e->ai_next) { +        error_free(local_err); +        local_err = NULL; +        if (connect_state != NULL) { +            connect_state->current_addr = e; +        } +        sock = inet_connect_addr(e, &in_progress, connect_state, &local_err); +        if (sock >= 0) { +            break; +        } +    } + +    if (sock < 0) { +        error_propagate(errp, local_err); +    } else if (in_progress) { +        /* wait_for_connect() will do the rest */ +        return sock; +    } else { +        if (callback) { +            callback(sock, NULL, opaque); +        } +    } +    g_free(connect_state); +    freeaddrinfo(res); +    return sock; +} + +int inet_dgram_opts(QemuOpts *opts, Error **errp) +{ +    struct addrinfo ai, *peer = NULL, *local = NULL; +    const char *addr; +    const char *port; +    int sock = -1, rc; + +    /* lookup peer addr */ +    memset(&ai,0, sizeof(ai)); +    ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; +    ai.ai_family = PF_UNSPEC; +    ai.ai_socktype = SOCK_DGRAM; + +    addr = qemu_opt_get(opts, "host"); +    port = qemu_opt_get(opts, "port"); +    if (addr == NULL || strlen(addr) == 0) { +        addr = "localhost"; +    } +    if (port == NULL || strlen(port) == 0) { +        error_setg(errp, "remote port not specified"); +        return -1; +    } + +    if (qemu_opt_get_bool(opts, "ipv4", 0)) 
+        ai.ai_family = PF_INET; +    if (qemu_opt_get_bool(opts, "ipv6", 0)) +        ai.ai_family = PF_INET6; + +    if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) { +        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, +                   gai_strerror(rc)); +	return -1; +    } + +    /* lookup local addr */ +    memset(&ai,0, sizeof(ai)); +    ai.ai_flags = AI_PASSIVE; +    ai.ai_family = peer->ai_family; +    ai.ai_socktype = SOCK_DGRAM; + +    addr = qemu_opt_get(opts, "localaddr"); +    port = qemu_opt_get(opts, "localport"); +    if (addr == NULL || strlen(addr) == 0) { +        addr = NULL; +    } +    if (!port || strlen(port) == 0) +        port = "0"; + +    if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) { +        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, +                   gai_strerror(rc)); +        goto err; +    } + +    /* create socket */ +    sock = qemu_socket(peer->ai_family, peer->ai_socktype, peer->ai_protocol); +    if (sock < 0) { +        error_setg_errno(errp, errno, "Failed to create socket"); +        goto err; +    } +    socket_set_fast_reuse(sock); + +    /* bind socket */ +    if (bind(sock, local->ai_addr, local->ai_addrlen) < 0) { +        error_setg_errno(errp, errno, "Failed to bind socket"); +        goto err; +    } + +    /* connect to peer */ +    if (connect(sock,peer->ai_addr,peer->ai_addrlen) < 0) { +        error_setg_errno(errp, errno, "Failed to connect socket"); +        goto err; +    } + +    freeaddrinfo(local); +    freeaddrinfo(peer); +    return sock; + +err: +    if (-1 != sock) +        closesocket(sock); +    if (local) +        freeaddrinfo(local); +    if (peer) +        freeaddrinfo(peer); +    return -1; +} + +/* compatibility wrapper */ +InetSocketAddress *inet_parse(const char *str, Error **errp) +{ +    InetSocketAddress *addr; +    const char *optstr, *h; +    char host[65]; +    char port[33]; +    int to; +    int pos; + +    
addr = g_new0(InetSocketAddress, 1); + +    /* parse address */ +    if (str[0] == ':') { +        /* no host given */ +        host[0] = '\0'; +        if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) { +            error_setg(errp, "error parsing port in address '%s'", str); +            goto fail; +        } +    } else if (str[0] == '[') { +        /* IPv6 addr */ +        if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) { +            error_setg(errp, "error parsing IPv6 address '%s'", str); +            goto fail; +        } +        addr->ipv6 = addr->has_ipv6 = true; +    } else { +        /* hostname or IPv4 addr */ +        if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) { +            error_setg(errp, "error parsing address '%s'", str); +            goto fail; +        } +        if (host[strspn(host, "0123456789.")] == '\0') { +            addr->ipv4 = addr->has_ipv4 = true; +        } +    } + +    addr->host = g_strdup(host); +    addr->port = g_strdup(port); + +    /* parse options */ +    optstr = str + pos; +    h = strstr(optstr, ",to="); +    if (h) { +        h += 4; +        if (sscanf(h, "%d%n", &to, &pos) != 1 || +            (h[pos] != '\0' && h[pos] != ',')) { +            error_setg(errp, "error parsing to= argument"); +            goto fail; +        } +        addr->has_to = true; +        addr->to = to; +    } +    if (strstr(optstr, ",ipv4")) { +        addr->ipv4 = addr->has_ipv4 = true; +    } +    if (strstr(optstr, ",ipv6")) { +        addr->ipv6 = addr->has_ipv6 = true; +    } +    return addr; + +fail: +    qapi_free_InetSocketAddress(addr); +    return NULL; +} + +static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr) +{ +    bool ipv4 = addr->ipv4 || !addr->has_ipv4; +    bool ipv6 = addr->ipv6 || !addr->has_ipv6; + +    if (!ipv4 || !ipv6) { +        qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort); +        qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort); +    } +    if 
(addr->has_to) { +        qemu_opt_set_number(opts, "to", addr->to, &error_abort); +    } +    qemu_opt_set(opts, "host", addr->host, &error_abort); +    qemu_opt_set(opts, "port", addr->port, &error_abort); +} + +int inet_listen(const char *str, char *ostr, int olen, +                int socktype, int port_offset, Error **errp) +{ +    QemuOpts *opts; +    char *optstr; +    int sock = -1; +    InetSocketAddress *addr; + +    addr = inet_parse(str, errp); +    if (addr != NULL) { +        opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +        inet_addr_to_opts(opts, addr); +        qapi_free_InetSocketAddress(addr); +        sock = inet_listen_opts(opts, port_offset, errp); +        if (sock != -1 && ostr) { +            optstr = strchr(str, ','); +            if (qemu_opt_get_bool(opts, "ipv6", 0)) { +                snprintf(ostr, olen, "[%s]:%s%s", +                         qemu_opt_get(opts, "host"), +                         qemu_opt_get(opts, "port"), +                         optstr ? optstr : ""); +            } else { +                snprintf(ostr, olen, "%s:%s%s", +                         qemu_opt_get(opts, "host"), +                         qemu_opt_get(opts, "port"), +                         optstr ? optstr : ""); +            } +        } +        qemu_opts_del(opts); +    } +    return sock; +} + +/** + * Create a blocking socket and connect it to an address. 
+ * + * @str: address string + * @errp: set in case of an error + * + * Returns -1 in case of error, file descriptor on success + **/ +int inet_connect(const char *str, Error **errp) +{ +    QemuOpts *opts; +    int sock = -1; +    InetSocketAddress *addr; + +    addr = inet_parse(str, errp); +    if (addr != NULL) { +        opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +        inet_addr_to_opts(opts, addr); +        qapi_free_InetSocketAddress(addr); +        sock = inet_connect_opts(opts, errp, NULL, NULL); +        qemu_opts_del(opts); +    } +    return sock; +} + +/** + * Create a non-blocking socket and connect it to an address. + * Calls the callback function with fd in case of success or -1 in case of + * error. + * + * @str: address string + * @callback: callback function that is called when connect completes, + *            cannot be NULL. + * @opaque: opaque for callback function + * @errp: set in case of an error + * + * Returns: -1 on immediate error, file descriptor on success. 
+ **/ +int inet_nonblocking_connect(const char *str, +                             NonBlockingConnectHandler *callback, +                             void *opaque, Error **errp) +{ +    QemuOpts *opts; +    int sock = -1; +    InetSocketAddress *addr; + +    g_assert(callback != NULL); + +    addr = inet_parse(str, errp); +    if (addr != NULL) { +        opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +        inet_addr_to_opts(opts, addr); +        qapi_free_InetSocketAddress(addr); +        sock = inet_connect_opts(opts, errp, callback, opaque); +        qemu_opts_del(opts); +    } +    return sock; +} + +#ifndef _WIN32 + +int unix_listen_opts(QemuOpts *opts, Error **errp) +{ +    struct sockaddr_un un; +    const char *path = qemu_opt_get(opts, "path"); +    int sock, fd; + +    sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); +    if (sock < 0) { +        error_setg_errno(errp, errno, "Failed to create Unix socket"); +        return -1; +    } + +    memset(&un, 0, sizeof(un)); +    un.sun_family = AF_UNIX; +    if (path && strlen(path)) { +        snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); +    } else { +        const char *tmpdir = getenv("TMPDIR"); +        tmpdir = tmpdir ? tmpdir : "/tmp"; +        if (snprintf(un.sun_path, sizeof(un.sun_path), "%s/qemu-socket-XXXXXX", +                     tmpdir) >= sizeof(un.sun_path)) { +            error_setg_errno(errp, errno, +                             "TMPDIR environment variable (%s) too large", tmpdir); +            goto err; +        } + +        /* +         * This dummy fd usage silences the mktemp() unsecure warning. +         * Using mkstemp() doesn't make things more secure here +         * though.  bind() complains about existing files, so we have +         * to unlink first and thus re-open the race window.  The +         * worst case possible is bind() failing, i.e. a DoS attack. 
+         */ +        fd = mkstemp(un.sun_path); +        if (fd < 0) { +            error_setg_errno(errp, errno, +                             "Failed to make a temporary socket name in %s", tmpdir); +            goto err; +        } +        close(fd); +        qemu_opt_set(opts, "path", un.sun_path, &error_abort); +    } + +    if ((access(un.sun_path, F_OK) == 0) && +        unlink(un.sun_path) < 0) { +        error_setg_errno(errp, errno, +                         "Failed to unlink socket %s", un.sun_path); +        goto err; +    } +    if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) { +        error_setg_errno(errp, errno, "Failed to bind socket to %s", un.sun_path); +        goto err; +    } +    if (listen(sock, 1) < 0) { +        error_setg_errno(errp, errno, "Failed to listen on socket"); +        goto err; +    } + +    return sock; + +err: +    closesocket(sock); +    return -1; +} + +int unix_connect_opts(QemuOpts *opts, Error **errp, +                      NonBlockingConnectHandler *callback, void *opaque) +{ +    struct sockaddr_un un; +    const char *path = qemu_opt_get(opts, "path"); +    ConnectState *connect_state = NULL; +    int sock, rc; + +    if (path == NULL) { +        error_setg(errp, "unix connect: no path specified"); +        return -1; +    } + +    sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); +    if (sock < 0) { +        error_setg_errno(errp, errno, "Failed to create socket"); +        return -1; +    } +    if (callback != NULL) { +        connect_state = g_malloc0(sizeof(*connect_state)); +        connect_state->callback = callback; +        connect_state->opaque = opaque; +        qemu_set_nonblock(sock); +    } + +    memset(&un, 0, sizeof(un)); +    un.sun_family = AF_UNIX; +    snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + +    /* connect to peer */ +    do { +        rc = 0; +        if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) { +            rc = -socket_error(); +        } +    } while 
(rc == -EINTR); + +    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) { +        connect_state->fd = sock; +        qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state); +        return sock; +    } else if (rc >= 0) { +        /* non blocking socket immediate success, call callback */ +        if (callback != NULL) { +            callback(sock, NULL, opaque); +        } +    } + +    if (rc < 0) { +        error_setg_errno(errp, -rc, "Failed to connect socket"); +        close(sock); +        sock = -1; +    } + +    g_free(connect_state); +    return sock; +} + +#else + +int unix_listen_opts(QemuOpts *opts, Error **errp) +{ +    error_setg(errp, "unix sockets are not available on windows"); +    errno = ENOTSUP; +    return -1; +} + +int unix_connect_opts(QemuOpts *opts, Error **errp, +                      NonBlockingConnectHandler *callback, void *opaque) +{ +    error_setg(errp, "unix sockets are not available on windows"); +    errno = ENOTSUP; +    return -1; +} +#endif + +/* compatibility wrapper */ +int unix_listen(const char *str, char *ostr, int olen, Error **errp) +{ +    QemuOpts *opts; +    char *path, *optstr; +    int sock, len; + +    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + +    optstr = strchr(str, ','); +    if (optstr) { +        len = optstr - str; +        if (len) { +            path = g_malloc(len+1); +            snprintf(path, len+1, "%.*s", len, str); +            qemu_opt_set(opts, "path", path, &error_abort); +            g_free(path); +        } +    } else { +        qemu_opt_set(opts, "path", str, &error_abort); +    } + +    sock = unix_listen_opts(opts, errp); + +    if (sock != -1 && ostr) +        snprintf(ostr, olen, "%s%s", qemu_opt_get(opts, "path"), optstr ? 
optstr : ""); +    qemu_opts_del(opts); +    return sock; +} + +int unix_connect(const char *path, Error **errp) +{ +    QemuOpts *opts; +    int sock; + +    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +    qemu_opt_set(opts, "path", path, &error_abort); +    sock = unix_connect_opts(opts, errp, NULL, NULL); +    qemu_opts_del(opts); +    return sock; +} + + +int unix_nonblocking_connect(const char *path, +                             NonBlockingConnectHandler *callback, +                             void *opaque, Error **errp) +{ +    QemuOpts *opts; +    int sock = -1; + +    g_assert(callback != NULL); + +    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +    qemu_opt_set(opts, "path", path, &error_abort); +    sock = unix_connect_opts(opts, errp, callback, opaque); +    qemu_opts_del(opts); +    return sock; +} + +SocketAddress *socket_parse(const char *str, Error **errp) +{ +    SocketAddress *addr; + +    addr = g_new0(SocketAddress, 1); +    if (strstart(str, "unix:", NULL)) { +        if (str[5] == '\0') { +            error_setg(errp, "invalid Unix socket address"); +            goto fail; +        } else { +            addr->kind = SOCKET_ADDRESS_KIND_UNIX; +            addr->q_unix = g_new(UnixSocketAddress, 1); +            addr->q_unix->path = g_strdup(str + 5); +        } +    } else if (strstart(str, "fd:", NULL)) { +        if (str[3] == '\0') { +            error_setg(errp, "invalid file descriptor address"); +            goto fail; +        } else { +            addr->kind = SOCKET_ADDRESS_KIND_FD; +            addr->fd = g_new(String, 1); +            addr->fd->str = g_strdup(str + 3); +        } +    } else { +        addr->kind = SOCKET_ADDRESS_KIND_INET; +        addr->inet = inet_parse(str, errp); +        if (addr->inet == NULL) { +            goto fail; +        } +    } +    return addr; + +fail: +    qapi_free_SocketAddress(addr); +    return NULL; +} + +int socket_connect(SocketAddress *addr, 
Error **errp, +                   NonBlockingConnectHandler *callback, void *opaque) +{ +    QemuOpts *opts; +    int fd; + +    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +    switch (addr->kind) { +    case SOCKET_ADDRESS_KIND_INET: +        inet_addr_to_opts(opts, addr->inet); +        fd = inet_connect_opts(opts, errp, callback, opaque); +        break; + +    case SOCKET_ADDRESS_KIND_UNIX: +        qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort); +        fd = unix_connect_opts(opts, errp, callback, opaque); +        break; + +    case SOCKET_ADDRESS_KIND_FD: +        fd = monitor_get_fd(cur_mon, addr->fd->str, errp); +        if (fd >= 0 && callback) { +            qemu_set_nonblock(fd); +            callback(fd, NULL, opaque); +        } +        break; + +    default: +        abort(); +    } +    qemu_opts_del(opts); +    return fd; +} + +int socket_listen(SocketAddress *addr, Error **errp) +{ +    QemuOpts *opts; +    int fd; + +    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +    switch (addr->kind) { +    case SOCKET_ADDRESS_KIND_INET: +        inet_addr_to_opts(opts, addr->inet); +        fd = inet_listen_opts(opts, 0, errp); +        break; + +    case SOCKET_ADDRESS_KIND_UNIX: +        qemu_opt_set(opts, "path", addr->q_unix->path, &error_abort); +        fd = unix_listen_opts(opts, errp); +        break; + +    case SOCKET_ADDRESS_KIND_FD: +        fd = monitor_get_fd(cur_mon, addr->fd->str, errp); +        break; + +    default: +        abort(); +    } +    qemu_opts_del(opts); +    return fd; +} + +int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp) +{ +    QemuOpts *opts; +    int fd; + +    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); +    switch (remote->kind) { +    case SOCKET_ADDRESS_KIND_INET: +        inet_addr_to_opts(opts, remote->inet); +        if (local) { +            qemu_opt_set(opts, "localaddr", local->inet->host, 
&error_abort); +            qemu_opt_set(opts, "localport", local->inet->port, &error_abort); +        } +        fd = inet_dgram_opts(opts, errp); +        break; + +    default: +        error_setg(errp, "socket type unsupported for datagram"); +        fd = -1; +    } +    qemu_opts_del(opts); +    return fd; +} diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c new file mode 100644 index 00000000..ba67cec6 --- /dev/null +++ b/util/qemu-thread-posix.c @@ -0,0 +1,509 @@ +/* + * Wrappers around mutex/cond/thread functions + * + * Copyright Red Hat, Inc. 2009 + * + * Author: + *  Marcelo Tosatti <mtosatti@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <signal.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <sys/time.h> +#ifdef __linux__ +#include <sys/syscall.h> +#include <linux/futex.h> +#endif +#include "qemu/thread.h" +#include "qemu/atomic.h" +#include "qemu/notify.h" + +static bool name_threads; + +void qemu_thread_naming(bool enable) +{ +    name_threads = enable; + +#ifndef CONFIG_THREAD_SETNAME_BYTHREAD +    /* This is a debugging option, not fatal */ +    if (enable) { +        fprintf(stderr, "qemu: thread naming not supported on this host\n"); +    } +#endif +} + +static void error_exit(int err, const char *msg) +{ +    fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err)); +    abort(); +} + +void qemu_mutex_init(QemuMutex *mutex) +{ +    int err; + +    err = pthread_mutex_init(&mutex->lock, NULL); +    if (err) +        error_exit(err, __func__); +} + +void qemu_mutex_destroy(QemuMutex *mutex) +{ +    int err; + +    err = pthread_mutex_destroy(&mutex->lock); +    if (err) +        error_exit(err, __func__); +} + +void qemu_mutex_lock(QemuMutex *mutex) +{ +    int err; + +    err = 
pthread_mutex_lock(&mutex->lock); +    if (err) +        error_exit(err, __func__); +} + +int qemu_mutex_trylock(QemuMutex *mutex) +{ +    return pthread_mutex_trylock(&mutex->lock); +} + +void qemu_mutex_unlock(QemuMutex *mutex) +{ +    int err; + +    err = pthread_mutex_unlock(&mutex->lock); +    if (err) +        error_exit(err, __func__); +} + +void qemu_cond_init(QemuCond *cond) +{ +    int err; + +    err = pthread_cond_init(&cond->cond, NULL); +    if (err) +        error_exit(err, __func__); +} + +void qemu_cond_destroy(QemuCond *cond) +{ +    int err; + +    err = pthread_cond_destroy(&cond->cond); +    if (err) +        error_exit(err, __func__); +} + +void qemu_cond_signal(QemuCond *cond) +{ +    int err; + +    err = pthread_cond_signal(&cond->cond); +    if (err) +        error_exit(err, __func__); +} + +void qemu_cond_broadcast(QemuCond *cond) +{ +    int err; + +    err = pthread_cond_broadcast(&cond->cond); +    if (err) +        error_exit(err, __func__); +} + +void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) +{ +    int err; + +    err = pthread_cond_wait(&cond->cond, &mutex->lock); +    if (err) +        error_exit(err, __func__); +} + +void qemu_sem_init(QemuSemaphore *sem, int init) +{ +    int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) +    rc = pthread_mutex_init(&sem->lock, NULL); +    if (rc != 0) { +        error_exit(rc, __func__); +    } +    rc = pthread_cond_init(&sem->cond, NULL); +    if (rc != 0) { +        error_exit(rc, __func__); +    } +    if (init < 0) { +        error_exit(EINVAL, __func__); +    } +    sem->count = init; +#else +    rc = sem_init(&sem->sem, 0, init); +    if (rc < 0) { +        error_exit(errno, __func__); +    } +#endif +} + +void qemu_sem_destroy(QemuSemaphore *sem) +{ +    int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) +    rc = pthread_cond_destroy(&sem->cond); +    if (rc < 0) { +        error_exit(rc, __func__); +    } +    rc = pthread_mutex_destroy(&sem->lock); +    if (rc < 
0) { +        error_exit(rc, __func__); +    } +#else +    rc = sem_destroy(&sem->sem); +    if (rc < 0) { +        error_exit(errno, __func__); +    } +#endif +} + +void qemu_sem_post(QemuSemaphore *sem) +{ +    int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) +    pthread_mutex_lock(&sem->lock); +    if (sem->count == UINT_MAX) { +        rc = EINVAL; +    } else { +        sem->count++; +        rc = pthread_cond_signal(&sem->cond); +    } +    pthread_mutex_unlock(&sem->lock); +    if (rc != 0) { +        error_exit(rc, __func__); +    } +#else +    rc = sem_post(&sem->sem); +    if (rc < 0) { +        error_exit(errno, __func__); +    } +#endif +} + +static void compute_abs_deadline(struct timespec *ts, int ms) +{ +    struct timeval tv; +    gettimeofday(&tv, NULL); +    ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000; +    ts->tv_sec = tv.tv_sec + ms / 1000; +    if (ts->tv_nsec >= 1000000000) { +        ts->tv_sec++; +        ts->tv_nsec -= 1000000000; +    } +} + +int qemu_sem_timedwait(QemuSemaphore *sem, int ms) +{ +    int rc; +    struct timespec ts; + +#if defined(__APPLE__) || defined(__NetBSD__) +    rc = 0; +    compute_abs_deadline(&ts, ms); +    pthread_mutex_lock(&sem->lock); +    while (sem->count == 0) { +        rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts); +        if (rc == ETIMEDOUT) { +            break; +        } +        if (rc != 0) { +            error_exit(rc, __func__); +        } +    } +    if (rc != ETIMEDOUT) { +        --sem->count; +    } +    pthread_mutex_unlock(&sem->lock); +    return (rc == ETIMEDOUT ? -1 : 0); +#else +    if (ms <= 0) { +        /* This is cheaper than sem_timedwait.  
*/ +        do { +            rc = sem_trywait(&sem->sem); +        } while (rc == -1 && errno == EINTR); +        if (rc == -1 && errno == EAGAIN) { +            return -1; +        } +    } else { +        compute_abs_deadline(&ts, ms); +        do { +            rc = sem_timedwait(&sem->sem, &ts); +        } while (rc == -1 && errno == EINTR); +        if (rc == -1 && errno == ETIMEDOUT) { +            return -1; +        } +    } +    if (rc < 0) { +        error_exit(errno, __func__); +    } +    return 0; +#endif +} + +void qemu_sem_wait(QemuSemaphore *sem) +{ +    int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) +    pthread_mutex_lock(&sem->lock); +    while (sem->count == 0) { +        rc = pthread_cond_wait(&sem->cond, &sem->lock); +        if (rc != 0) { +            error_exit(rc, __func__); +        } +    } +    --sem->count; +    pthread_mutex_unlock(&sem->lock); +#else +    do { +        rc = sem_wait(&sem->sem); +    } while (rc == -1 && errno == EINTR); +    if (rc < 0) { +        error_exit(errno, __func__); +    } +#endif +} + +#ifdef __linux__ +#define futex(...)              
syscall(__NR_futex, __VA_ARGS__) + +static inline void futex_wake(QemuEvent *ev, int n) +{ +    futex(ev, FUTEX_WAKE, n, NULL, NULL, 0); +} + +static inline void futex_wait(QemuEvent *ev, unsigned val) +{ +    futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0); +} +#else +static inline void futex_wake(QemuEvent *ev, int n) +{ +    pthread_mutex_lock(&ev->lock); +    if (n == 1) { +        pthread_cond_signal(&ev->cond); +    } else { +        pthread_cond_broadcast(&ev->cond); +    } +    pthread_mutex_unlock(&ev->lock); +} + +static inline void futex_wait(QemuEvent *ev, unsigned val) +{ +    pthread_mutex_lock(&ev->lock); +    if (ev->value == val) { +        pthread_cond_wait(&ev->cond, &ev->lock); +    } +    pthread_mutex_unlock(&ev->lock); +} +#endif + +/* Valid transitions: + * - free->set, when setting the event + * - busy->set, when setting the event, followed by futex_wake + * - set->free, when resetting the event + * - free->busy, when waiting + * + * set->busy does not happen (it can be observed from the outside but + * it really is set->free->busy). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy. + */ + +#define EV_SET         0 +#define EV_FREE        1 +#define EV_BUSY       -1 + +void qemu_event_init(QemuEvent *ev, bool init) +{ +#ifndef __linux__ +    pthread_mutex_init(&ev->lock, NULL); +    pthread_cond_init(&ev->cond, NULL); +#endif + +    ev->value = (init ? EV_SET : EV_FREE); +} + +void qemu_event_destroy(QemuEvent *ev) +{ +#ifndef __linux__ +    pthread_mutex_destroy(&ev->lock); +    pthread_cond_destroy(&ev->cond); +#endif +} + +void qemu_event_set(QemuEvent *ev) +{ +    if (atomic_mb_read(&ev->value) != EV_SET) { +        if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { +            /* There were waiters, wake them up.  
*/ +            futex_wake(ev, INT_MAX); +        } +    } +} + +void qemu_event_reset(QemuEvent *ev) +{ +    if (atomic_mb_read(&ev->value) == EV_SET) { +        /* +         * If there was a concurrent reset (or even reset+wait), +         * do nothing.  Otherwise change EV_SET->EV_FREE. +         */ +        atomic_or(&ev->value, EV_FREE); +    } +} + +void qemu_event_wait(QemuEvent *ev) +{ +    unsigned value; + +    value = atomic_mb_read(&ev->value); +    if (value != EV_SET) { +        if (value == EV_FREE) { +            /* +             * Leave the event reset and tell qemu_event_set that there +             * are waiters.  No need to retry, because there cannot be +             * a concurent busy->free transition.  After the CAS, the +             * event will be either set or busy. +             */ +            if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { +                return; +            } +        } +        futex_wait(ev, EV_BUSY); +    } +} + +static pthread_key_t exit_key; + +union NotifierThreadData { +    void *ptr; +    NotifierList list; +}; +QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *)); + +void qemu_thread_atexit_add(Notifier *notifier) +{ +    union NotifierThreadData ntd; +    ntd.ptr = pthread_getspecific(exit_key); +    notifier_list_add(&ntd.list, notifier); +    pthread_setspecific(exit_key, ntd.ptr); +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ +    union NotifierThreadData ntd; +    ntd.ptr = pthread_getspecific(exit_key); +    notifier_remove(notifier); +    pthread_setspecific(exit_key, ntd.ptr); +} + +static void qemu_thread_atexit_run(void *arg) +{ +    union NotifierThreadData ntd = { .ptr = arg }; +    notifier_list_notify(&ntd.list, NULL); +} + +static void __attribute__((constructor)) qemu_thread_atexit_init(void) +{ +    pthread_key_create(&exit_key, qemu_thread_atexit_run); +} + + +/* Attempt to set the threads name; note that this is for debug, so + * we're not 
going to fail if we can't set it. + */ +static void qemu_thread_set_name(QemuThread *thread, const char *name) +{ +#ifdef CONFIG_PTHREAD_SETNAME_NP +    pthread_setname_np(thread->thread, name); +#endif +} + +void qemu_thread_create(QemuThread *thread, const char *name, +                       void *(*start_routine)(void*), +                       void *arg, int mode) +{ +    sigset_t set, oldset; +    int err; +    pthread_attr_t attr; + +    err = pthread_attr_init(&attr); +    if (err) { +        error_exit(err, __func__); +    } +    if (mode == QEMU_THREAD_DETACHED) { +        err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); +        if (err) { +            error_exit(err, __func__); +        } +    } + +    /* Leave signal handling to the iothread.  */ +    sigfillset(&set); +    pthread_sigmask(SIG_SETMASK, &set, &oldset); +    err = pthread_create(&thread->thread, &attr, start_routine, arg); +    if (err) +        error_exit(err, __func__); + +    if (name_threads) { +        qemu_thread_set_name(thread, name); +    } + +    pthread_sigmask(SIG_SETMASK, &oldset, NULL); + +    pthread_attr_destroy(&attr); +} + +void qemu_thread_get_self(QemuThread *thread) +{ +    thread->thread = pthread_self(); +} + +bool qemu_thread_is_self(QemuThread *thread) +{ +   return pthread_equal(pthread_self(), thread->thread); +} + +void qemu_thread_exit(void *retval) +{ +    pthread_exit(retval); +} + +void *qemu_thread_join(QemuThread *thread) +{ +    int err; +    void *ret; + +    err = pthread_join(thread->thread, &ret); +    if (err) { +        error_exit(err, __func__); +    } +    return ret; +} diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c new file mode 100644 index 00000000..406b52f9 --- /dev/null +++ b/util/qemu-thread-win32.c @@ -0,0 +1,421 @@ +/* + * Win32 implementation for mutex/cond/thread functions + * + * Copyright Red Hat, Inc. 
2010 + * + * Author: + *  Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ +#include "qemu-common.h" +#include "qemu/thread.h" +#include "qemu/notify.h" +#include <process.h> +#include <assert.h> +#include <limits.h> + +static bool name_threads; + +void qemu_thread_naming(bool enable) +{ +    /* But note we don't actually name them on Windows yet */ +    name_threads = enable; + +    fprintf(stderr, "qemu: thread naming not supported on this host\n"); +} + +static void error_exit(int err, const char *msg) +{ +    char *pstr; + +    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER, +                  NULL, err, 0, (LPTSTR)&pstr, 2, NULL); +    fprintf(stderr, "qemu: %s: %s\n", msg, pstr); +    LocalFree(pstr); +    abort(); +} + +void qemu_mutex_init(QemuMutex *mutex) +{ +    mutex->owner = 0; +    InitializeCriticalSection(&mutex->lock); +} + +void qemu_mutex_destroy(QemuMutex *mutex) +{ +    assert(mutex->owner == 0); +    DeleteCriticalSection(&mutex->lock); +} + +void qemu_mutex_lock(QemuMutex *mutex) +{ +    EnterCriticalSection(&mutex->lock); + +    /* Win32 CRITICAL_SECTIONs are recursive.  Assert that we're not +     * using them as such. 
+     */ +    assert(mutex->owner == 0); +    mutex->owner = GetCurrentThreadId(); +} + +int qemu_mutex_trylock(QemuMutex *mutex) +{ +    int owned; + +    owned = TryEnterCriticalSection(&mutex->lock); +    if (owned) { +        assert(mutex->owner == 0); +        mutex->owner = GetCurrentThreadId(); +    } +    return !owned; +} + +void qemu_mutex_unlock(QemuMutex *mutex) +{ +    assert(mutex->owner == GetCurrentThreadId()); +    mutex->owner = 0; +    LeaveCriticalSection(&mutex->lock); +} + +void qemu_cond_init(QemuCond *cond) +{ +    memset(cond, 0, sizeof(*cond)); + +    cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); +    if (!cond->sema) { +        error_exit(GetLastError(), __func__); +    } +    cond->continue_event = CreateEvent(NULL,    /* security */ +                                       FALSE,   /* auto-reset */ +                                       FALSE,   /* not signaled */ +                                       NULL);   /* name */ +    if (!cond->continue_event) { +        error_exit(GetLastError(), __func__); +    } +} + +void qemu_cond_destroy(QemuCond *cond) +{ +    BOOL result; +    result = CloseHandle(cond->continue_event); +    if (!result) { +        error_exit(GetLastError(), __func__); +    } +    cond->continue_event = 0; +    result = CloseHandle(cond->sema); +    if (!result) { +        error_exit(GetLastError(), __func__); +    } +    cond->sema = 0; +} + +void qemu_cond_signal(QemuCond *cond) +{ +    DWORD result; + +    /* +     * Signal only when there are waiters.  cond->waiters is +     * incremented by pthread_cond_wait under the external lock, +     * so we are safe about that. +     */ +    if (cond->waiters == 0) { +        return; +    } + +    /* +     * Waiting threads decrement it outside the external lock, but +     * only if another thread is executing pthread_cond_broadcast and +     * has the mutex.  So, it also cannot be decremented concurrently +     * with this particular access. 
+     */ +    cond->target = cond->waiters - 1; +    result = SignalObjectAndWait(cond->sema, cond->continue_event, +                                 INFINITE, FALSE); +    if (result == WAIT_ABANDONED || result == WAIT_FAILED) { +        error_exit(GetLastError(), __func__); +    } +} + +void qemu_cond_broadcast(QemuCond *cond) +{ +    BOOLEAN result; +    /* +     * As in pthread_cond_signal, access to cond->waiters and +     * cond->target is locked via the external mutex. +     */ +    if (cond->waiters == 0) { +        return; +    } + +    cond->target = 0; +    result = ReleaseSemaphore(cond->sema, cond->waiters, NULL); +    if (!result) { +        error_exit(GetLastError(), __func__); +    } + +    /* +     * At this point all waiters continue. Each one takes its +     * slice of the semaphore. Now it's our turn to wait: Since +     * the external mutex is held, no thread can leave cond_wait, +     * yet. For this reason, we can be sure that no thread gets +     * a chance to eat *more* than one slice. OTOH, it means +     * that the last waiter must send us a wake-up. +     */ +    WaitForSingleObject(cond->continue_event, INFINITE); +} + +void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) +{ +    /* +     * This access is protected under the mutex. +     */ +    cond->waiters++; + +    /* +     * Unlock external mutex and wait for signal. +     * NOTE: we've held mutex locked long enough to increment +     * waiters count above, so there's no problem with +     * leaving mutex unlocked before we wait on semaphore. +     */ +    qemu_mutex_unlock(mutex); +    WaitForSingleObject(cond->sema, INFINITE); + +    /* Now waiters must rendez-vous with the signaling thread and +     * let it continue.  For cond_broadcast this has heavy contention +     * and triggers thundering herd.  So goes life. +     * +     * Decrease waiters count.  The mutex is not taken, so we have +     * to do this atomically. 
+     * +     * All waiters contend for the mutex at the end of this function +     * until the signaling thread relinquishes it.  To ensure +     * each waiter consumes exactly one slice of the semaphore, +     * the signaling thread stops until it is told by the last +     * waiter that it can go on. +     */ +    if (InterlockedDecrement(&cond->waiters) == cond->target) { +        SetEvent(cond->continue_event); +    } + +    qemu_mutex_lock(mutex); +} + +void qemu_sem_init(QemuSemaphore *sem, int init) +{ +    /* Manual reset.  */ +    sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL); +} + +void qemu_sem_destroy(QemuSemaphore *sem) +{ +    CloseHandle(sem->sema); +} + +void qemu_sem_post(QemuSemaphore *sem) +{ +    ReleaseSemaphore(sem->sema, 1, NULL); +} + +int qemu_sem_timedwait(QemuSemaphore *sem, int ms) +{ +    int rc = WaitForSingleObject(sem->sema, ms); +    if (rc == WAIT_OBJECT_0) { +        return 0; +    } +    if (rc != WAIT_TIMEOUT) { +        error_exit(GetLastError(), __func__); +    } +    return -1; +} + +void qemu_sem_wait(QemuSemaphore *sem) +{ +    if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) { +        error_exit(GetLastError(), __func__); +    } +} + +void qemu_event_init(QemuEvent *ev, bool init) +{ +    /* Manual reset.  */ +    ev->event = CreateEvent(NULL, TRUE, init, NULL); +} + +void qemu_event_destroy(QemuEvent *ev) +{ +    CloseHandle(ev->event); +} + +void qemu_event_set(QemuEvent *ev) +{ +    SetEvent(ev->event); +} + +void qemu_event_reset(QemuEvent *ev) +{ +    ResetEvent(ev->event); +} + +void qemu_event_wait(QemuEvent *ev) +{ +    WaitForSingleObject(ev->event, INFINITE); +} + +struct QemuThreadData { +    /* Passed to win32_start_routine.  */ +    void             *(*start_routine)(void *); +    void             *arg; +    short             mode; +    NotifierList      exit; + +    /* Only used for joinable threads. 
*/ +    bool              exited; +    void             *ret; +    CRITICAL_SECTION  cs; +}; + +static bool atexit_registered; +static NotifierList main_thread_exit; + +static __thread QemuThreadData *qemu_thread_data; + +static void run_main_thread_exit(void) +{ +    notifier_list_notify(&main_thread_exit, NULL); +} + +void qemu_thread_atexit_add(Notifier *notifier) +{ +    if (!qemu_thread_data) { +        if (!atexit_registered) { +            atexit_registered = true; +            atexit(run_main_thread_exit); +        } +        notifier_list_add(&main_thread_exit, notifier); +    } else { +        notifier_list_add(&qemu_thread_data->exit, notifier); +    } +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ +    notifier_remove(notifier); +} + +static unsigned __stdcall win32_start_routine(void *arg) +{ +    QemuThreadData *data = (QemuThreadData *) arg; +    void *(*start_routine)(void *) = data->start_routine; +    void *thread_arg = data->arg; + +    qemu_thread_data = data; +    qemu_thread_exit(start_routine(thread_arg)); +    abort(); +} + +void qemu_thread_exit(void *arg) +{ +    QemuThreadData *data = qemu_thread_data; + +    notifier_list_notify(&data->exit, NULL); +    if (data->mode == QEMU_THREAD_JOINABLE) { +        data->ret = arg; +        EnterCriticalSection(&data->cs); +        data->exited = true; +        LeaveCriticalSection(&data->cs); +    } else { +        g_free(data); +    } +    _endthreadex(0); +} + +void *qemu_thread_join(QemuThread *thread) +{ +    QemuThreadData *data; +    void *ret; +    HANDLE handle; + +    data = thread->data; +    if (data->mode == QEMU_THREAD_DETACHED) { +        return NULL; +    } + +    /* +     * Because multiple copies of the QemuThread can exist via +     * qemu_thread_get_self, we need to store a value that cannot +     * leak there.  The simplest, non racy way is to store the TID, +     * discard the handle that _beginthreadex gives back, and +     * get another copy of the handle here. 
+     */ +    handle = qemu_thread_get_handle(thread); +    if (handle) { +        WaitForSingleObject(handle, INFINITE); +        CloseHandle(handle); +    } +    ret = data->ret; +    DeleteCriticalSection(&data->cs); +    g_free(data); +    return ret; +} + +void qemu_thread_create(QemuThread *thread, const char *name, +                       void *(*start_routine)(void *), +                       void *arg, int mode) +{ +    HANDLE hThread; +    struct QemuThreadData *data; + +    data = g_malloc(sizeof *data); +    data->start_routine = start_routine; +    data->arg = arg; +    data->mode = mode; +    data->exited = false; +    notifier_list_init(&data->exit); + +    if (data->mode != QEMU_THREAD_DETACHED) { +        InitializeCriticalSection(&data->cs); +    } + +    hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine, +                                      data, 0, &thread->tid); +    if (!hThread) { +        error_exit(GetLastError(), __func__); +    } +    CloseHandle(hThread); +    thread->data = data; +} + +void qemu_thread_get_self(QemuThread *thread) +{ +    thread->data = qemu_thread_data; +    thread->tid = GetCurrentThreadId(); +} + +HANDLE qemu_thread_get_handle(QemuThread *thread) +{ +    QemuThreadData *data; +    HANDLE handle; + +    data = thread->data; +    if (data->mode == QEMU_THREAD_DETACHED) { +        return NULL; +    } + +    EnterCriticalSection(&data->cs); +    if (!data->exited) { +        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE, +                            thread->tid); +    } else { +        handle = NULL; +    } +    LeaveCriticalSection(&data->cs); +    return handle; +} + +bool qemu_thread_is_self(QemuThread *thread) +{ +    return GetCurrentThreadId() == thread->tid; +} diff --git a/util/qemu-timer-common.c b/util/qemu-timer-common.c new file mode 100644 index 00000000..95e0847c --- /dev/null +++ b/util/qemu-timer-common.c @@ -0,0 +1,61 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 
2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/timer.h" + +/***********************************************************/ +/* real time host monotonic timer */ + +#ifdef _WIN32 + +int64_t clock_freq; + +static void __attribute__((constructor)) init_get_clock(void) +{ +    LARGE_INTEGER freq; +    int ret; +    ret = QueryPerformanceFrequency(&freq); +    if (ret == 0) { +        fprintf(stderr, "Could not calibrate ticks\n"); +        exit(1); +    } +    clock_freq = freq.QuadPart; +} + +#else + +int use_rt_clock; + +static void __attribute__((constructor)) init_get_clock(void) +{ +    use_rt_clock = 0; +#ifdef CLOCK_MONOTONIC +    { +        struct timespec ts; +        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) { +            use_rt_clock = 1; +        } +    } +#endif +} +#endif diff --git a/util/rcu.c b/util/rcu.c new file mode 100644 index 00000000..cdcad678 --- /dev/null +++ b/util/rcu.c @@ -0,0 +1,330 @@ +/* + * urcu-mb.c + * + * Userspace RCU library with explicit memory barriers + * + * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * Copyright 2015 Red Hat, Inc. + * + * Ported to QEMU by Paolo Bonzini  <pbonzini@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include "qemu-common.h" +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <stdint.h> +#include <errno.h> +#include "qemu/rcu.h" +#include "qemu/atomic.h" +#include "qemu/thread.h" +#include "qemu/main-loop.h" + +/* + * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr. + * Bits 1 and above are defined in synchronize_rcu. + */ +#define RCU_GP_LOCKED           (1UL << 0) +#define RCU_GP_CTR              (1UL << 1) + +unsigned long rcu_gp_ctr = RCU_GP_LOCKED; + +QemuEvent rcu_gp_event; +static QemuMutex rcu_gp_lock; + +/* + * Check whether a quiescent state was crossed between the beginning of + * update_counter_and_wait and now. + */ +static inline int rcu_gp_ongoing(unsigned long *ctr) +{ +    unsigned long v; + +    v = atomic_read(ctr); +    return v && (v != rcu_gp_ctr); +} + +/* Written to only by each individual reader. Read by both the reader and the + * writers. + */ +__thread struct rcu_reader_data rcu_reader; + +/* Protected by rcu_gp_lock.  */ +typedef QLIST_HEAD(, rcu_reader_data) ThreadList; +static ThreadList registry = QLIST_HEAD_INITIALIZER(registry); + +/* Wait for previous parity/grace period to be empty of readers.  */ +static void wait_for_readers(void) +{ +    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders); +    struct rcu_reader_data *index, *tmp; + +    for (;;) { +        /* We want to be notified of changes made to rcu_gp_ongoing +         * while we walk the list. 
+         */ +        qemu_event_reset(&rcu_gp_event); + +        /* Instead of using atomic_mb_set for index->waiting, and +         * atomic_mb_read for index->ctr, memory barriers are placed +         * manually since writes to different threads are independent. +         * atomic_mb_set has a smp_wmb before... +         */ +        smp_wmb(); +        QLIST_FOREACH(index, &registry, node) { +            atomic_set(&index->waiting, true); +        } + +        /* ... and a smp_mb after.  */ +        smp_mb(); + +        QLIST_FOREACH_SAFE(index, &registry, node, tmp) { +            if (!rcu_gp_ongoing(&index->ctr)) { +                QLIST_REMOVE(index, node); +                QLIST_INSERT_HEAD(&qsreaders, index, node); + +                /* No need for mb_set here, worst of all we +                 * get some extra futex wakeups. +                 */ +                atomic_set(&index->waiting, false); +            } +        } + +        /* atomic_mb_read has smp_rmb after.  */ +        smp_rmb(); + +        if (QLIST_EMPTY(&registry)) { +            break; +        } + +        /* Wait for one thread to report a quiescent state and +         * try again. +         */ +        qemu_event_wait(&rcu_gp_event); +    } + +    /* put back the reader list in the registry */ +    QLIST_SWAP(&registry, &qsreaders, node); +} + +void synchronize_rcu(void) +{ +    qemu_mutex_lock(&rcu_gp_lock); + +    if (!QLIST_EMPTY(&registry)) { +        /* In either case, the atomic_mb_set below blocks stores that free +         * old RCU-protected pointers. +         */ +        if (sizeof(rcu_gp_ctr) < 8) { +            /* For architectures with 32-bit longs, a two-subphases algorithm +             * ensures we do not encounter overflow bugs. +             * +             * Switch parity: 0 -> 1, 1 -> 0.
+             */ +            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); +            wait_for_readers(); +            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); +        } else { +            /* Increment current grace period.  */ +            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); +        } + +        wait_for_readers(); +    } + +    qemu_mutex_unlock(&rcu_gp_lock); +} + + +#define RCU_CALL_MIN_SIZE        30 + +/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h + * from liburcu.  Note that head is only used by the consumer. + */ +static struct rcu_head dummy; +static struct rcu_head *head = &dummy, **tail = &dummy.next; +static int rcu_call_count; +static QemuEvent rcu_call_ready_event; + +static void enqueue(struct rcu_head *node) +{ +    struct rcu_head **old_tail; + +    node->next = NULL; +    old_tail = atomic_xchg(&tail, &node->next); +    atomic_mb_set(old_tail, node); +} + +static struct rcu_head *try_dequeue(void) +{ +    struct rcu_head *node, *next; + +retry: +    /* Test for an empty list, which we do not expect.  Note that for +     * the consumer head and tail are always consistent.  The head +     * is consistent because only the consumer reads/writes it. +     * The tail, because it is the first step in the enqueuing. +     * It is only the next pointers that might be inconsistent. +     */ +    if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) { +        abort(); +    } + +    /* If the head node has NULL in its next pointer, the value is +     * wrong and we need to wait until its enqueuer finishes the update. +     */ +    node = head; +    next = atomic_mb_read(&head->next); +    if (!next) { +        return NULL; +    } + +    /* Since we are the sole consumer, and we excluded the empty case +     * above, the queue will always have at least two nodes: the +     * dummy node, and the one being removed.  So we do not need to update +     * the tail pointer. 
+     */ +    head = next; + +    /* If we dequeued the dummy node, add it back at the end and retry.  */ +    if (node == &dummy) { +        enqueue(node); +        goto retry; +    } + +    return node; +} + +static void *call_rcu_thread(void *opaque) +{ +    struct rcu_head *node; + +    rcu_register_thread(); + +    for (;;) { +        int tries = 0; +        int n = atomic_read(&rcu_call_count); + +        /* Heuristically wait for a decent number of callbacks to pile up. +         * Fetch rcu_call_count now, we only must process elements that were +         * added before synchronize_rcu() starts. +         */ +        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) { +            g_usleep(10000); +            if (n == 0) { +                qemu_event_reset(&rcu_call_ready_event); +                n = atomic_read(&rcu_call_count); +                if (n == 0) { +                    qemu_event_wait(&rcu_call_ready_event); +                } +            } +            n = atomic_read(&rcu_call_count); +        } + +        atomic_sub(&rcu_call_count, n); +        synchronize_rcu(); +        qemu_mutex_lock_iothread(); +        while (n > 0) { +            node = try_dequeue(); +            while (!node) { +                qemu_mutex_unlock_iothread(); +                qemu_event_reset(&rcu_call_ready_event); +                node = try_dequeue(); +                if (!node) { +                    qemu_event_wait(&rcu_call_ready_event); +                    node = try_dequeue(); +                } +                qemu_mutex_lock_iothread(); +            } + +            n--; +            node->func(node); +        } +        qemu_mutex_unlock_iothread(); +    } +    abort(); +} + +void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node)) +{ +    node->func = func; +    enqueue(node); +    atomic_inc(&rcu_call_count); +    qemu_event_set(&rcu_call_ready_event); +} + +void rcu_register_thread(void) +{ +    assert(rcu_reader.ctr == 0); 
+    qemu_mutex_lock(&rcu_gp_lock); +    QLIST_INSERT_HEAD(&registry, &rcu_reader, node); +    qemu_mutex_unlock(&rcu_gp_lock); +} + +void rcu_unregister_thread(void) +{ +    qemu_mutex_lock(&rcu_gp_lock); +    QLIST_REMOVE(&rcu_reader, node); +    qemu_mutex_unlock(&rcu_gp_lock); +} + +static void rcu_init_complete(void) +{ +    QemuThread thread; + +    qemu_mutex_init(&rcu_gp_lock); +    qemu_event_init(&rcu_gp_event, true); + +    qemu_event_init(&rcu_call_ready_event, false); + +    /* The caller is assumed to have iothread lock, so the call_rcu thread +     * must have been quiescent even after forking, just recreate it. +     */ +    qemu_thread_create(&thread, "call_rcu", call_rcu_thread, +                       NULL, QEMU_THREAD_DETACHED); + +    rcu_register_thread(); +} + +#ifdef CONFIG_POSIX +static void rcu_init_lock(void) +{ +    qemu_mutex_lock(&rcu_gp_lock); +} + +static void rcu_init_unlock(void) +{ +    qemu_mutex_unlock(&rcu_gp_lock); +} +#endif + +void rcu_after_fork(void) +{ +    memset(&registry, 0, sizeof(registry)); +    rcu_init_complete(); +} + +static void __attribute__((__constructor__)) rcu_init(void) +{ +#ifdef CONFIG_POSIX +    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_unlock); +#endif +    rcu_init_complete(); +} diff --git a/util/readline.c b/util/readline.c new file mode 100644 index 00000000..cc1302ac --- /dev/null +++ b/util/readline.c @@ -0,0 +1,515 @@ +/* + * QEMU readline utility + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and
this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/readline.h" + +#define IS_NORM 0 +#define IS_ESC  1 +#define IS_CSI  2 +#define IS_SS3  3 + +void readline_show_prompt(ReadLineState *rs) +{ +    rs->printf_func(rs->opaque, "%s", rs->prompt); +    rs->flush_func(rs->opaque); +    rs->last_cmd_buf_index = 0; +    rs->last_cmd_buf_size = 0; +    rs->esc_state = IS_NORM; +} + +/* update the displayed command line */ +static void readline_update(ReadLineState *rs) +{ +    int i, delta, len; + +    if (rs->cmd_buf_size != rs->last_cmd_buf_size || +        memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) { +        for(i = 0; i < rs->last_cmd_buf_index; i++) { +            rs->printf_func(rs->opaque, "\033[D"); +        } +        rs->cmd_buf[rs->cmd_buf_size] = '\0'; +        if (rs->read_password) { +            len = strlen(rs->cmd_buf); +            for(i = 0; i < len; i++) +                rs->printf_func(rs->opaque, "*"); +        } else { +            rs->printf_func(rs->opaque, "%s", rs->cmd_buf); +        } +        rs->printf_func(rs->opaque, "\033[K"); +        memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size); +        rs->last_cmd_buf_size = rs->cmd_buf_size; +        rs->last_cmd_buf_index = rs->cmd_buf_size; +    } +    if (rs->cmd_buf_index != rs->last_cmd_buf_index) { +        delta = rs->cmd_buf_index - rs->last_cmd_buf_index; +        if (delta > 0) { + 
           for(i = 0;i < delta; i++) { +                rs->printf_func(rs->opaque, "\033[C"); +            } +        } else { +            delta = -delta; +            for(i = 0;i < delta; i++) { +                rs->printf_func(rs->opaque, "\033[D"); +            } +        } +        rs->last_cmd_buf_index = rs->cmd_buf_index; +    } +    rs->flush_func(rs->opaque); +} + +static void readline_insert_char(ReadLineState *rs, int ch) +{ +    if (rs->cmd_buf_index < READLINE_CMD_BUF_SIZE) { +        memmove(rs->cmd_buf + rs->cmd_buf_index + 1, +                rs->cmd_buf + rs->cmd_buf_index, +                rs->cmd_buf_size - rs->cmd_buf_index); +        rs->cmd_buf[rs->cmd_buf_index] = ch; +        rs->cmd_buf_size++; +        rs->cmd_buf_index++; +    } +} + +static void readline_backward_char(ReadLineState *rs) +{ +    if (rs->cmd_buf_index > 0) { +        rs->cmd_buf_index--; +    } +} + +static void readline_forward_char(ReadLineState *rs) +{ +    if (rs->cmd_buf_index < rs->cmd_buf_size) { +        rs->cmd_buf_index++; +    } +} + +static void readline_delete_char(ReadLineState *rs) +{ +    if (rs->cmd_buf_index < rs->cmd_buf_size) { +        memmove(rs->cmd_buf + rs->cmd_buf_index, +                rs->cmd_buf + rs->cmd_buf_index + 1, +                rs->cmd_buf_size - rs->cmd_buf_index - 1); +        rs->cmd_buf_size--; +    } +} + +static void readline_backspace(ReadLineState *rs) +{ +    if (rs->cmd_buf_index > 0) { +        readline_backward_char(rs); +        readline_delete_char(rs); +    } +} + +static void readline_backword(ReadLineState *rs) +{ +    int start; + +    if (rs->cmd_buf_index == 0 || rs->cmd_buf_index > rs->cmd_buf_size) { +        return; +    } + +    start = rs->cmd_buf_index - 1; + +    /* find first word (backwards) */ +    while (start > 0) { +        if (!qemu_isspace(rs->cmd_buf[start])) { +            break; +        } + +        --start; +    } + +    /* find first space (backwards) */ +    while (start > 0) { +        if 
(qemu_isspace(rs->cmd_buf[start])) { +            ++start; +            break; +        } + +        --start; +    } + +    /* remove word */ +    if (start < rs->cmd_buf_index) { +        memmove(rs->cmd_buf + start, +                rs->cmd_buf + rs->cmd_buf_index, +                rs->cmd_buf_size - rs->cmd_buf_index); +        rs->cmd_buf_size -= rs->cmd_buf_index - start; +        rs->cmd_buf_index = start; +    } +} + +static void readline_bol(ReadLineState *rs) +{ +    rs->cmd_buf_index = 0; +} + +static void readline_eol(ReadLineState *rs) +{ +    rs->cmd_buf_index = rs->cmd_buf_size; +} + +static void readline_up_char(ReadLineState *rs) +{ +    int idx; + +    if (rs->hist_entry == 0) +	return; +    if (rs->hist_entry == -1) { +	/* Find latest entry */ +	for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { +	    if (rs->history[idx] == NULL) +		break; +	} +	rs->hist_entry = idx; +    } +    rs->hist_entry--; +    if (rs->hist_entry >= 0) { +	pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), +                rs->history[rs->hist_entry]); +	rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); +    } +} + +static void readline_down_char(ReadLineState *rs) +{ +    if (rs->hist_entry == -1) +        return; +    if (rs->hist_entry < READLINE_MAX_CMDS - 1 && +        rs->history[++rs->hist_entry] != NULL) { +	pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), +                rs->history[rs->hist_entry]); +    } else { +        rs->cmd_buf[0] = 0; +	rs->hist_entry = -1; +    } +    rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); +} + +static void readline_hist_add(ReadLineState *rs, const char *cmdline) +{ +    char *hist_entry, *new_entry; +    int idx; + +    if (cmdline[0] == '\0') +	return; +    new_entry = NULL; +    if (rs->hist_entry != -1) { +	/* We were editing an existing history entry: replace it */ +	hist_entry = rs->history[rs->hist_entry]; +	idx = rs->hist_entry; +	if (strcmp(hist_entry, cmdline) == 0) { +	    goto same_entry; +	} +    } +    /* 
Search cmdline in history buffers */ +    for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { +	hist_entry = rs->history[idx]; +	if (hist_entry == NULL) +	    break; +	if (strcmp(hist_entry, cmdline) == 0) { +	same_entry: +	    new_entry = hist_entry; +	    /* Put this entry at the end of history */ +	    memmove(&rs->history[idx], &rs->history[idx + 1], +		    (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *)); +	    rs->history[READLINE_MAX_CMDS - 1] = NULL; +	    for (; idx < READLINE_MAX_CMDS; idx++) { +		if (rs->history[idx] == NULL) +		    break; +	    } +	    break; +	} +    } +    if (idx == READLINE_MAX_CMDS) { +	/* Need to get one free slot */ +        g_free(rs->history[0]); +	memmove(rs->history, &rs->history[1], +	        (READLINE_MAX_CMDS - 1) * sizeof(char *)); +	rs->history[READLINE_MAX_CMDS - 1] = NULL; +	idx = READLINE_MAX_CMDS - 1; +    } +    if (new_entry == NULL) +        new_entry = g_strdup(cmdline); +    rs->history[idx] = new_entry; +    rs->hist_entry = -1; +} + +/* completion support */ + +void readline_add_completion(ReadLineState *rs, const char *str) +{ +    if (rs->nb_completions < READLINE_MAX_COMPLETIONS) { +        int i; +        for (i = 0; i < rs->nb_completions; i++) { +            if (!strcmp(rs->completions[i], str)) { +                return; +            } +        } +        rs->completions[rs->nb_completions++] = g_strdup(str); +    } +} + +void readline_set_completion_index(ReadLineState *rs, int index) +{ +    rs->completion_index = index; +} + +static int completion_comp(const void *a, const void *b) +{ +    return strcmp(*(const char **) a, *(const char **) b); +} + +static void readline_completion(ReadLineState *rs) +{ +    int len, i, j, max_width, nb_cols, max_prefix; +    char *cmdline; + +    rs->nb_completions = 0; + +    cmdline = g_strndup(rs->cmd_buf, rs->cmd_buf_index); +    rs->completion_finder(rs->opaque, cmdline); +    g_free(cmdline); + +    /* no completion found */ +    if (rs->nb_completions <= 0) +     
   return; +    if (rs->nb_completions == 1) { +        len = strlen(rs->completions[0]); +        for(i = rs->completion_index; i < len; i++) { +            readline_insert_char(rs, rs->completions[0][i]); +        } +        /* extra space for next argument. XXX: make it more generic */ +        if (len > 0 && rs->completions[0][len - 1] != '/') +            readline_insert_char(rs, ' '); +    } else { +        qsort(rs->completions, rs->nb_completions, sizeof(char *), +              completion_comp); +        rs->printf_func(rs->opaque, "\n"); +        max_width = 0; +        max_prefix = 0;	 +        for(i = 0; i < rs->nb_completions; i++) { +            len = strlen(rs->completions[i]); +            if (i==0) { +                max_prefix = len; +            } else { +                if (len < max_prefix) +                    max_prefix = len; +                for(j=0; j<max_prefix; j++) { +                    if (rs->completions[i][j] != rs->completions[0][j]) +                        max_prefix = j; +                } +            } +            if (len > max_width) +                max_width = len; +        } +        if (max_prefix > 0)  +            for(i = rs->completion_index; i < max_prefix; i++) { +                readline_insert_char(rs, rs->completions[0][i]); +            } +        max_width += 2; +        if (max_width < 10) +            max_width = 10; +        else if (max_width > 80) +            max_width = 80; +        nb_cols = 80 / max_width; +        j = 0; +        for(i = 0; i < rs->nb_completions; i++) { +            rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]); +            if (++j == nb_cols || i == (rs->nb_completions - 1)) { +                rs->printf_func(rs->opaque, "\n"); +                j = 0; +            } +        } +        readline_show_prompt(rs); +    } +    for (i = 0; i < rs->nb_completions; i++) { +        g_free(rs->completions[i]); +    } +} + +static void readline_clear_screen(ReadLineState 
*rs) +{ +    rs->printf_func(rs->opaque, "\033[2J\033[1;1H"); +    readline_show_prompt(rs); +} + +/* + * Feed one input byte to the line editor.  The byte is interpreted + * according to rs->esc_state: plain input, after ESC, inside a CSI + * sequence, or after SS3.  No value is returned. + */ +void readline_handle_byte(ReadLineState *rs, int ch) +{ +    switch(rs->esc_state) { +    case IS_NORM: +        switch(ch) { +        case 1: +            readline_bol(rs); +            break; +        case 4: +            readline_delete_char(rs); +            break; +        case 5: +            readline_eol(rs); +            break; +        case 9: +            readline_completion(rs); +            break; +        case 12: +            readline_clear_screen(rs); +            break; +        case 10: +        case 13: +            rs->cmd_buf[rs->cmd_buf_size] = '\0'; +            if (!rs->read_password) +                readline_hist_add(rs, rs->cmd_buf); +            rs->printf_func(rs->opaque, "\n"); +            rs->cmd_buf_index = 0; +            rs->cmd_buf_size = 0; +            rs->last_cmd_buf_index = 0; +            rs->last_cmd_buf_size = 0; +            rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque); +            break; +        case 23: +            /* ^W */ +            readline_backword(rs); +            break; +        case 27: +            rs->esc_state = IS_ESC; +            break; +        case 127: +        case 8: +            readline_backspace(rs); +            break; +	case 155: +            /* 8-bit CSI: same as ESC [, so start with a clean parameter */ +            rs->esc_state = IS_CSI; +            rs->esc_param = 0; +	    break; +        default: +            if (ch >= 32) { +                readline_insert_char(rs, ch); +            } +            break; +        } +        break; +    case IS_ESC: +        if (ch == '[') { +            rs->esc_state = IS_CSI; +            rs->esc_param = 0; +        } else if (ch == 'O') { +            rs->esc_state = IS_SS3; +            rs->esc_param = 0; +        } else { +            rs->esc_state = IS_NORM; +        } +        break; +    case IS_CSI: +        switch(ch) { +	case 'A': +	case 'F': +	    readline_up_char(rs); +	    break; +	case 'B': 
+	case 'E': +	    readline_down_char(rs); +	    break; +        case 'D': +            readline_backward_char(rs); +            break; +        case 'C': +            readline_forward_char(rs); +            break; +        case '0' ... '9': +            rs->esc_param = rs->esc_param * 10 + (ch - '0'); +            goto the_end; +        case '~': +            switch(rs->esc_param) { +            case 1: +                readline_bol(rs); +                break; +            case 3: +                readline_delete_char(rs); +                break; +            case 4: +                readline_eol(rs); +                break; +            } +            break; +        default: +            break; +        } +        rs->esc_state = IS_NORM; +    the_end: +        break; +    case IS_SS3: +        switch(ch) { +        case 'F': +            readline_eol(rs); +            break; +        case 'H': +            readline_bol(rs); +            break; +        } +        rs->esc_state = IS_NORM; +        break; +    } +    readline_update(rs); +} + +void readline_start(ReadLineState *rs, const char *prompt, int read_password, +                    ReadLineFunc *readline_func, void *opaque) +{ +    pstrcpy(rs->prompt, sizeof(rs->prompt), prompt); +    rs->readline_func = readline_func; +    rs->readline_opaque = opaque; +    rs->read_password = read_password; +    readline_restart(rs); +} + +void readline_restart(ReadLineState *rs) +{ +    rs->cmd_buf_index = 0; +    rs->cmd_buf_size = 0; +} + +const char *readline_get_history(ReadLineState *rs, unsigned int index) +{ +    if (index >= READLINE_MAX_CMDS) +        return NULL; +    return rs->history[index]; +} + +ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, +                             ReadLineFlushFunc *flush_func, +                             void *opaque, +                             ReadLineCompletionFunc *completion_finder) +{ +    ReadLineState *rs = g_malloc0(sizeof(*rs)); + +    rs->hist_entry 
= -1; +    rs->opaque = opaque; +    rs->printf_func = printf_func; +    rs->flush_func = flush_func; +    rs->completion_finder = completion_finder; + +    return rs; +} diff --git a/util/rfifolock.c b/util/rfifolock.c new file mode 100644 index 00000000..afbf7488 --- /dev/null +++ b/util/rfifolock.c @@ -0,0 +1,78 @@ +/* + * Recursive FIFO lock + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + *  Stefan Hajnoczi   <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include <assert.h> +#include "qemu/rfifolock.h" + +void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque) +{ +    qemu_mutex_init(&r->lock); +    r->head = 0; +    r->tail = 0; +    qemu_cond_init(&r->cond); +    r->nesting = 0; +    r->cb = cb; +    r->cb_opaque = opaque; +} + +void rfifolock_destroy(RFifoLock *r) +{ +    qemu_cond_destroy(&r->cond); +    qemu_mutex_destroy(&r->lock); +} + +/* + * Theory of operation: + * + * In order to ensure FIFO ordering, implement a ticketlock.  Threads acquiring + * the lock enqueue themselves by incrementing the tail index.  When the lock + * is unlocked, the head is incremented and waiting threads are notified. + * + * Recursive locking does not take a ticket since the head is only incremented + * when the outermost recursive caller unlocks. 
+ */ +void rfifolock_lock(RFifoLock *r) +{ +    qemu_mutex_lock(&r->lock); + +    /* Take a ticket */ +    unsigned int ticket = r->tail++; + +    if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) { +        r->tail--; /* put ticket back, we're nesting */ +    } else { +        while (ticket != r->head) { +            /* Invoke optional contention callback */ +            if (r->cb) { +                r->cb(r->cb_opaque); +            } +            qemu_cond_wait(&r->cond, &r->lock); +        } +    } + +    qemu_thread_get_self(&r->owner_thread); +    r->nesting++; +    qemu_mutex_unlock(&r->lock); +} + +void rfifolock_unlock(RFifoLock *r) +{ +    qemu_mutex_lock(&r->lock); +    assert(r->nesting > 0); +    assert(qemu_thread_is_self(&r->owner_thread)); +    if (--r->nesting == 0) { +        r->head++; +        qemu_cond_broadcast(&r->cond); +    } +    qemu_mutex_unlock(&r->lock); +} diff --git a/util/throttle.c b/util/throttle.c new file mode 100644 index 00000000..706c1311 --- /dev/null +++ b/util/throttle.c @@ -0,0 +1,430 @@ +/* + * QEMU throttling infrastructure + * + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + *   Benoît Canet <benoit.canet@nodalink.com> + *   Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/throttle.h" +#include "qemu/timer.h" +#include "block/aio.h" + +/* This function make a bucket leak + * + * @bkt:   the bucket to make leak + * @delta_ns: the time delta + */ +void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) +{ +    double leak; + +    /* compute how much to leak */ +    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; + +    /* make the bucket leak */ +    bkt->level = MAX(bkt->level - leak, 0); +} + +/* Calculate the time delta since last leak and make proportionals leaks + * + * @now:      the current timestamp in ns + */ +static void throttle_do_leak(ThrottleState *ts, int64_t now) +{ +    /* compute the time elapsed since the last leak */ +    int64_t delta_ns = now - ts->previous_leak; +    int i; + +    ts->previous_leak = now; + +    if (delta_ns <= 0) { +        return; +    } + +    /* make each bucket leak */ +    for (i = 0; i < BUCKETS_COUNT; i++) { +        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); +    } +} + +/* do the real job of computing the time to wait + * + * @limit: the throttling limit + * @extra: the number of operation to delay + * @ret:   the time to wait in ns + */ +static int64_t throttle_do_compute_wait(double limit, double extra) +{ +    double wait = extra * NANOSECONDS_PER_SECOND; +    wait /= limit; +    return wait; +} + +/* This function compute the wait time in ns that a leaky bucket should trigger + * + * @bkt: the leaky bucket we operate on + * @ret: the resulting wait time in ns or 0 if the operation can go through + */ +int64_t throttle_compute_wait(LeakyBucket *bkt) +{ +    double extra; /* the number of extra units blocking the io */ + +    if (!bkt->avg) { +        return 0; +    } + +    extra = bkt->level - bkt->max; + +    if (extra <= 0) { +        return 0; +    } + +    return throttle_do_compute_wait(bkt->avg, extra); +} + +/* This function compute the time that must be waited while this IO + * + * @is_write:   true if the current IO is 
a write, false if it's a read + * @ret:        time to wait + */ +static int64_t throttle_compute_wait_for(ThrottleState *ts, +                                         bool is_write) +{ +    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, +                                   THROTTLE_OPS_TOTAL, +                                   THROTTLE_BPS_READ, +                                   THROTTLE_OPS_READ}, +                                  {THROTTLE_BPS_TOTAL, +                                   THROTTLE_OPS_TOTAL, +                                   THROTTLE_BPS_WRITE, +                                   THROTTLE_OPS_WRITE}, }; +    int64_t wait, max_wait = 0; +    int i; + +    for (i = 0; i < 4; i++) { +        BucketType index = to_check[is_write][i]; +        wait = throttle_compute_wait(&ts->cfg.buckets[index]); +        if (wait > max_wait) { +            max_wait = wait; +        } +    } + +    return max_wait; +} + +/* compute the timer for this type of operation + * + * @is_write:   the type of operation + * @now:        the current clock timestamp + * @next_timestamp: the resulting timer + * @ret:        true if a timer must be set + */ +bool throttle_compute_timer(ThrottleState *ts, +                            bool is_write, +                            int64_t now, +                            int64_t *next_timestamp) +{ +    int64_t wait; + +    /* leak proportionally to the time elapsed */ +    throttle_do_leak(ts, now); + +    /* compute the wait time if any */ +    wait = throttle_compute_wait_for(ts, is_write); + +    /* if the code must wait compute when the next timer should fire */ +    if (wait) { +        *next_timestamp = now + wait; +        return true; +    } + +    /* else no need to wait at all */ +    *next_timestamp = now; +    return false; +} + +/* Add timers to event loop */ +void throttle_timers_attach_aio_context(ThrottleTimers *tt, +                                        AioContext *new_context) +{ +    tt->timers[0] = 
aio_timer_new(new_context, tt->clock_type, SCALE_NS, +                                  tt->read_timer_cb, tt->timer_opaque); +    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, +                                  tt->write_timer_cb, tt->timer_opaque); +} + +/* To be called first on the ThrottleState */ +void throttle_init(ThrottleState *ts) +{ +    memset(ts, 0, sizeof(ThrottleState)); +} + +/* To be called first on the ThrottleTimers */ +void throttle_timers_init(ThrottleTimers *tt, +                          AioContext *aio_context, +                          QEMUClockType clock_type, +                          QEMUTimerCB *read_timer_cb, +                          QEMUTimerCB *write_timer_cb, +                          void *timer_opaque) +{ +    memset(tt, 0, sizeof(ThrottleTimers)); + +    tt->clock_type = clock_type; +    tt->read_timer_cb = read_timer_cb; +    tt->write_timer_cb = write_timer_cb; +    tt->timer_opaque = timer_opaque; +    throttle_timers_attach_aio_context(tt, aio_context); +} + +/* destroy a timer */ +static void throttle_timer_destroy(QEMUTimer **timer) +{ +    assert(*timer != NULL); + +    timer_del(*timer); +    timer_free(*timer); +    *timer = NULL; +} + +/* Remove timers from event loop */ +void throttle_timers_detach_aio_context(ThrottleTimers *tt) +{ +    int i; + +    for (i = 0; i < 2; i++) { +        throttle_timer_destroy(&tt->timers[i]); +    } +} + +/* To be called last on the ThrottleTimers */ +void throttle_timers_destroy(ThrottleTimers *tt) +{ +    throttle_timers_detach_aio_context(tt); +} + +/* is any throttling timer configured */ +bool throttle_timers_are_initialized(ThrottleTimers *tt) +{ +    if (tt->timers[0]) { +        return true; +    } + +    return false; +} + +/* Does any throttling must be done + * + * @cfg: the throttling configuration to inspect + * @ret: true if throttling must be done else false + */ +bool throttle_enabled(ThrottleConfig *cfg) +{ +    int i; + +    for (i = 0; i < 
BUCKETS_COUNT; i++) { +        if (cfg->buckets[i].avg > 0) { +            return true; +        } +    } + +    return false; +} + +/* return true if any two throttling parameters conflicts + * + * @cfg: the throttling configuration to inspect + * @ret: true if any conflict detected else false + */ +bool throttle_conflicting(ThrottleConfig *cfg) +{ +    bool bps_flag, ops_flag; +    bool bps_max_flag, ops_max_flag; + +    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && +               (cfg->buckets[THROTTLE_BPS_READ].avg || +                cfg->buckets[THROTTLE_BPS_WRITE].avg); + +    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && +               (cfg->buckets[THROTTLE_OPS_READ].avg || +                cfg->buckets[THROTTLE_OPS_WRITE].avg); + +    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && +                  (cfg->buckets[THROTTLE_BPS_READ].max  || +                   cfg->buckets[THROTTLE_BPS_WRITE].max); + +    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && +                   (cfg->buckets[THROTTLE_OPS_READ].max || +                   cfg->buckets[THROTTLE_OPS_WRITE].max); + +    return bps_flag || ops_flag || bps_max_flag || ops_max_flag; +} + +/* check if a throttling configuration is valid + * @cfg: the throttling configuration to inspect + * @ret: true if valid else false + */ +bool throttle_is_valid(ThrottleConfig *cfg) +{ +    bool invalid = false; +    int i; + +    for (i = 0; i < BUCKETS_COUNT; i++) { +        if (cfg->buckets[i].avg < 0) { +            invalid = true; +        } +    } + +    for (i = 0; i < BUCKETS_COUNT; i++) { +        if (cfg->buckets[i].max < 0) { +            invalid = true; +        } +    } + +    return !invalid; +} + +/* fix bucket parameters */ +static void throttle_fix_bucket(LeakyBucket *bkt) +{ +    double min; + +    /* zero bucket level */ +    bkt->level = 0; + +    /* The following is done to cope with the Linux CFQ block scheduler +     * which regroup reads and writes by block of 100ms 
in the guest. +     * When they are two process one making reads and one making writes cfq +     * make a pattern looking like the following: +     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR +     * Having a max burst value of 100ms of the average will help smooth the +     * throttling +     */ +    min = bkt->avg / 10; +    if (bkt->avg && !bkt->max) { +        bkt->max = min; +    } +} + +/* take care of canceling a timer */ +static void throttle_cancel_timer(QEMUTimer *timer) +{ +    assert(timer != NULL); + +    timer_del(timer); +} + +/* Used to configure the throttle + * + * @ts: the throttle state we are working on + * @tt: the throttle timers we use in this aio context + * @cfg: the config to set + */ +void throttle_config(ThrottleState *ts, +                     ThrottleTimers *tt, +                     ThrottleConfig *cfg) +{ +    int i; + +    ts->cfg = *cfg; + +    for (i = 0; i < BUCKETS_COUNT; i++) { +        throttle_fix_bucket(&ts->cfg.buckets[i]); +    } + +    ts->previous_leak = qemu_clock_get_ns(tt->clock_type); + +    for (i = 0; i < 2; i++) { +        throttle_cancel_timer(tt->timers[i]); +    } +} + +/* used to get config + * + * @ts:  the throttle state we are working on + * @cfg: the config to write + */ +void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) +{ +    *cfg = ts->cfg; +} + + +/* Schedule the read or write timer if needed + * + * NOTE: this function is not unit tested due to it's usage of timer_mod + * + * @tt:       the timers structure + * @is_write: the type of operation (read/write) + * @ret:      true if the timer has been scheduled else false + */ +bool throttle_schedule_timer(ThrottleState *ts, +                             ThrottleTimers *tt, +                             bool is_write) +{ +    int64_t now = qemu_clock_get_ns(tt->clock_type); +    int64_t next_timestamp; +    bool must_wait; + +    must_wait = throttle_compute_timer(ts, +                                       is_write, +      
                                 now, +                                       &next_timestamp); + +    /* request not throttled */ +    if (!must_wait) { +        return false; +    } + +    /* request throttled and timer pending -> do nothing */ +    if (timer_pending(tt->timers[is_write])) { +        return true; +    } + +    /* request throttled and timer not pending -> arm timer */ +    timer_mod(tt->timers[is_write], next_timestamp); +    return true; +} + +/* do the accounting for this operation + * + * @is_write: the type of operation (read/write) + * @size:     the size of the operation + */ +void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) +{ +    double units = 1.0; + +    /* if cfg.op_size is defined and smaller than size we compute unit count */ +    if (ts->cfg.op_size && size > ts->cfg.op_size) { +        units = (double) size / ts->cfg.op_size; +    } + +    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size; +    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units; + +    if (is_write) { +        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size; +        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units; +    } else { +        ts->cfg.buckets[THROTTLE_BPS_READ].level += size; +        ts->cfg.buckets[THROTTLE_OPS_READ].level += units; +    } +} + diff --git a/util/unicode.c b/util/unicode.c new file mode 100644 index 00000000..d1c86588 --- /dev/null +++ b/util/unicode.c @@ -0,0 +1,100 @@ +/* + * Dealing with Unicode + * + * Copyright (C) 2013 Red Hat, Inc. + * + * Authors: + *  Markus Armbruster <armbru@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later.  See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" + +/** + * mod_utf8_codepoint: + * @s: string encoded in modified UTF-8 + * @n: maximum number of bytes to read from @s, if less than 6 + * @end: set to end of sequence on return + * + * Convert the modified UTF-8 sequence at the start of @s.  
Modified + * UTF-8 is exactly like UTF-8, except U+0000 is encoded as + * "\xC0\x80". + * + * If @n is zero or @s points to a zero byte, the sequence is invalid, + * and @end is set to @s. + * + * If @s points to an impossible byte (0xFE or 0xFF) or a continuation + * byte, the sequence is invalid, and @end is set to @s + 1 + * + * Else, the first byte determines how many continuation bytes are + * expected.  If there are fewer, the sequence is invalid, and @end is + * set to @s + 1 + actual number of continuation bytes.  Else, the + * sequence is well-formed, and @end is set to @s + 1 + expected + * number of continuation bytes. + * + * A well-formed sequence is valid unless it encodes a codepoint + * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66 + * noncharacters, a surrogate codepoint, or is overlong.  Except the + * overlong sequence "\xC0\x80" is valid. + * + * Conversion succeeds if and only if the sequence is valid. + * + * Returns: the Unicode codepoint on success, -1 on failure. + */ +int mod_utf8_codepoint(const char *s, size_t n, char **end) +{ +    static int min_cp[5] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 }; +    const unsigned char *p; +    unsigned byte, mask, len, i; +    int cp; + +    if (n == 0 || *s == 0) { +        /* empty sequence */ +        *end = (char *)s; +        return -1; +    } + +    p = (const unsigned char *)s; +    byte = *p++; +    if (byte < 0x80) { +        cp = byte;              /* one byte sequence */ +    } else if (byte >= 0xFE) { +        cp = -1;                /* impossible bytes 0xFE, 0xFF */ +    } else if ((byte & 0x40) == 0) { +        cp = -1;                /* unexpected continuation byte */ +    } else { +        /* multi-byte sequence */ +        len = 0; +        for (mask = 0x80; byte & mask; mask >>= 1) { +            len++; +        } +        assert(len > 1 && len < 7); +        cp = byte & (mask - 1); +        for (i = 1; i < len; i++) { +            byte = i < n ? 
*p : 0; +            if ((byte & 0xC0) != 0x80) { +                cp = -1;        /* continuation byte missing */ +                goto out; +            } +            p++; +            cp <<= 6; +            cp |= byte & 0x3F; +        } +        if (cp > 0x10FFFF) { +            cp = -1;            /* beyond Unicode range */ +        } else if ((cp >= 0xFDD0 && cp <= 0xFDEF) +                   || (cp & 0xFFFE) == 0xFFFE) { +            cp = -1;            /* noncharacter */ +        } else if (cp >= 0xD800 && cp <= 0xDFFF) { +            cp = -1;            /* surrogate code point */ +        } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) { +            cp = -1;            /* overlong, not \xC0\x80 */ +        } +    } + +out: +    *end = (char *)p; +    return cp; +} diff --git a/util/uri.c b/util/uri.c new file mode 100644 index 00000000..550b9845 --- /dev/null +++ b/util/uri.c @@ -0,0 +1,2204 @@ +/** + * uri.c: set of generic URI related routines + * + * Reference: RFCs 3986, 2732 and 2373 + * + * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * daniel@veillard.com + * + ** + * + * Copyright (C) 2007, 2009-2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA + * + * Authors: + *    Richard W.M. 
Jones <rjones@redhat.com> + * + */ + +#include <glib.h> +#include <string.h> +#include <stdio.h> + +#include "qemu/uri.h" + +static void uri_clean(URI *uri); + +/* + * Old rule from 2396 used in legacy handling code + * alpha    = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + *            "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + *           "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +#ifdef IS_DIGIT +#undef IS_DIGIT +#endif +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \ +    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \ +    ((x) == '(') || ((x) == ')')) + +/* + * unwise = "{" | "}" | "|" | "\" | "^" | "`" + */ + +#define IS_UNWISE(p)                                                    \ +      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \ +       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \ +       ((*(p) == ']')) || ((*(p) == '`'))) +/* + * reserved = ";" | "/" | "?" 
| ":" | "@" | "&" | "=" | "+" | "$" | "," | + *            "[" | "]" + */ + +#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ +        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ +        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ +        ((x) == ']')) + +/* + * unreserved = alphanum | mark + */ + +#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) + +/* + * Skip to next pointer char, handle escaped sequences + */ + +#define NEXT(p) ((*p == '%')? p += 3 : p++) + +/* + * Productions from the spec. + * + *    authority     = server | reg_name + *    reg_name      = 1*( unreserved | escaped | "$" | "," | + *                        ";" | ":" | "@" | "&" | "=" | "+" ) + * + * path          = [ abs_path | opaque_part ] + */ + + +/************************************************************************ + *									* + *                         RFC 3986 parser				* + *									* + ************************************************************************/ + +#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) +#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\ +                      ((*(p) >= 'A') && (*(p) <= 'Z'))) +#define ISA_HEXDIG(p)							\ +       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\ +        ((*(p) >= 'A') && (*(p) <= 'F'))) + +/* + *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")" + *                     / "*" / "+" / "," / ";" / "=" + */ +#define ISA_SUB_DELIM(p)						\ +      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\ +       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\ +       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\ +       ((*(p) == '=')) || ((*(p) == '\''))) + +/* + *    gen-delims    = ":" / "/" / "?" 
/ "#" / "[" / "]" / "@" + */ +#define ISA_GEN_DELIM(p)						\ +      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \ +       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \ +       ((*(p) == '@'))) + +/* + *    reserved      = gen-delims / sub-delims + */ +#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) + +/* + *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~" + */ +#define ISA_UNRESERVED(p)						\ +      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\ +       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) + +/* + *    pct-encoded   = "%" HEXDIG HEXDIG + */ +#define ISA_PCT_ENCODED(p)						\ +     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) + +/* + *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@" + */ +#define ISA_PCHAR(p)							\ +     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\ +      ((*(p) == ':')) || ((*(p) == '@'))) + +/** + * rfc3986_parse_scheme: + * @uri:  pointer to an URI structure + * @str:  pointer to the string to analyze + * + * Parse an URI scheme + * + * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_scheme(URI *uri, const char **str) { +    const char *cur; + +    if (str == NULL) +	return(-1); + +    cur = *str; +    if (!ISA_ALPHA(cur)) +	return(2); +    cur++; +    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || +           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; +    if (uri != NULL) { +        g_free(uri->scheme); +	uri->scheme = g_strndup(*str, cur - *str); +    } +    *str = cur; +    return(0); +} + +/** + * rfc3986_parse_fragment: + * @uri:  pointer to an URI structure + * @str:  pointer to the string to analyze + * + * Parse the query part of an URI + * + * fragment      = *( pchar / "/" / "?" 
) + * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' + *       in the fragment identifier but this is used very broadly for + *       xpointer scheme selection, so we are allowing it here to not break + *       for example all the DocBook processing chains. + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_fragment(URI *uri, const char **str) +{ +    const char *cur; + +    if (str == NULL) +        return (-1); + +    cur = *str; + +    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || +           (*cur == '[') || (*cur == ']') || +           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) +        NEXT(cur); +    if (uri != NULL) { +        g_free(uri->fragment); +	if (uri->cleanup & 2) +	    uri->fragment = g_strndup(*str, cur - *str); +	else +	    uri->fragment = uri_string_unescape(*str, cur - *str, NULL); +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_query: + * @uri:  pointer to an URI structure + * @str:  pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_query(URI *uri, const char **str) +{ +    const char *cur; + +    if (str == NULL) +        return (-1); + +    cur = *str; + +    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || +           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) +        NEXT(cur); +    if (uri != NULL) { +        g_free(uri->query); +	uri->query = g_strndup (*str, cur - *str); +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_port: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse a port  part and fills in the appropriate fields + * of the @uri structure + * + * port          = *DIGIT + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_port(URI *uri, const char **str) +{ +    const char *cur = *str; +    int port = 0; + +    if 
(ISA_DIGIT(cur)) { +        while (ISA_DIGIT(cur)) { +            port = port * 10 + (*cur - '0'); +            if (port > 65535) { +                return 1; +            } +            cur++; +        } +        if (uri) { +            uri->port = port; +        } +        *str = cur; +        return 0; +    } +    return 1; +} + +/** + * rfc3986_parse_user_info: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an user informations part and fills in the appropriate fields + * of the @uri structure + * + * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_user_info(URI *uri, const char **str) +{ +    const char *cur; + +    cur = *str; +    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || +           ISA_SUB_DELIM(cur) || (*cur == ':')) +	NEXT(cur); +    if (*cur == '@') { +	if (uri != NULL) { +            g_free(uri->user); +	    if (uri->cleanup & 2) +		uri->user = g_strndup(*str, cur - *str); +	    else +		uri->user = uri_string_unescape(*str, cur - *str, NULL); +	} +	*str = cur; +	return(0); +    } +    return(1); +} + +/** + * rfc3986_parse_dec_octet: + * @str:  the string to analyze + * + *    dec-octet     = DIGIT                 ; 0-9 + *                  / %x31-39 DIGIT         ; 10-99 + *                  / "1" 2DIGIT            ; 100-199 + *                  / "2" %x30-34 DIGIT     ; 200-249 + *                  / "25" %x30-35          ; 250-255 + * + * Skip a dec-octet. 
/**
 * rfc3986_parse_dec_octet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  On success *@str is advanced past the octet; on
 * failure *@str is left untouched.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
rfc3986_parse_dec_octet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur + 1)) {
        /* DIGIT: 0-9 */
        cur++;
    } else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) &&
               (!ISA_DIGIT(cur + 2))) {
        /* %x31-39 DIGIT: 10-99, no leading zero */
        cur += 2;
    } else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) &&
               (ISA_DIGIT(cur + 2))) {
        /* "1" 2DIGIT: 100-199 */
        cur += 3;
    } else if ((*cur == '2') && (*(cur + 1) >= '0') &&
               (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) {
        /* "2" %x30-34 DIGIT: 200-249 */
        cur += 3;
    } else if ((*cur == '2') && (*(cur + 1) == '5') &&
               (*(cur + 2) >= '0') && (*(cur + 2) <= '5')) {
        /*
         * "25" %x30-35: 250-255.  The range check must be applied to the
         * third digit; checking the second one again would let 256-259
         * through.
         */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
/**
 * rfc3986_parse_host:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "."
dec-octet + * reg-name      = *( unreserved / pct-encoded / sub-delims ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_host(URI *uri, const char **str) +{ +    const char *cur = *str; +    const char *host; + +    host = cur; +    /* +     * IPv6 and future addressing scheme are enclosed between brackets +     */ +    if (*cur == '[') { +        cur++; +	while ((*cur != ']') && (*cur != 0)) +	    cur++; +	if (*cur != ']') +	    return(1); +	cur++; +	goto found; +    } +    /* +     * try to parse an IPv4 +     */ +    if (ISA_DIGIT(cur)) { +        if (rfc3986_parse_dec_octet(&cur) != 0) +	    goto not_ipv4; +	if (*cur != '.') +	    goto not_ipv4; +	cur++; +        if (rfc3986_parse_dec_octet(&cur) != 0) +	    goto not_ipv4; +	if (*cur != '.') +	    goto not_ipv4; +        if (rfc3986_parse_dec_octet(&cur) != 0) +	    goto not_ipv4; +	if (*cur != '.') +	    goto not_ipv4; +        if (rfc3986_parse_dec_octet(&cur) != 0) +	    goto not_ipv4; +	goto found; +not_ipv4: +        cur = *str; +    } +    /* +     * then this should be a hostname which can be empty +     */ +    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) +        NEXT(cur); +found: +    if (uri != NULL) { +        g_free(uri->authority); +	uri->authority = NULL; +        g_free(uri->server); +	if (cur != host) { +	    if (uri->cleanup & 2) +		uri->server = g_strndup(host, cur - host); +	    else +		uri->server = uri_string_unescape(host, cur - host, NULL); +	} else +	    uri->server = NULL; +    } +    *str = cur; +    return(0); +} + +/** + * rfc3986_parse_authority: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an authority part and fills in the appropriate fields + * of the @uri structure + * + * authority     = [ userinfo "@" ] host [ ":" port ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_authority(URI *uri, const char **str) +{ +    const char *cur; +    int ret; + +    cur = *str; +    
/* +     * try to parse an userinfo and check for the trailing @ +     */ +    ret = rfc3986_parse_user_info(uri, &cur); +    if ((ret != 0) || (*cur != '@')) +        cur = *str; +    else +        cur++; +    ret = rfc3986_parse_host(uri, &cur); +    if (ret != 0) return(ret); +    if (*cur == ':') { +        cur++; +        ret = rfc3986_parse_port(uri, &cur); +	if (ret != 0) return(ret); +    } +    *str = cur; +    return(0); +} + +/** + * rfc3986_parse_segment: + * @str:  the string to analyze + * @forbid: an optional forbidden character + * @empty: allow an empty segment + * + * Parse a segment and fills in the appropriate fields + * of the @uri structure + * + * segment       = *pchar + * segment-nz    = 1*pchar + * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + *               ; non-zero-length segment without any colon ":" + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_segment(const char **str, char forbid, int empty) +{ +    const char *cur; + +    cur = *str; +    if (!ISA_PCHAR(cur)) { +        if (empty) +	    return(0); +	return(1); +    } +    while (ISA_PCHAR(cur) && (*cur != forbid)) +        NEXT(cur); +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_path_ab_empty: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an path absolute or empty and fills in the appropriate fields + * of the @uri structure + * + * path-abempty  = *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_ab_empty(URI *uri, const char **str) +{ +    const char *cur; +    int ret; + +    cur = *str; + +    while (*cur == '/') { +        cur++; +	ret = rfc3986_parse_segment(&cur, 0, 1); +	if (ret != 0) return(ret); +    } +    if (uri != NULL) { +        g_free(uri->path); +        if (*str != cur) { +            if (uri->cleanup & 2) +                uri->path = g_strndup(*str, cur - *str); +            else +                uri->path = 
uri_string_unescape(*str, cur - *str, NULL); +        } else { +            uri->path = NULL; +        } +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_path_absolute: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an path absolute and fills in the appropriate fields + * of the @uri structure + * + * path-absolute = "/" [ segment-nz *( "/" segment ) ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_absolute(URI *uri, const char **str) +{ +    const char *cur; +    int ret; + +    cur = *str; + +    if (*cur != '/') +        return(1); +    cur++; +    ret = rfc3986_parse_segment(&cur, 0, 0); +    if (ret == 0) { +	while (*cur == '/') { +	    cur++; +	    ret = rfc3986_parse_segment(&cur, 0, 1); +	    if (ret != 0) return(ret); +	} +    } +    if (uri != NULL) { +        g_free(uri->path); +        if (cur != *str) { +            if (uri->cleanup & 2) +                uri->path = g_strndup(*str, cur - *str); +            else +                uri->path = uri_string_unescape(*str, cur - *str, NULL); +        } else { +            uri->path = NULL; +        } +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_path_rootless: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an path without root and fills in the appropriate fields + * of the @uri structure + * + * path-rootless = segment-nz *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_rootless(URI *uri, const char **str) +{ +    const char *cur; +    int ret; + +    cur = *str; + +    ret = rfc3986_parse_segment(&cur, 0, 0); +    if (ret != 0) return(ret); +    while (*cur == '/') { +        cur++; +	ret = rfc3986_parse_segment(&cur, 0, 1); +	if (ret != 0) return(ret); +    } +    if (uri != NULL) { +        g_free(uri->path); +        if (cur != *str) { +            if (uri->cleanup & 2) +                uri->path = g_strndup(*str, cur - 
*str); +            else +                uri->path = uri_string_unescape(*str, cur - *str, NULL); +        } else { +            uri->path = NULL; +        } +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_path_no_scheme: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an path which is not a scheme and fills in the appropriate fields + * of the @uri structure + * + * path-noscheme = segment-nz-nc *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_no_scheme(URI *uri, const char **str) +{ +    const char *cur; +    int ret; + +    cur = *str; + +    ret = rfc3986_parse_segment(&cur, ':', 0); +    if (ret != 0) return(ret); +    while (*cur == '/') { +        cur++; +	ret = rfc3986_parse_segment(&cur, 0, 1); +	if (ret != 0) return(ret); +    } +    if (uri != NULL) { +        g_free(uri->path); +        if (cur != *str) { +            if (uri->cleanup & 2) +                uri->path = g_strndup(*str, cur - *str); +            else +                uri->path = uri_string_unescape(*str, cur - *str, NULL); +        } else { +            uri->path = NULL; +        } +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_hier_part: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an hierarchical part and fills in the appropriate fields + * of the @uri structure + * + * hier-part     = "//" authority path-abempty + *                / path-absolute + *                / path-rootless + *                / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_hier_part(URI *uri, const char **str) +{ +    const char *cur; +    int ret; + +    cur = *str; + +    if ((*cur == '/') && (*(cur + 1) == '/')) { +        cur += 2; +	ret = rfc3986_parse_authority(uri, &cur); +	if (ret != 0) return(ret); +	ret = rfc3986_parse_path_ab_empty(uri, &cur); +	if (ret != 0) return(ret); +	*str = cur; +	return(0); +    } 
else if (*cur == '/') { +        ret = rfc3986_parse_path_absolute(uri, &cur); +	if (ret != 0) return(ret); +    } else if (ISA_PCHAR(cur)) { +        ret = rfc3986_parse_path_rootless(uri, &cur); +	if (ret != 0) return(ret); +    } else { +	/* path-empty is effectively empty */ +	if (uri != NULL) { +            g_free(uri->path); +	    uri->path = NULL; +	} +    } +    *str = cur; +    return (0); +} + +/** + * rfc3986_parse_relative_ref: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * relative-ref  = relative-part [ "?" query ] [ "#" fragment ] + * relative-part = "//" authority path-abempty + *               / path-absolute + *               / path-noscheme + *               / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_relative_ref(URI *uri, const char *str) { +    int ret; + +    if ((*str == '/') && (*(str + 1) == '/')) { +        str += 2; +	ret = rfc3986_parse_authority(uri, &str); +	if (ret != 0) return(ret); +	ret = rfc3986_parse_path_ab_empty(uri, &str); +	if (ret != 0) return(ret); +    } else if (*str == '/') { +	ret = rfc3986_parse_path_absolute(uri, &str); +	if (ret != 0) return(ret); +    } else if (ISA_PCHAR(str)) { +        ret = rfc3986_parse_path_no_scheme(uri, &str); +	if (ret != 0) return(ret); +    } else { +	/* path-empty is effectively empty */ +	if (uri != NULL) { +            g_free(uri->path); +	    uri->path = NULL; +	} +    } + +    if (*str == '?') { +	str++; +	ret = rfc3986_parse_query(uri, &str); +	if (ret != 0) return(ret); +    } +    if (*str == '#') { +	str++; +	ret = rfc3986_parse_fragment(uri, &str); +	if (ret != 0) return(ret); +    } +    if (*str != 0) { +	uri_clean(uri); +	return(1); +    } +    return(0); +} + + +/** + * rfc3986_parse: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an URI string and fills in the appropriate 
fields + * of the @uri structure + * + * scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse(URI *uri, const char *str) { +    int ret; + +    ret = rfc3986_parse_scheme(uri, &str); +    if (ret != 0) return(ret); +    if (*str != ':') { +	return(1); +    } +    str++; +    ret = rfc3986_parse_hier_part(uri, &str); +    if (ret != 0) return(ret); +    if (*str == '?') { +	str++; +	ret = rfc3986_parse_query(uri, &str); +	if (ret != 0) return(ret); +    } +    if (*str == '#') { +	str++; +	ret = rfc3986_parse_fragment(uri, &str); +	if (ret != 0) return(ret); +    } +    if (*str != 0) { +	uri_clean(uri); +	return(1); +    } +    return(0); +} + +/** + * rfc3986_parse_uri_reference: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_uri_reference(URI *uri, const char *str) { +    int ret; + +    if (str == NULL) +	return(-1); +    uri_clean(uri); + +    /* +     * Try first to parse absolute refs, then fallback to relative if +     * it fails. 
+     */ +    ret = rfc3986_parse(uri, str); +    if (ret != 0) { +	uri_clean(uri); +        ret = rfc3986_parse_relative_ref(uri, str); +	if (ret != 0) { +	    uri_clean(uri); +	    return(ret); +	} +    } +    return(0); +} + +/** + * uri_parse: + * @str:  the URI string to analyze + * + * Parse an URI based on RFC 3986 + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse(const char *str) { +    URI *uri; +    int ret; + +    if (str == NULL) +	return(NULL); +    uri = uri_new(); +    ret = rfc3986_parse_uri_reference(uri, str); +    if (ret) { +        uri_free(uri); +        return(NULL); +    } +    return(uri); +} + +/** + * uri_parse_into: + * @uri:  pointer to an URI structure + * @str:  the string to analyze + * + * Parse an URI reference string based on RFC 3986 and fills in the + * appropriate fields of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +int +uri_parse_into(URI *uri, const char *str) { +    return(rfc3986_parse_uri_reference(uri, str)); +} + +/** + * uri_parse_raw: + * @str:  the URI string to analyze + * @raw:  if 1 unescaping of URI pieces are disabled + * + * Parse an URI but allows to keep intact the original fragments. 
+ * + * URI-reference = URI / relative-ref + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse_raw(const char *str, int raw) { +    URI *uri; +    int ret; + +    if (str == NULL) +	return(NULL); +    uri = uri_new(); +    if (raw) { +        uri->cleanup |= 2; +    } +    ret = uri_parse_into(uri, str); +    if (ret) { +        uri_free(uri); +        return(NULL); +    } +    return(uri); +} + +/************************************************************************ + *									* + *			Generic URI structure functions			* + *									* + ************************************************************************/ + +/** + * uri_new: + * + * Simply creates an empty URI + * + * Returns the new structure or NULL in case of error + */ +URI * +uri_new(void) { +    URI *ret; + +    ret = g_new0(URI, 1); +    return(ret); +} + +/** + * realloc2n: + * + * Function to handle properly a reallocation when saving an URI + * Also imposes some limit on the length of an URI string output + */ +static char * +realloc2n(char *ret, int *max) { +    char *temp; +    int tmp; + +    tmp = *max * 2; +    temp = g_realloc(ret, (tmp + 1)); +    *max = tmp; +    return(temp); +} + +/** + * uri_to_string: + * @uri:  pointer to an URI + * + * Save the URI as an escaped string + * + * Returns a new string (to be deallocated by caller) + */ +char * +uri_to_string(URI *uri) { +    char *ret = NULL; +    char *temp; +    const char *p; +    int len; +    int max; + +    if (uri == NULL) return(NULL); + + +    max = 80; +    ret = g_malloc(max + 1); +    len = 0; + +    if (uri->scheme != NULL) { +	p = uri->scheme; +	while (*p != 0) { +	    if (len >= max) { +                temp = realloc2n(ret, &max); +		ret = temp; +	    } +	    ret[len++] = *p++; +	} +	if (len >= max) { +            temp = realloc2n(ret, &max); +            ret = temp; +	} +	ret[len++] = ':'; +    } +    if (uri->opaque != NULL) { +	p = uri->opaque; +	while (*p != 0) { +	    if (len + 3 >= max) { 
+                temp = realloc2n(ret, &max); +                ret = temp; +	    } +	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) +		ret[len++] = *p++; +	    else { +		int val = *(unsigned char *)p++; +		int hi = val / 0x10, lo = val % 0x10; +		ret[len++] = '%'; +		ret[len++] = hi + (hi > 9? 'A'-10 : '0'); +		ret[len++] = lo + (lo > 9? 'A'-10 : '0'); +	    } +	} +    } else { +	if (uri->server != NULL) { +	    if (len + 3 >= max) { +                temp = realloc2n(ret, &max); +                ret = temp; +	    } +	    ret[len++] = '/'; +	    ret[len++] = '/'; +	    if (uri->user != NULL) { +		p = uri->user; +		while (*p != 0) { +		    if (len + 3 >= max) { +                        temp = realloc2n(ret, &max); +                        ret = temp; +		    } +		    if ((IS_UNRESERVED(*(p))) || +			((*(p) == ';')) || ((*(p) == ':')) || +			((*(p) == '&')) || ((*(p) == '=')) || +			((*(p) == '+')) || ((*(p) == '$')) || +			((*(p) == ','))) +			ret[len++] = *p++; +		    else { +			int val = *(unsigned char *)p++; +			int hi = val / 0x10, lo = val % 0x10; +			ret[len++] = '%'; +			ret[len++] = hi + (hi > 9? 'A'-10 : '0'); +			ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); +		    } +		} +		if (len + 3 >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		ret[len++] = '@'; +	    } +	    p = uri->server; +	    while (*p != 0) { +		if (len >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		ret[len++] = *p++; +	    } +	    if (uri->port > 0) { +		if (len + 10 >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		len += snprintf(&ret[len], max - len, ":%d", uri->port); +	    } +	} else if (uri->authority != NULL) { +	    if (len + 3 >= max) { +                temp = realloc2n(ret, &max); +                ret = temp; +	    } +	    ret[len++] = '/'; +	    ret[len++] = '/'; +	    p = uri->authority; +	    while (*p != 0) { +		if (len + 3 >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		if ((IS_UNRESERVED(*(p))) || +                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || +                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || +                    ((*(p) == '=')) || ((*(p) == '+'))) +		    ret[len++] = *p++; +		else { +		    int val = *(unsigned char *)p++; +		    int hi = val / 0x10, lo = val % 0x10; +		    ret[len++] = '%'; +		    ret[len++] = hi + (hi > 9? 'A'-10 : '0'); +		    ret[len++] = lo + (lo > 9? 'A'-10 : '0'); +		} +	    } +	} else if (uri->scheme != NULL) { +	    if (len + 3 >= max) { +                temp = realloc2n(ret, &max); +                ret = temp; +	    } +	    ret[len++] = '/'; +	    ret[len++] = '/'; +	} +	if (uri->path != NULL) { +	    p = uri->path; +	    /* +	     * the colon in file:///d: should not be escaped or +	     * Windows accesses fail later. 
+	     */ +	    if ((uri->scheme != NULL) && +		(p[0] == '/') && +		(((p[1] >= 'a') && (p[1] <= 'z')) || +		 ((p[1] >= 'A') && (p[1] <= 'Z'))) && +		(p[2] == ':') && +	        (!strcmp(uri->scheme, "file"))) { +		if (len + 3 >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		ret[len++] = *p++; +		ret[len++] = *p++; +		ret[len++] = *p++; +	    } +	    while (*p != 0) { +		if (len + 3 >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || +                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || +	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || +	            ((*(p) == ','))) +		    ret[len++] = *p++; +		else { +		    int val = *(unsigned char *)p++; +		    int hi = val / 0x10, lo = val % 0x10; +		    ret[len++] = '%'; +		    ret[len++] = hi + (hi > 9? 'A'-10 : '0'); +		    ret[len++] = lo + (lo > 9? 'A'-10 : '0'); +		} +	    } +	} +	if (uri->query != NULL) { +	    if (len + 1 >= max) { +                temp = realloc2n(ret, &max); +                ret = temp; +	    } +	    ret[len++] = '?'; +	    p = uri->query; +	    while (*p != 0) { +		if (len + 1 >= max) { +                    temp = realloc2n(ret, &max); +                    ret = temp; +		} +		ret[len++] = *p++; +	    } +	} +    } +    if (uri->fragment != NULL) { +	if (len + 3 >= max) { +            temp = realloc2n(ret, &max); +            ret = temp; +	} +	ret[len++] = '#'; +	p = uri->fragment; +	while (*p != 0) { +	    if (len + 3 >= max) { +                temp = realloc2n(ret, &max); +                ret = temp; +	    } +	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) +		ret[len++] = *p++; +	    else { +		int val = *(unsigned char *)p++; +		int hi = val / 0x10, lo = val % 0x10; +		ret[len++] = '%'; +		ret[len++] = hi + (hi > 9? 'A'-10 : '0'); +		ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); +	    } +	} +    } +    if (len >= max) { +        temp = realloc2n(ret, &max); +        ret = temp; +    } +    ret[len] = 0; +    return(ret); +} + +/** + * uri_clean: + * @uri:  pointer to an URI + * + * Make sure the URI struct is free of content + */ +static void +uri_clean(URI *uri) { +    if (uri == NULL) return; + +    g_free(uri->scheme); +    uri->scheme = NULL; +    g_free(uri->server); +    uri->server = NULL; +    g_free(uri->user); +    uri->user = NULL; +    g_free(uri->path); +    uri->path = NULL; +    g_free(uri->fragment); +    uri->fragment = NULL; +    g_free(uri->opaque); +    uri->opaque = NULL; +    g_free(uri->authority); +    uri->authority = NULL; +    g_free(uri->query); +    uri->query = NULL; +} + +/** + * uri_free: + * @uri:  pointer to an URI + * + * Free up the URI struct + */ +void +uri_free(URI *uri) { +    uri_clean(uri); +    g_free(uri); +} + +/************************************************************************ + *									* + *			Helper functions				* + *									* + ************************************************************************/ + +/** + * normalize_uri_path: + * @path:  pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. + * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +static int +normalize_uri_path(char *path) { +    char *cur, *out; + +    if (path == NULL) +	return(-1); + +    /* Skip all initial "/" chars.  We want to get to the beginning of the +     * first non-empty segment. +     */ +    cur = path; +    while (cur[0] == '/') +      ++cur; +    if (cur[0] == '\0') +      return(0); + +    /* Keep everything we've seen so far.  */ +    out = cur; + +    /* +     * Analyze each segment in sequence for cases (c) and (d). +     */ +    while (cur[0] != '\0') { +	/* +	 * c) All occurrences of "./", where "." 
is a complete path segment, +	 *    are removed from the buffer string. +	 */ +	if ((cur[0] == '.') && (cur[1] == '/')) { +	    cur += 2; +	    /* '//' normalization should be done at this point too */ +	    while (cur[0] == '/') +		cur++; +	    continue; +	} + +	/* +	 * d) If the buffer string ends with "." as a complete path segment, +	 *    that "." is removed. +	 */ +	if ((cur[0] == '.') && (cur[1] == '\0')) +	    break; + +	/* Otherwise keep the segment.  */ +	while (cur[0] != '/') { +            if (cur[0] == '\0') +              goto done_cd; +	    (out++)[0] = (cur++)[0]; +	} +	/* nomalize // */ +	while ((cur[0] == '/') && (cur[1] == '/')) +	    cur++; + +        (out++)[0] = (cur++)[0]; +    } + done_cd: +    out[0] = '\0'; + +    /* Reset to the beginning of the first segment for the next sequence.  */ +    cur = path; +    while (cur[0] == '/') +      ++cur; +    if (cur[0] == '\0') +	return(0); + +    /* +     * Analyze each segment in sequence for cases (e) and (f). +     * +     * e) All occurrences of "<segment>/../", where <segment> is a +     *    complete path segment not equal to "..", are removed from the +     *    buffer string.  Removal of these path segments is performed +     *    iteratively, removing the leftmost matching pattern on each +     *    iteration, until no matching pattern remains. +     * +     * f) If the buffer string ends with "<segment>/..", where <segment> +     *    is a complete path segment not equal to "..", that +     *    "<segment>/.." is removed. +     * +     * To satisfy the "iterative" clause in (e), we need to collapse the +     * string every time we find something that needs to be removed.  Thus, +     * we don't need to keep two pointers into the string: we only need a +     * "current position" pointer. +     */ +    while (1) { +        char *segp, *tmp; + +        /* At the beginning of each iteration of this loop, "cur" points to +         * the first character of the segment we want to examine. 
+         */ + +        /* Find the end of the current segment.  */ +        segp = cur; +        while ((segp[0] != '/') && (segp[0] != '\0')) +          ++segp; + +        /* If this is the last segment, we're done (we need at least two +         * segments to meet the criteria for the (e) and (f) cases). +         */ +        if (segp[0] == '\0') +          break; + +        /* If the first segment is "..", or if the next segment _isn't_ "..", +         * keep this segment and try the next one. +         */ +        ++segp; +        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) +            || ((segp[0] != '.') || (segp[1] != '.') +                || ((segp[2] != '/') && (segp[2] != '\0')))) { +          cur = segp; +          continue; +        } + +        /* If we get here, remove this segment and the next one and back up +         * to the previous segment (if there is one), to implement the +         * "iteratively" clause.  It's pretty much impossible to back up +         * while maintaining two pointers into the buffer, so just compact +         * the whole buffer now. +         */ + +        /* If this is the end of the buffer, we're done.  */ +        if (segp[2] == '\0') { +          cur[0] = '\0'; +          break; +        } +        /* Valgrind complained, strcpy(cur, segp + 3); */ +        /* string will overlap, do not use strcpy */ +        tmp = cur; +        segp += 3; +        while ((*tmp++ = *segp++) != 0) +          ; + +        /* If there are no previous segments, then keep going from here.  */ +        segp = cur; +        while ((segp > path) && ((--segp)[0] == '/')) +          ; +        if (segp == path) +          continue; + +        /* "segp" is pointing to the end of a previous segment; find it's +         * start.  We need to back up to the previous segment and start +         * over with that to handle things like "foo/bar/../..".  
If we +         * don't do this, then on the first pass we'll remove the "bar/..", +         * but be pointing at the second ".." so we won't realize we can also +         * remove the "foo/..". +         */ +        cur = segp; +        while ((cur > path) && (cur[-1] != '/')) +          --cur; +    } +    out[0] = '\0'; + +    /* +     * g) If the resulting buffer string still begins with one or more +     *    complete path segments of "..", then the reference is +     *    considered to be in error. Implementations may handle this +     *    error by retaining these components in the resolved path (i.e., +     *    treating them as part of the final URI), by removing them from +     *    the resolved path (i.e., discarding relative levels above the +     *    root), or by avoiding traversal of the reference. +     * +     * We discard them from the final path. +     */ +    if (path[0] == '/') { +      cur = path; +      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') +             && ((cur[3] == '/') || (cur[3] == '\0'))) +	cur += 3; + +      if (cur != path) { +	out = path; +	while (cur[0] != '\0') +          (out++)[0] = (cur++)[0]; +	out[0] = 0; +      } +    } + +    return(0); +} + +static int is_hex(char c) { +    if (((c >= '0') && (c <= '9')) || +        ((c >= 'a') && (c <= 'f')) || +        ((c >= 'A') && (c <= 'F'))) +	return(1); +    return(0); +} + + +/** + * uri_string_unescape: + * @str:  the string to unescape + * @len:   the length in bytes to unescape (or <= 0 to indicate full string) + * @target:  optional destination buffer + * + * Unescaping routine, but does not check that the string is an URI. The + * output is a direct unsigned char translation of %XX values (no encoding) + * Note that the length of the result can only be smaller or same size as + * the input string. 
/**
 * uri_string_unescape:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Replaces every "%XX" sequence (two hexadecimal digits, either case)
 * found in the first @len bytes of @str by the byte it denotes; all
 * other bytes, including a '%' that is not followed by two hexadecimal
 * digits inside the range, are copied as they are.  No check is made
 * that the input is a valid URI.
 *
 * The result is never longer than the input.  It is written, followed
 * by a terminating 0, to @target when one is given and to a newly
 * allocated buffer of @len + 1 bytes otherwise.
 *
 * Returns the unescaped string (@target or the new buffer), NULL only in
 * case of error
 */
char *
uri_string_unescape(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;
    int remaining = len;

    if (str == NULL)
        return(NULL);
    if (remaining <= 0)
        remaining = strlen(str);
    if (remaining < 0)
        return(NULL);

    result = (target != NULL) ? target : g_malloc(remaining + 1);

    src = str;
    dst = result;
    while (remaining > 0) {
        if ((remaining > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* fold the two hexadecimal digits following the '%' */
            for (i = 1; i <= 2; i++) {
                char c = src[i];

                value *= 16;
                if ((c >= '0') && (c <= '9'))
                    value += c - '0';
                else if ((c >= 'a') && (c <= 'f'))
                    value += (c - 'a') + 10;
                else
                    value += (c - 'A') + 10; /* is_hex() leaves only A-F */
            }
            *dst++ = value;
            src += 3;
            remaining -= 3;
        } else {
            *dst++ = *src++;
            remaining--;
        }
    }
    *dst = 0;
    return(result);
}

/**
 * uri_string_escape:
 * @str:  string to escape
 * @list: exception list string of chars not to escape
 *
 * This routine escapes a string to hex, ignoring reserved characters (a-z)
 * and the characters in the exception list.
 *
 * Returns a new escaped string or NULL in case of error.
+ */ +char * +uri_string_escape(const char *str, const char *list) { +    char *ret, ch; +    char *temp; +    const char *in; +    int len, out; + +    if (str == NULL) +	return(NULL); +    if (str[0] == 0) +	return(g_strdup(str)); +    len = strlen(str); +    if (!(len > 0)) return(NULL); + +    len += 20; +    ret = g_malloc(len); +    in = str; +    out = 0; +    while(*in != 0) { +	if (len - out <= 3) { +            temp = realloc2n(ret, &len); +	    ret = temp; +	} + +	ch = *in; + +	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) { +	    unsigned char val; +	    ret[out++] = '%'; +	    val = ch >> 4; +	    if (val <= 9) +		ret[out++] = '0' + val; +	    else +		ret[out++] = 'A' + val - 0xA; +	    val = ch & 0xF; +	    if (val <= 9) +		ret[out++] = '0' + val; +	    else +		ret[out++] = 'A' + val - 0xA; +	    in++; +	} else { +	    ret[out++] = *in++; +	} + +    } +    ret[out] = 0; +    return(ret); +} + +/************************************************************************ + *									* + *			Public functions				* + *									* + ************************************************************************/ + +/** + * uri_resolve: + * @URI:  the URI instance found in the document + * @base:  the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + *         of error. + */ +char * +uri_resolve(const char *uri, const char *base) { +    char *val = NULL; +    int ret, len, indx, cur, out; +    URI *ref = NULL; +    URI *bas = NULL; +    URI *res = NULL; + +    /* +     * 1) The URI reference is parsed into the potential four components and +     *    fragment identifier, as described in Section 4.3. 
+     * +     *    NOTE that a completely empty URI is treated by modern browsers +     *    as a reference to "." rather than as a synonym for the current +     *    URI.  Should we do that here? +     */ +    if (uri == NULL) +	ret = -1; +    else { +	if (*uri) { +	    ref = uri_new(); +	    ret = uri_parse_into(ref, uri); +	} +	else +	    ret = 0; +    } +    if (ret != 0) +	goto done; +    if ((ref != NULL) && (ref->scheme != NULL)) { +	/* +	 * The URI is absolute don't modify. +	 */ +	val = g_strdup(uri); +	goto done; +    } +    if (base == NULL) +	ret = -1; +    else { +	bas = uri_new(); +	ret = uri_parse_into(bas, base); +    } +    if (ret != 0) { +	if (ref) +	    val = uri_to_string(ref); +	goto done; +    } +    if (ref == NULL) { +	/* +	 * the base fragment must be ignored +	 */ +        g_free(bas->fragment); +        bas->fragment = NULL; +	val = uri_to_string(bas); +	goto done; +    } + +    /* +     * 2) If the path component is empty and the scheme, authority, and +     *    query components are undefined, then it is a reference to the +     *    current document and we are done.  Otherwise, the reference URI's +     *    query and fragment components are defined as found (or not found) +     *    within the URI reference and not inherited from the base URI. +     * +     *    NOTE that in modern browsers, the parsing differs from the above +     *    in the following aspect:  the query component is allowed to be +     *    defined while still treating this as a reference to the current +     *    document. 
+     */ +    res = uri_new(); +    if ((ref->scheme == NULL) && (ref->path == NULL) && +	((ref->authority == NULL) && (ref->server == NULL))) { +        res->scheme = g_strdup(bas->scheme); +	if (bas->authority != NULL) +	    res->authority = g_strdup(bas->authority); +	else if (bas->server != NULL) { +            res->server = g_strdup(bas->server); +            res->user = g_strdup(bas->user); +            res->port = bas->port; +	} +        res->path = g_strdup(bas->path); +        if (ref->query != NULL) { +	    res->query = g_strdup (ref->query); +        } else { +            res->query = g_strdup(bas->query); +        } +        res->fragment = g_strdup(ref->fragment); +	goto step_7; +    } + +    /* +     * 3) If the scheme component is defined, indicating that the reference +     *    starts with a scheme name, then the reference is interpreted as an +     *    absolute URI and we are done.  Otherwise, the reference URI's +     *    scheme is inherited from the base URI's scheme component. +     */ +    if (ref->scheme != NULL) { +	val = uri_to_string(ref); +	goto done; +    } +    res->scheme = g_strdup(bas->scheme); + +    res->query = g_strdup(ref->query); +    res->fragment = g_strdup(ref->fragment); + +    /* +     * 4) If the authority component is defined, then the reference is a +     *    network-path and we skip to step 7.  Otherwise, the reference +     *    URI's authority is inherited from the base URI's authority +     *    component, which will also be undefined if the URI scheme does not +     *    use an authority component. 
+     */ +    if ((ref->authority != NULL) || (ref->server != NULL)) { +	if (ref->authority != NULL) +	    res->authority = g_strdup(ref->authority); +	else { +	    res->server = g_strdup(ref->server); +            res->user = g_strdup(ref->user); +            res->port = ref->port; +	} +        res->path = g_strdup(ref->path); +	goto step_7; +    } +    if (bas->authority != NULL) +	res->authority = g_strdup(bas->authority); +    else if (bas->server != NULL) { +        res->server = g_strdup(bas->server); +        res->user = g_strdup(bas->user); +	res->port = bas->port; +    } + +    /* +     * 5) If the path component begins with a slash character ("/"), then +     *    the reference is an absolute-path and we skip to step 7. +     */ +    if ((ref->path != NULL) && (ref->path[0] == '/')) { +	res->path = g_strdup(ref->path); +	goto step_7; +    } + + +    /* +     * 6) If this step is reached, then we are resolving a relative-path +     *    reference.  The relative path needs to be merged with the base +     *    URI's path.  Although there are many ways to do this, we will +     *    describe a simple method using a separate string buffer. +     * +     * Allocate a buffer large enough for the result string. +     */ +    len = 2; /* extra / and 0 */ +    if (ref->path != NULL) +	len += strlen(ref->path); +    if (bas->path != NULL) +	len += strlen(bas->path); +    res->path = g_malloc(len); +    res->path[0] = 0; + +    /* +     * a) All but the last segment of the base URI's path component is +     *    copied to the buffer.  In other words, any characters after the +     *    last (right-most) slash character, if any, are excluded. 
+     */ +    cur = 0; +    out = 0; +    if (bas->path != NULL) { +	while (bas->path[cur] != 0) { +	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) +		cur++; +	    if (bas->path[cur] == 0) +		break; + +	    cur++; +	    while (out < cur) { +		res->path[out] = bas->path[out]; +		out++; +	    } +	} +    } +    res->path[out] = 0; + +    /* +     * b) The reference's path component is appended to the buffer +     *    string. +     */ +    if (ref->path != NULL && ref->path[0] != 0) { +	indx = 0; +	/* +	 * Ensure the path includes a '/' +	 */ +	if ((out == 0) && (bas->server != NULL)) +	    res->path[out++] = '/'; +	while (ref->path[indx] != 0) { +	    res->path[out++] = ref->path[indx++]; +	} +    } +    res->path[out] = 0; + +    /* +     * Steps c) to h) are really path normalization steps +     */ +    normalize_uri_path(res->path); + +step_7: + +    /* +     * 7) The resulting URI components, including any inherited from the +     *    base URI, are recombined to give the absolute form of the URI +     *    reference. +     */ +    val = uri_to_string(res); + +done: +    if (ref != NULL) +	uri_free(ref); +    if (bas != NULL) +	uri_free(bas); +    if (res != NULL) +	uri_free(res); +    return(val); +} + +/** + * uri_resolve_relative: + * @URI:  the URI reference under consideration + * @base:  the base value + * + * Expresses the URI of the reference in terms relative to the + * base.  
Some examples of this operation include: + *     base = "http://site1.com/docs/book1.html" + *        URI input                        URI returned + *     docs/pic1.gif                    pic1.gif + *     docs/img/pic1.gif                img/pic1.gif + *     img/pic1.gif                     ../img/pic1.gif + *     http://site1.com/docs/pic1.gif   pic1.gif + *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif + * + *     base = "docs/book1.html" + *        URI input                        URI returned + *     docs/pic1.gif                    pic1.gif + *     docs/img/pic1.gif                img/pic1.gif + *     img/pic1.gif                     ../img/pic1.gif + *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif + * + * + * Note: if the URI reference is really weird or complicated, it may be + *       worthwhile to first convert it into a "nice" one by calling + *       uri_resolve (using 'base') before calling this routine, + *       since this routine (for reasonable efficiency) assumes URI has + *       already been through some validation. + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * error. 
+ */ +char * +uri_resolve_relative (const char *uri, const char * base) +{ +    char *val = NULL; +    int ret; +    int ix; +    int pos = 0; +    int nbslash = 0; +    int len; +    URI *ref = NULL; +    URI *bas = NULL; +    char *bptr, *uptr, *vptr; +    int remove_path = 0; + +    if ((uri == NULL) || (*uri == 0)) +	return NULL; + +    /* +     * First parse URI into a standard form +     */ +    ref = uri_new (); +    /* If URI not already in "relative" form */ +    if (uri[0] != '.') { +	ret = uri_parse_into (ref, uri); +	if (ret != 0) +	    goto done;		/* Error in URI, return NULL */ +    } else +	ref->path = g_strdup(uri); + +    /* +     * Next parse base into the same standard form +     */ +    if ((base == NULL) || (*base == 0)) { +	val = g_strdup (uri); +	goto done; +    } +    bas = uri_new (); +    if (base[0] != '.') { +	ret = uri_parse_into (bas, base); +	if (ret != 0) +	    goto done;		/* Error in base, return NULL */ +    } else +	bas->path = g_strdup(base); + +    /* +     * If the scheme / server on the URI differs from the base, +     * just return the URI +     */ +    if ((ref->scheme != NULL) && +	((bas->scheme == NULL) || +	 (strcmp (bas->scheme, ref->scheme)) || +	 (strcmp (bas->server, ref->server)))) { +	val = g_strdup (uri); +	goto done; +    } +    if (bas->path == ref->path || +        (bas->path && ref->path && !strcmp(bas->path, ref->path))) { +	val = g_strdup(""); +	goto done; +    } +    if (bas->path == NULL) { +	val = g_strdup(ref->path); +	goto done; +    } +    if (ref->path == NULL) { +        ref->path = (char *) "/"; +	remove_path = 1; +    } + +    /* +     * At this point (at last!) 
we can compare the two paths +     * +     * First we take care of the special case where either of the +     * two path components may be missing (bug 316224) +     */ +    if (bas->path == NULL) { +	if (ref->path != NULL) { +	    uptr = ref->path; +	    if (*uptr == '/') +		uptr++; +	    /* exception characters from uri_to_string */ +	    val = uri_string_escape(uptr, "/;&=+$,"); +	} +	goto done; +    } +    bptr = bas->path; +    if (ref->path == NULL) { +	for (ix = 0; bptr[ix] != 0; ix++) { +	    if (bptr[ix] == '/') +		nbslash++; +	} +	uptr = NULL; +	len = 1;	/* this is for a string terminator only */ +    } else { +    /* +     * Next we compare the two strings and find where they first differ +     */ +	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) +            pos += 2; +	if ((*bptr == '.') && (bptr[1] == '/')) +            bptr += 2; +	else if ((*bptr == '/') && (ref->path[pos] != '/')) +	    bptr++; +	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) +	    pos++; + +	if (bptr[pos] == ref->path[pos]) { +	    val = g_strdup(""); +	    goto done;		/* (I can't imagine why anyone would do this) */ +	} + +	/* +	 * In URI, "back up" to the last '/' encountered.  
This will be the +	 * beginning of the "unique" suffix of URI +	 */ +	ix = pos; +	if ((ref->path[ix] == '/') && (ix > 0)) +	    ix--; +	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) +	    ix -= 2; +	for (; ix > 0; ix--) { +	    if (ref->path[ix] == '/') +		break; +	} +	if (ix == 0) { +	    uptr = ref->path; +	} else { +	    ix++; +	    uptr = &ref->path[ix]; +	} + +	/* +	 * In base, count the number of '/' from the differing point +	 */ +	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ +	    for (; bptr[ix] != 0; ix++) { +		if (bptr[ix] == '/') +		    nbslash++; +	    } +	} +	len = strlen (uptr) + 1; +    } + +    if (nbslash == 0) { +	if (uptr != NULL) +	    /* exception characters from uri_to_string */ +	    val = uri_string_escape(uptr, "/;&=+$,"); +	goto done; +    } + +    /* +     * Allocate just enough space for the returned string - +     * length of the remainder of the URI, plus enough space +     * for the "../" groups, plus one for the terminator +     */ +    val = g_malloc (len + 3 * nbslash); +    vptr = val; +    /* +     * Put in as many "../" as needed +     */ +    for (; nbslash>0; nbslash--) { +	*vptr++ = '.'; +	*vptr++ = '.'; +	*vptr++ = '/'; +    } +    /* +     * Finish up with the end of the URI +     */ +    if (uptr != NULL) { +        if ((vptr > val) && (len > 0) && +	    (uptr[0] == '/') && (vptr[-1] == '/')) { +	    memcpy (vptr, uptr + 1, len - 1); +	    vptr[len - 2] = 0; +	} else { +	    memcpy (vptr, uptr, len); +	    vptr[len - 1] = 0; +	} +    } else { +	vptr[len - 1] = 0; +    } + +    /* escape the freshly-built path */ +    vptr = val; +	/* exception characters from uri_to_string */ +    val = uri_string_escape(vptr, "/;&=+$,"); +    g_free(vptr); + +done: +    /* +     * Free the working variables +     */ +    if (remove_path != 0) +        ref->path = NULL; +    if (ref != NULL) +	uri_free (ref); +    if (bas != NULL) +	uri_free (bas); + +    return val; +} + +/* + * Utility 
functions to help parse and assemble query strings. + */ + +struct QueryParams * +query_params_new (int init_alloc) +{ +    struct QueryParams *ps; + +    if (init_alloc <= 0) init_alloc = 1; + +    ps = g_new(QueryParams, 1); +    ps->n = 0; +    ps->alloc = init_alloc; +    ps->p = g_new(QueryParam, ps->alloc); + +    return ps; +} + +/* Ensure there is space to store at least one more parameter + * at the end of the set. + */ +static int +query_params_append (struct QueryParams *ps, +               const char *name, const char *value) +{ +    if (ps->n >= ps->alloc) { +        ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2); +        ps->alloc *= 2; +    } + +    ps->p[ps->n].name = g_strdup(name); +    ps->p[ps->n].value = g_strdup(value); +    ps->p[ps->n].ignore = 0; +    ps->n++; + +    return 0; +} + +void +query_params_free (struct QueryParams *ps) +{ +    int i; + +    for (i = 0; i < ps->n; ++i) { +        g_free (ps->p[i].name); +        g_free (ps->p[i].value); +    } +    g_free (ps->p); +    g_free (ps); +} + +struct QueryParams * +query_params_parse (const char *query) +{ +    struct QueryParams *ps; +    const char *end, *eq; + +    ps = query_params_new (0); +    if (!query || query[0] == '\0') return ps; + +    while (*query) { +        char *name = NULL, *value = NULL; + +        /* Find the next separator, or end of the string. */ +        end = strchr (query, '&'); +        if (!end) +            end = strchr (query, ';'); +        if (!end) +            end = query + strlen (query); + +        /* Find the first '=' character between here and end. */ +        eq = strchr (query, '='); +        if (eq && eq >= end) eq = NULL; + +        /* Empty section (eg. "&&"). */ +        if (end == query) +            goto next; + +        /* If there is no '=' character, then we have just "name" +         * and consistent with CGI.pm we assume value is "". 
+         */ +        else if (!eq) { +            name = uri_string_unescape (query, end - query, NULL); +            value = NULL; +        } +        /* Or if we have "name=" here (works around annoying +         * problem when calling uri_string_unescape with len = 0). +         */ +        else if (eq+1 == end) { +            name = uri_string_unescape (query, eq - query, NULL); +            value = g_new0(char, 1); +        } +        /* If the '=' character is at the beginning then we have +         * "=value" and consistent with CGI.pm we _ignore_ this. +         */ +        else if (query == eq) +            goto next; + +        /* Otherwise it's "name=value". */ +        else { +            name = uri_string_unescape (query, eq - query, NULL); +            value = uri_string_unescape (eq+1, end - (eq+1), NULL); +        } + +        /* Append to the parameter set. */ +        query_params_append (ps, name, value); +        g_free(name); +        g_free(value); + +    next: +        query = end; +        if (*query) query ++; /* skip '&' separator */ +    } + +    return ps; +} | 
