Diffstat (limited to 'hw/block')
-rw-r--r--  hw/block/Makefile.objs            |   15
-rw-r--r--  hw/block/block.c                  |   91
-rw-r--r--  hw/block/cdrom.c                  |  155
-rw-r--r--  hw/block/dataplane/Makefile.objs  |    1
-rw-r--r--  hw/block/dataplane/virtio-blk.c   |  341
-rw-r--r--  hw/block/dataplane/virtio-blk.h   |   30
-rw-r--r--  hw/block/ecc.c                    |   90
-rw-r--r--  hw/block/fdc.c                    | 2529
-rw-r--r--  hw/block/hd-geometry.c            |  165
-rw-r--r--  hw/block/m25p80.c                 |  711
-rw-r--r--  hw/block/nand.c                   |  799
-rw-r--r--  hw/block/nvme.c                   |  967
-rw-r--r--  hw/block/nvme.h                   |  712
-rw-r--r--  hw/block/onenand.c                |  848
-rw-r--r--  hw/block/pflash_cfi01.c           |  954
-rw-r--r--  hw/block/pflash_cfi02.c           |  795
-rw-r--r--  hw/block/tc58128.c                |  180
-rw-r--r--  hw/block/virtio-blk.c             | 1018
-rw-r--r--  hw/block/xen_blkif.h              |  115
-rw-r--r--  hw/block/xen_disk.c               | 1106
20 files changed, 11622 insertions, 0 deletions
diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs new file mode 100644 index 00000000..d4c3ab75 --- /dev/null +++ b/hw/block/Makefile.objs @@ -0,0 +1,15 @@ +common-obj-y += block.o cdrom.o hd-geometry.o +common-obj-$(CONFIG_FDC) += fdc.o +common-obj-$(CONFIG_SSI_M25P80) += m25p80.o +common-obj-$(CONFIG_NAND) += nand.o +common-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o +common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o +common-obj-$(CONFIG_XEN_BACKEND) += xen_disk.o +common-obj-$(CONFIG_ECC) += ecc.o +common-obj-$(CONFIG_ONENAND) += onenand.o +common-obj-$(CONFIG_NVME_PCI) += nvme.o + +obj-$(CONFIG_SH4) += tc58128.o + +obj-$(CONFIG_VIRTIO) += virtio-blk.o +obj-$(CONFIG_VIRTIO) += dataplane/ diff --git a/hw/block/block.c b/hw/block/block.c new file mode 100644 index 00000000..f7243e5b --- /dev/null +++ b/hw/block/block.c @@ -0,0 +1,91 @@ +/* + * Common code for block device models + * + * Copyright (C) 2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later.  See the COPYING file in the top-level directory. + */ + +#include "sysemu/blockdev.h" +#include "sysemu/block-backend.h" +#include "hw/block/block.h" +#include "qemu/error-report.h" + +void blkconf_serial(BlockConf *conf, char **serial) +{ +    DriveInfo *dinfo; + +    if (!*serial) { +        /* try to fall back to value set with legacy -drive serial=... */ +        dinfo = blk_legacy_dinfo(conf->blk); +        if (dinfo) { +            *serial = g_strdup(dinfo->serial); +        } +    } +} + +void blkconf_blocksizes(BlockConf *conf) +{ +    BlockBackend *blk = conf->blk; +    BlockSizes blocksizes; +    int backend_ret; + +    backend_ret = blk_probe_blocksizes(blk, &blocksizes); +    /* fill in detected values if they are not defined via qemu command line */ +    if (!conf->physical_block_size) { +        if (!backend_ret) { +           conf->physical_block_size = blocksizes.phys; +        } else { +            conf->physical_block_size = BDRV_SECTOR_SIZE; +        } +    } +    if (!conf->logical_block_size) { +        if (!backend_ret) { +            conf->logical_block_size = blocksizes.log; +        } else { +            conf->logical_block_size = BDRV_SECTOR_SIZE; +        } +    } +} + +void blkconf_geometry(BlockConf *conf, int *ptrans, +                      unsigned cyls_max, unsigned heads_max, unsigned secs_max, +                      Error **errp) +{ +    DriveInfo *dinfo; + +    if (!conf->cyls && !conf->heads && !conf->secs) { +        /* try to fall back to value set with legacy -drive cyls=... 
*/ +        dinfo = blk_legacy_dinfo(conf->blk); +        if (dinfo) { +            conf->cyls  = dinfo->cyls; +            conf->heads = dinfo->heads; +            conf->secs  = dinfo->secs; +            if (ptrans) { +                *ptrans = dinfo->trans; +            } +        } +    } +    if (!conf->cyls && !conf->heads && !conf->secs) { +        hd_geometry_guess(conf->blk, +                          &conf->cyls, &conf->heads, &conf->secs, +                          ptrans); +    } else if (ptrans && *ptrans == BIOS_ATA_TRANSLATION_AUTO) { +        *ptrans = hd_bios_chs_auto_trans(conf->cyls, conf->heads, conf->secs); +    } +    if (conf->cyls || conf->heads || conf->secs) { +        if (conf->cyls < 1 || conf->cyls > cyls_max) { +            error_setg(errp, "cyls must be between 1 and %u", cyls_max); +            return; +        } +        if (conf->heads < 1 || conf->heads > heads_max) { +            error_setg(errp, "heads must be between 1 and %u", heads_max); +            return; +        } +        if (conf->secs < 1 || conf->secs > secs_max) { +            error_setg(errp, "secs must be between 1 and %u", secs_max); +            return; +        } +    } +} diff --git a/hw/block/cdrom.c b/hw/block/cdrom.c new file mode 100644 index 00000000..4e1019c8 --- /dev/null +++ b/hw/block/cdrom.c @@ -0,0 +1,155 @@ +/* + * QEMU ATAPI CD-ROM Emulator + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* ??? Most of the ATAPI emulation is still in ide.c.  It should be moved +   here.  */ + +#include "qemu-common.h" +#include "hw/scsi/scsi.h" + +static void lba_to_msf(uint8_t *buf, int lba) +{ +    lba += 150; +    buf[0] = (lba / 75) / 60; +    buf[1] = (lba / 75) % 60; +    buf[2] = lba % 75; +} + +/* same toc as bochs. 
Return -1 if error or the toc length */ +/* XXX: check this */ +int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track) +{ +    uint8_t *q; +    int len; + +    if (start_track > 1 && start_track != 0xaa) +        return -1; +    q = buf + 2; +    *q++ = 1; /* first session */ +    *q++ = 1; /* last session */ +    if (start_track <= 1) { +        *q++ = 0; /* reserved */ +        *q++ = 0x14; /* ADR, control */ +        *q++ = 1;    /* track number */ +        *q++ = 0; /* reserved */ +        if (msf) { +            *q++ = 0; /* reserved */ +            lba_to_msf(q, 0); +            q += 3; +        } else { +            /* sector 0 */ +            stl_be_p(q, 0); +            q += 4; +        } +    } +    /* lead out track */ +    *q++ = 0; /* reserved */ +    *q++ = 0x16; /* ADR, control */ +    *q++ = 0xaa; /* track number */ +    *q++ = 0; /* reserved */ +    if (msf) { +        *q++ = 0; /* reserved */ +        lba_to_msf(q, nb_sectors); +        q += 3; +    } else { +        stl_be_p(q, nb_sectors); +        q += 4; +    } +    len = q - buf; +    stw_be_p(buf, len - 2); +    return len; +} + +/* mostly same info as PearPc */ +int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num) +{ +    uint8_t *q; +    int len; + +    q = buf + 2; +    *q++ = 1; /* first session */ +    *q++ = 1; /* last session */ + +    *q++ = 1; /* session number */ +    *q++ = 0x14; /* data track */ +    *q++ = 0; /* track number */ +    *q++ = 0xa0; /* lead-in */ +    *q++ = 0; /* min */ +    *q++ = 0; /* sec */ +    *q++ = 0; /* frame */ +    *q++ = 0; +    *q++ = 1; /* first track */ +    *q++ = 0x00; /* disk type */ +    *q++ = 0x00; + +    *q++ = 1; /* session number */ +    *q++ = 0x14; /* data track */ +    *q++ = 0; /* track number */ +    *q++ = 0xa1; +    *q++ = 0; /* min */ +    *q++ = 0; /* sec */ +    *q++ = 0; /* frame */ +    *q++ = 0; +    *q++ = 1; /* last track */ +    *q++ = 0x00; +    *q++ = 0x00; + +    *q++ = 1; /* session number */ +    *q++ = 0x14; /* data track */ +    *q++ = 0; /* track number */ +    *q++ = 0xa2; /* lead-out */ +    *q++ = 0; /* min */ +    *q++ = 0; /* sec */ +    *q++ = 0; /* frame */ +    if (msf) { +        *q++ = 0; /* reserved */ +        lba_to_msf(q, nb_sectors); +        q += 3; +    } else { +        stl_be_p(q, nb_sectors); +        q += 4; +    } + +    *q++ = 1; /* session number */ +    *q++ = 0x14; /* ADR, control */ +    *q++ = 0;    /* track number */ +    *q++ = 1;    /* point */ +    *q++ = 0; /* min */ +    *q++ = 0; /* sec */ +    *q++ = 0; /* frame */ +    if (msf) { +        *q++ = 0; +        lba_to_msf(q, 0); +        q += 3; +    } else { +        *q++ = 0; +        *q++ = 0; +        *q++ = 0; +        *q++ = 0; +    } + +    len = q - buf; +    stw_be_p(buf, len - 2); +    return len; +} diff --git a/hw/block/dataplane/Makefile.objs b/hw/block/dataplane/Makefile.objs new file mode 100644 index 00000000..e786f664 --- /dev/null +++ b/hw/block/dataplane/Makefile.objs @@ -0,0 +1 @@ +obj-y += virtio-blk.o diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c new file mode 100644 index 00000000..6106e461 --- /dev/null +++ b/hw/block/dataplane/virtio-blk.c @@ -0,0 +1,341 @@ +/* + * Dedicated thread for virtio-blk I/O processing + * + * Copyright 2012 IBM, Corp. + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + *   Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + * + */ + +#include "trace.h" +#include "qemu/iov.h" +#include "qemu/thread.h" +#include "qemu/error-report.h" +#include "hw/virtio/virtio-access.h" +#include "hw/virtio/dataplane/vring.h" +#include "hw/virtio/dataplane/vring-accessors.h" +#include "sysemu/block-backend.h" +#include "hw/virtio/virtio-blk.h" +#include "virtio-blk.h" +#include "block/aio.h" +#include "hw/virtio/virtio-bus.h" +#include "qom/object_interfaces.h" + +struct VirtIOBlockDataPlane { +    bool started; +    bool starting; +    bool stopping; +    bool disabled; + +    VirtIOBlkConf *conf; + +    VirtIODevice *vdev; +    Vring vring;                    /* virtqueue vring */ +    EventNotifier *guest_notifier;  /* irq */ +    QEMUBH *bh;                     /* bh for guest notification */ + +    /* Note that these EventNotifiers are assigned by value.  This is +     * fine as long as you do not call event_notifier_cleanup on them +     * (because you don't own the file descriptor or handle; you just +     * use it). +     */ +    IOThread *iothread; +    IOThread internal_iothread_obj; +    AioContext *ctx; +    EventNotifier host_notifier;    /* doorbell */ + +    /* Operation blocker on BDS */ +    Error *blocker; +    void (*saved_complete_request)(struct VirtIOBlockReq *req, +                                   unsigned char status); +}; + +/* Raise an interrupt to signal guest, if necessary */ +static void notify_guest(VirtIOBlockDataPlane *s) +{ +    if (!vring_should_notify(s->vdev, &s->vring)) { +        return; +    } + +    event_notifier_set(s->guest_notifier); +} + +static void notify_guest_bh(void *opaque) +{ +    VirtIOBlockDataPlane *s = opaque; + +    notify_guest(s); +} + +static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) +{ +    VirtIOBlockDataPlane *s = req->dev->dataplane; +    stb_p(&req->in->status, status); + +    vring_push(s->vdev, &req->dev->dataplane->vring, &req->elem, req->in_len); + +    /* Suppress notification to guest by BH and its scheduled +     * flag because requests are completed as a batch after io +     * plug & unplug is introduced, and the BH can still be +     * executed in dataplane aio context even after it is +     * stopped, so needn't worry about notification loss with BH. 
+     */ +    qemu_bh_schedule(s->bh); +} + +static void handle_notify(EventNotifier *e) +{ +    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, +                                           host_notifier); +    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); + +    event_notifier_test_and_clear(&s->host_notifier); +    blk_io_plug(s->conf->conf.blk); +    for (;;) { +        MultiReqBuffer mrb = {}; +        int ret; + +        /* Disable guest->host notifies to avoid unnecessary vmexits */ +        vring_disable_notification(s->vdev, &s->vring); + +        for (;;) { +            VirtIOBlockReq *req = virtio_blk_alloc_request(vblk); + +            ret = vring_pop(s->vdev, &s->vring, &req->elem); +            if (ret < 0) { +                virtio_blk_free_request(req); +                break; /* no more requests */ +            } + +            trace_virtio_blk_data_plane_process_request(s, req->elem.out_num, +                                                        req->elem.in_num, +                                                        req->elem.index); + +            virtio_blk_handle_request(req, &mrb); +        } + +        if (mrb.num_reqs) { +            virtio_blk_submit_multireq(s->conf->conf.blk, &mrb); +        } + +        if (likely(ret == -EAGAIN)) { /* vring emptied */ +            /* Re-enable guest->host notifies and stop processing the vring. +             * But if the guest has snuck in more descriptors, keep processing. +             */ +            if (vring_enable_notification(s->vdev, &s->vring)) { +                break; +            } +        } else { /* fatal error */ +            break; +        } +    } +    blk_io_unplug(s->conf->conf.blk); +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, +                                  VirtIOBlockDataPlane **dataplane, +                                  Error **errp) +{ +    VirtIOBlockDataPlane *s; +    Error *local_err = NULL; +    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + +    *dataplane = NULL; + +    if (!conf->data_plane && !conf->iothread) { +        return; +    } + +    /* Don't try if transport does not support notifiers. */ +    if (!k->set_guest_notifiers || !k->set_host_notifier) { +        error_setg(errp, +                   "device is incompatible with x-data-plane " +                   "(transport does not support notifiers)"); +        return; +    } + +    /* If dataplane is (re-)enabled while the guest is running there could be +     * block jobs that can conflict. +     */ +    if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, +                          &local_err)) { +        error_setg(errp, "cannot start dataplane thread: %s", +                   error_get_pretty(local_err)); +        error_free(local_err); +        return; +    } + +    s = g_new0(VirtIOBlockDataPlane, 1); +    s->vdev = vdev; +    s->conf = conf; + +    if (conf->iothread) { +        s->iothread = conf->iothread; +        object_ref(OBJECT(s->iothread)); +    } else { +        /* Create per-device IOThread if none specified.  This is for +         * x-data-plane option compatibility.  If x-data-plane is removed we +         * can drop this. 
+         */ +        object_initialize(&s->internal_iothread_obj, +                          sizeof(s->internal_iothread_obj), +                          TYPE_IOTHREAD); +        user_creatable_complete(OBJECT(&s->internal_iothread_obj), &error_abort); +        s->iothread = &s->internal_iothread_obj; +    } +    s->ctx = iothread_get_aio_context(s->iothread); +    s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); + +    error_setg(&s->blocker, "block device is in use by data plane"); +    blk_op_block_all(conf->conf.blk, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_RESIZE, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_DRIVE_DEL, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_BACKUP_SOURCE, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_CHANGE, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_SOURCE, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_COMMIT_TARGET, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EJECT, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, +                   s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_MIRROR, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_STREAM, s->blocker); +    blk_op_unblock(conf->conf.blk, BLOCK_OP_TYPE_REPLACE, s->blocker); + +    *dataplane = s; +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) +{ +    if (!s) { +        return; +    } + +    virtio_blk_data_plane_stop(s); +    blk_op_unblock_all(s->conf->conf.blk, s->blocker); +    error_free(s->blocker); +    qemu_bh_delete(s->bh); +    object_unref(OBJECT(s->iothread)); +    g_free(s); +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) +{ +    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); +    VirtQueue *vq; +    int r; + +    if (s->started || s->disabled) { +        return; +    } + +    if (s->starting) { +        return; +    } + +    s->starting = true; + +    vq = virtio_get_queue(s->vdev, 0); +    if (!vring_setup(&s->vring, s->vdev, 0)) { +        goto fail_vring; +    } + +    /* Set up guest notifier (irq) */ +    r = k->set_guest_notifiers(qbus->parent, 1, true); +    if (r != 0) { +        fprintf(stderr, "virtio-blk failed to set guest notifier (%d), " +                "ensure -enable-kvm is set\n", r); +        goto fail_guest_notifiers; +    } +    s->guest_notifier = virtio_queue_get_guest_notifier(vq); + +    /* Set up virtqueue notify */ +    r = k->set_host_notifier(qbus->parent, 0, true); +    if (r != 0) { +        fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); +        goto fail_host_notifier; +    } +    s->host_notifier = *virtio_queue_get_host_notifier(vq); + +    s->saved_complete_request = vblk->complete_request; +    vblk->complete_request = complete_request_vring; + +    s->starting = false; +    s->started = true; +    trace_virtio_blk_data_plane_start(s); + +    blk_set_aio_context(s->conf->conf.blk, s->ctx); + +    /* Kick right away to begin processing requests already in vring */ +    event_notifier_set(virtio_queue_get_host_notifier(vq)); + +    /* Get this show 
started by hooking up our callbacks */ +    aio_context_acquire(s->ctx); +    aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify); +    aio_context_release(s->ctx); +    return; + +  fail_host_notifier: +    k->set_guest_notifiers(qbus->parent, 1, false); +  fail_guest_notifiers: +    vring_teardown(&s->vring, s->vdev, 0); +    s->disabled = true; +  fail_vring: +    s->starting = false; +} + +/* Context: QEMU global mutex held */ +void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) +{ +    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +    VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); + + +    /* Better luck next time. */ +    if (s->disabled) { +        s->disabled = false; +        return; +    } +    if (!s->started || s->stopping) { +        return; +    } +    s->stopping = true; +    vblk->complete_request = s->saved_complete_request; +    trace_virtio_blk_data_plane_stop(s); + +    aio_context_acquire(s->ctx); + +    /* Stop notifications for new requests from guest */ +    aio_set_event_notifier(s->ctx, &s->host_notifier, NULL); + +    /* Drain and switch bs back to the QEMU main loop */ +    blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context()); + +    aio_context_release(s->ctx); + +    /* Sync vring state back to virtqueue so that non-dataplane request +     * processing can continue when we disable the host notifier below. +     */ +    vring_teardown(&s->vring, s->vdev, 0); + +    k->set_host_notifier(qbus->parent, 0, false); + +    /* Clean up guest notifier (irq) */ +    k->set_guest_notifiers(qbus->parent, 1, false); + +    s->started = false; +    s->stopping = false; +} diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h new file mode 100644 index 00000000..c88d40e7 --- /dev/null +++ b/hw/block/dataplane/virtio-blk.h @@ -0,0 +1,30 @@ +/* + * Dedicated thread for virtio-blk I/O processing + * + * Copyright 2012 IBM, Corp. + * Copyright 2012 Red Hat, Inc. and/or its affiliates + * + * Authors: + *   Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef HW_DATAPLANE_VIRTIO_BLK_H +#define HW_DATAPLANE_VIRTIO_BLK_H + +#include "hw/virtio/virtio.h" + +typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; + +void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, +                                  VirtIOBlockDataPlane **dataplane, +                                  Error **errp); +void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s); +void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s); + +#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ diff --git a/hw/block/ecc.c b/hw/block/ecc.c new file mode 100644 index 00000000..10bb2330 --- /dev/null +++ b/hw/block/ecc.c @@ -0,0 +1,90 @@ +/* + * Calculate Error-correcting Codes. Used by NAND Flash controllers + * (not by NAND chips). + * + * Copyright (c) 2006 Openedhand Ltd. + * Written by Andrzej Zaborowski <balrog@zabor.org> + * + * This code is licensed under the GNU GPL v2. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "hw/hw.h" +#include "hw/block/flash.h" + +/* + * Pre-calculated 256-way 1 byte column parity.  
Table borrowed from Linux. + */ +static const uint8_t nand_ecc_precalc_table[] = { +    0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, +    0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00, +    0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, +    0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65, +    0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, +    0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66, +    0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, +    0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03, +    0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, +    0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69, +    0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, +    0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c, +    0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, +    0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f, +    0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, +    0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a, +    0x6a, 0x3f, 0x3c, 0x69, 0x33, 0x66, 0x65, 0x30, +    0x30, 0x65, 0x66, 0x33, 0x69, 0x3c, 0x3f, 0x6a, +    0x0f, 0x5a, 0x59, 0x0c, 0x56, 0x03, 0x00, 0x55, +    0x55, 0x00, 0x03, 0x56, 0x0c, 0x59, 0x5a, 0x0f, +    0x0c, 0x59, 0x5a, 0x0f, 0x55, 0x00, 0x03, 0x56, +    0x56, 0x03, 0x00, 0x55, 0x0f, 0x5a, 0x59, 0x0c, +    0x69, 0x3c, 0x3f, 0x6a, 0x30, 0x65, 0x66, 0x33, +    0x33, 0x66, 0x65, 0x30, 0x6a, 0x3f, 0x3c, 0x69, +    0x03, 0x56, 0x55, 0x00, 0x5a, 0x0f, 0x0c, 0x59, +    0x59, 0x0c, 0x0f, 0x5a, 0x00, 0x55, 0x56, 0x03, +    0x66, 0x33, 0x30, 0x65, 0x3f, 0x6a, 0x69, 0x3c, +    0x3c, 0x69, 0x6a, 0x3f, 0x65, 0x30, 0x33, 0x66, +    0x65, 0x30, 0x33, 0x66, 0x3c, 0x69, 0x6a, 0x3f, +    0x3f, 0x6a, 0x69, 0x3c, 0x66, 0x33, 0x30, 0x65, +    0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, +    0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00, +}; + +/* Update ECC parity count.  */ +uint8_t ecc_digest(ECCState *s, uint8_t sample) +{ +    uint8_t idx = nand_ecc_precalc_table[sample]; + +    s->cp ^= idx & 0x3f; +    if (idx & 0x40) { +        s->lp[0] ^= ~s->count; +        s->lp[1] ^= s->count; +    } +    s->count ++; + +    return sample; +} + +/* Reinitialise the counters.  */ +void ecc_reset(ECCState *s) +{ +    s->lp[0] = 0x0000; +    s->lp[1] = 0x0000; +    s->cp = 0x00; +    s->count = 0; +} + +/* Save/restore */ +VMStateDescription vmstate_ecc_state = { +    .name = "ecc-state", +    .version_id = 0, +    .minimum_version_id = 0, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(cp, ECCState), +        VMSTATE_UINT16_ARRAY(lp, ECCState, 2), +        VMSTATE_UINT16(count, ECCState), +        VMSTATE_END_OF_LIST(), +    }, +}; diff --git a/hw/block/fdc.c b/hw/block/fdc.c new file mode 100644 index 00000000..5e1b67ee --- /dev/null +++ b/hw/block/fdc.c @@ -0,0 +1,2529 @@ +/* + * QEMU Floppy disk emulator (Intel 82078) + * + * Copyright (c) 2003, 2007 Jocelyn Mayer + * Copyright (c) 2008 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/* + * The controller is used in Sun4m systems in a slightly different + * way. There are changes in DOR register and DMA is not available. + */ + +#include "hw/hw.h" +#include "hw/block/fdc.h" +#include "qemu/error-report.h" +#include "qemu/timer.h" +#include "hw/isa/isa.h" +#include "hw/sysbus.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" +#include "qemu/log.h" + +/********************************************************/ +/* debug Floppy devices */ +//#define DEBUG_FLOPPY + +#ifdef DEBUG_FLOPPY +#define FLOPPY_DPRINTF(fmt, ...)                                \ +    do { printf("FLOPPY: " fmt , ## __VA_ARGS__); } while (0) +#else +#define FLOPPY_DPRINTF(fmt, ...) +#endif + +/********************************************************/ +/* Floppy drive emulation                               */ + +typedef enum FDriveRate { +    FDRIVE_RATE_500K = 0x00,  /* 500 Kbps */ +    FDRIVE_RATE_300K = 0x01,  /* 300 Kbps */ +    FDRIVE_RATE_250K = 0x02,  /* 250 Kbps */ +    FDRIVE_RATE_1M   = 0x03,  /*   1 Mbps */ +} FDriveRate; + +typedef struct FDFormat { +    FDriveType drive; +    uint8_t last_sect; +    uint8_t max_track; +    uint8_t max_head; +    FDriveRate rate; +} FDFormat; + +static const FDFormat fd_formats[] = { +    /* First entry is default format */ +    /* 1.44 MB 3"1/2 floppy disks */ +    { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, }, +    /* 2.88 MB 3"1/2 floppy disks */ +    { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, }, +    { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, }, +    { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, }, +    { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, }, +    { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, }, +    /* 720 kB 3"1/2 floppy disks */ +    { FDRIVE_DRV_144,  9, 80, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, }, +    /* 1.2 MB 5"1/4 floppy disks */ +    { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, }, +    { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, }, +    /* 720 kB 5"1/4 floppy disks */ +    { FDRIVE_DRV_120,  9, 80, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, }, +    /* 360 kB 5"1/4 floppy disks */ +    { FDRIVE_DRV_120,  9, 40, 1, FDRIVE_RATE_300K, }, +    { FDRIVE_DRV_120,  9, 40, 0, 
FDRIVE_RATE_300K, }, +    { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, }, +    { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, }, +    /* 320 kB 5"1/4 floppy disks */ +    { FDRIVE_DRV_120,  8, 40, 1, FDRIVE_RATE_250K, }, +    { FDRIVE_DRV_120,  8, 40, 0, FDRIVE_RATE_250K, }, +    /* 360 kB must match 5"1/4 better than 3"1/2... */ +    { FDRIVE_DRV_144,  9, 80, 0, FDRIVE_RATE_250K, }, +    /* end */ +    { FDRIVE_DRV_NONE, -1, -1, 0, 0, }, +}; + +static void pick_geometry(BlockBackend *blk, int *nb_heads, +                          int *max_track, int *last_sect, +                          FDriveType drive_in, FDriveType *drive, +                          FDriveRate *rate) +{ +    const FDFormat *parse; +    uint64_t nb_sectors, size; +    int i, first_match, match; + +    blk_get_geometry(blk, &nb_sectors); +    match = -1; +    first_match = -1; +    for (i = 0; ; i++) { +        parse = &fd_formats[i]; +        if (parse->drive == FDRIVE_DRV_NONE) { +            break; +        } +        if (drive_in == parse->drive || +            drive_in == FDRIVE_DRV_NONE) { +            size = (parse->max_head + 1) * parse->max_track * +                parse->last_sect; +            if (nb_sectors == size) { +                match = i; +                break; +            } +            if (first_match == -1) { +                first_match = i; +            } +        } +    } +    if (match == -1) { +        if (first_match == -1) { +            match = 1; +        } else { +            match = first_match; +        } +        parse = &fd_formats[match]; +    } +    *nb_heads = parse->max_head + 1; +    *max_track = parse->max_track; +    *last_sect = parse->last_sect; +    *drive = parse->drive; +    *rate = parse->rate; +} + +#define GET_CUR_DRV(fdctrl) ((fdctrl)->cur_drv) +#define SET_CUR_DRV(fdctrl, drive) ((fdctrl)->cur_drv = (drive)) + +/* Will always be a fixed parameter for us */ +#define FD_SECTOR_LEN          512 +#define FD_SECTOR_SC           2   /* Sector size code */ +#define FD_RESET_SENSEI_COUNT  4   /* Number of sense interrupts on RESET */ + +typedef struct FDCtrl FDCtrl; + +/* Floppy disk drive emulation */ +typedef enum FDiskFlags { +    FDISK_DBL_SIDES  = 0x01, +} FDiskFlags; + +typedef struct FDrive { +    FDCtrl *fdctrl; +    BlockBackend *blk; +    /* Drive status */ +    FDriveType drive; +    uint8_t perpendicular;    /* 2.88 MB access mode    */ +    /* Position */ +    uint8_t head; +    uint8_t track; +    uint8_t sect; +    /* Media */ +    FDiskFlags flags; +    uint8_t last_sect;        /* Nb sector per track    */ +    uint8_t max_track;        /* Nb of tracks           */ +    uint16_t bps;             /* Bytes per sector       */ +    uint8_t ro;               /* Is read-only           */ +    uint8_t media_changed;    /* Is media changed       */ +    uint8_t media_rate;       /* Data rate of medium    */ +} FDrive; + +static void fd_init(FDrive *drv) +{ +    /* Drive */ +    drv->drive = FDRIVE_DRV_NONE; +    drv->perpendicular = 0; +    /* Disk */ +    drv->last_sect = 0; +    drv->max_track = 0; +} + +#define NUM_SIDES(drv) ((drv)->flags & FDISK_DBL_SIDES ? 
2 : 1) + +static int fd_sector_calc(uint8_t head, uint8_t track, uint8_t sect, +                          uint8_t last_sect, uint8_t num_sides) +{ +    return (((track * num_sides) + head) * last_sect) + sect - 1; +} + +/* Returns current position, in sectors, for given drive */ +static int fd_sector(FDrive *drv) +{ +    return fd_sector_calc(drv->head, drv->track, drv->sect, drv->last_sect, +                          NUM_SIDES(drv)); +} + +/* Seek to a new position: + * returns 0 if already on right track + * returns 1 if track changed + * returns 2 if track is invalid + * returns 3 if sector is invalid + * returns 4 if seek is disabled + */ +static int fd_seek(FDrive *drv, uint8_t head, uint8_t track, uint8_t sect, +                   int enable_seek) +{ +    uint32_t sector; +    int ret; + +    if (track > drv->max_track || +        (head != 0 && (drv->flags & FDISK_DBL_SIDES) == 0)) { +        FLOPPY_DPRINTF("try to read %d %02x %02x (max=%d %d %02x %02x)\n", +                       head, track, sect, 1, +                       (drv->flags & FDISK_DBL_SIDES) == 0 ? 0 : 1, +                       drv->max_track, drv->last_sect); +        return 2; +    } +    if (sect > drv->last_sect) { +        FLOPPY_DPRINTF("try to read %d %02x %02x (max=%d %d %02x %02x)\n", +                       head, track, sect, 1, +                       (drv->flags & FDISK_DBL_SIDES) == 0 ? 0 : 1, +                       drv->max_track, drv->last_sect); +        return 3; +    } +    sector = fd_sector_calc(head, track, sect, drv->last_sect, NUM_SIDES(drv)); +    ret = 0; +    if (sector != fd_sector(drv)) { +#if 0 +        if (!enable_seek) { +            FLOPPY_DPRINTF("error: no implicit seek %d %02x %02x" +                           " (max=%d %02x %02x)\n", +                           head, track, sect, 1, drv->max_track, +                           drv->last_sect); +            return 4; +        } +#endif +        drv->head = head; +        if (drv->track != track) { +            if (drv->blk != NULL && blk_is_inserted(drv->blk)) { +                drv->media_changed = 0; +            } +            ret = 1; +        } +        drv->track = track; +        drv->sect = sect; +    } + +    if (drv->blk == NULL || !blk_is_inserted(drv->blk)) { +        ret = 2; +    } + +    return ret; +} + +/* Set drive back to track 0 */ +static void fd_recalibrate(FDrive *drv) +{ +    FLOPPY_DPRINTF("recalibrate\n"); +    fd_seek(drv, 0, 0, 1, 1); +} + +/* Revalidate a disk drive after a disk change */ +static void fd_revalidate(FDrive *drv) +{ +    int nb_heads, max_track, last_sect, ro; +    FDriveType drive; +    FDriveRate rate; + +    FLOPPY_DPRINTF("revalidate\n"); +    if (drv->blk != NULL) { +        ro = blk_is_read_only(drv->blk); +        pick_geometry(drv->blk, &nb_heads, &max_track, +                      &last_sect, drv->drive, &drive, &rate); +        if (!blk_is_inserted(drv->blk)) { +            FLOPPY_DPRINTF("No disk in drive\n"); +        } else { +            FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads, +                           max_track, last_sect, ro ? 
"ro" : "rw"); +        } +        if (nb_heads == 1) { +            drv->flags &= ~FDISK_DBL_SIDES; +        } else { +            drv->flags |= FDISK_DBL_SIDES; +        } +        drv->max_track = max_track; +        drv->last_sect = last_sect; +        drv->ro = ro; +        drv->drive = drive; +        drv->media_rate = rate; +    } else { +        FLOPPY_DPRINTF("No drive connected\n"); +        drv->last_sect = 0; +        drv->max_track = 0; +        drv->flags &= ~FDISK_DBL_SIDES; +    } +} + +/********************************************************/ +/* Intel 82078 floppy disk controller emulation          */ + +static void fdctrl_reset(FDCtrl *fdctrl, int do_irq); +static void fdctrl_to_command_phase(FDCtrl *fdctrl); +static int fdctrl_transfer_handler (void *opaque, int nchan, +                                    int dma_pos, int dma_len); +static void fdctrl_raise_irq(FDCtrl *fdctrl); +static FDrive *get_cur_drv(FDCtrl *fdctrl); + +static uint32_t fdctrl_read_statusA(FDCtrl *fdctrl); +static uint32_t fdctrl_read_statusB(FDCtrl *fdctrl); +static uint32_t fdctrl_read_dor(FDCtrl *fdctrl); +static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_tape(FDCtrl *fdctrl); +static void fdctrl_write_tape(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_main_status(FDCtrl *fdctrl); +static void fdctrl_write_rate(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_data(FDCtrl *fdctrl); +static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value); +static uint32_t fdctrl_read_dir(FDCtrl *fdctrl); +static void fdctrl_write_ccr(FDCtrl *fdctrl, uint32_t value); + +enum { +    FD_DIR_WRITE   = 0, +    FD_DIR_READ    = 1, +    FD_DIR_SCANE   = 2, +    FD_DIR_SCANL   = 3, +    FD_DIR_SCANH   = 4, +    FD_DIR_VERIFY  = 5, +}; + +enum { +    FD_STATE_MULTI  = 0x01,	/* multi track flag */ +    FD_STATE_FORMAT = 0x02,	/* format flag */ +}; + +enum { +    FD_REG_SRA = 0x00, +    FD_REG_SRB = 0x01, +    FD_REG_DOR = 0x02, +    FD_REG_TDR = 0x03, +    FD_REG_MSR = 0x04, +    FD_REG_DSR = 0x04, +    FD_REG_FIFO = 0x05, +    FD_REG_DIR = 0x07, +    FD_REG_CCR = 0x07, +}; + +enum { +    FD_CMD_READ_TRACK = 0x02, +    FD_CMD_SPECIFY = 0x03, +    FD_CMD_SENSE_DRIVE_STATUS = 0x04, +    FD_CMD_WRITE = 0x05, +    FD_CMD_READ = 0x06, +    FD_CMD_RECALIBRATE = 0x07, +    FD_CMD_SENSE_INTERRUPT_STATUS = 0x08, +    FD_CMD_WRITE_DELETED = 0x09, +    FD_CMD_READ_ID = 0x0a, +    FD_CMD_READ_DELETED = 0x0c, +    FD_CMD_FORMAT_TRACK = 0x0d, +    FD_CMD_DUMPREG = 0x0e, +    FD_CMD_SEEK = 0x0f, +    FD_CMD_VERSION = 0x10, +    FD_CMD_SCAN_EQUAL = 0x11, +    FD_CMD_PERPENDICULAR_MODE = 0x12, +    FD_CMD_CONFIGURE = 0x13, +    FD_CMD_LOCK = 0x14, +    FD_CMD_VERIFY = 0x16, +    FD_CMD_POWERDOWN_MODE = 0x17, +    FD_CMD_PART_ID = 0x18, +    FD_CMD_SCAN_LOW_OR_EQUAL = 0x19, +    FD_CMD_SCAN_HIGH_OR_EQUAL = 0x1d, +    FD_CMD_SAVE = 0x2e, +    FD_CMD_OPTION = 0x33, +    FD_CMD_RESTORE = 0x4e, +    FD_CMD_DRIVE_SPECIFICATION_COMMAND = 0x8e, +    FD_CMD_RELATIVE_SEEK_OUT = 0x8f, +    FD_CMD_FORMAT_AND_WRITE = 0xcd, +    FD_CMD_RELATIVE_SEEK_IN = 0xcf, +}; + +enum { +    FD_CONFIG_PRETRK = 0xff, /* Pre-compensation set to track 0 */ +    FD_CONFIG_FIFOTHR = 0x0f, /* FIFO threshold set to 1 byte */ +    FD_CONFIG_POLL  = 0x10, /* Poll enabled */ +    FD_CONFIG_EFIFO = 0x20, /* FIFO disabled */ +    FD_CONFIG_EIS   = 0x40, /* No implied seeks */ +}; + +enum { +    FD_SR0_DS0      = 0x01, +    FD_SR0_DS1      = 0x02, +    FD_SR0_HEAD     = 0x04, +    FD_SR0_EQPMT    = 0x10, 
+    FD_SR0_SEEK     = 0x20, +    FD_SR0_ABNTERM  = 0x40, +    FD_SR0_INVCMD   = 0x80, +    FD_SR0_RDYCHG   = 0xc0, +}; + +enum { +    FD_SR1_MA       = 0x01, /* Missing address mark */ +    FD_SR1_NW       = 0x02, /* Not writable */ +    FD_SR1_EC       = 0x80, /* End of cylinder */ +}; + +enum { +    FD_SR2_SNS      = 0x04, /* Scan not satisfied */ +    FD_SR2_SEH      = 0x08, /* Scan equal hit */ +}; + +enum { +    FD_SRA_DIR      = 0x01, +    FD_SRA_nWP      = 0x02, +    FD_SRA_nINDX    = 0x04, +    FD_SRA_HDSEL    = 0x08, +    FD_SRA_nTRK0    = 0x10, +    FD_SRA_STEP     = 0x20, +    FD_SRA_nDRV2    = 0x40, +    FD_SRA_INTPEND  = 0x80, +}; + +enum { +    FD_SRB_MTR0     = 0x01, +    FD_SRB_MTR1     = 0x02, +    FD_SRB_WGATE    = 0x04, +    FD_SRB_RDATA    = 0x08, +    FD_SRB_WDATA    = 0x10, +    FD_SRB_DR0      = 0x20, +}; + +enum { +#if MAX_FD == 4 +    FD_DOR_SELMASK  = 0x03, +#else +    FD_DOR_SELMASK  = 0x01, +#endif +    FD_DOR_nRESET   = 0x04, +    FD_DOR_DMAEN    = 0x08, +    FD_DOR_MOTEN0   = 0x10, +    FD_DOR_MOTEN1   = 0x20, +    FD_DOR_MOTEN2   = 0x40, +    FD_DOR_MOTEN3   = 0x80, +}; + +enum { +#if MAX_FD == 4 +    FD_TDR_BOOTSEL  = 0x0c, +#else +    FD_TDR_BOOTSEL  = 0x04, +#endif +}; + +enum { +    FD_DSR_DRATEMASK= 0x03, +    FD_DSR_PWRDOWN  = 0x40, +    FD_DSR_SWRESET  = 0x80, +}; + +enum { +    FD_MSR_DRV0BUSY = 0x01, +    FD_MSR_DRV1BUSY = 0x02, +    FD_MSR_DRV2BUSY = 0x04, +    FD_MSR_DRV3BUSY = 0x08, +    FD_MSR_CMDBUSY  = 0x10, +    FD_MSR_NONDMA   = 0x20, +    FD_MSR_DIO      = 0x40, +    FD_MSR_RQM      = 0x80, +}; + +enum { +    FD_DIR_DSKCHG   = 0x80, +}; + +/* + * See chapter 5.0 "Controller phases" of the spec: + * + * Command phase: + * The host writes a command and its parameters into the FIFO. The command + * phase is completed when all parameters for the command have been supplied, + * and execution phase is entered. + * + * Execution phase: + * Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO + * contains the payload now, otherwise it's unused. When all bytes of the + * required data have been transferred, the state is switched to either result + * phase (if the command produces status bytes) or directly back into the + * command phase for the next command. + * + * Result phase: + * The host reads out the FIFO, which contains one or more result bytes now. 
+ */ +enum { +    /* Only for migration: reconstruct phase from registers like qemu 2.3 */ +    FD_PHASE_RECONSTRUCT    = 0, + +    FD_PHASE_COMMAND        = 1, +    FD_PHASE_EXECUTION      = 2, +    FD_PHASE_RESULT         = 3, +}; + +#define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI) +#define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT) + +struct FDCtrl { +    MemoryRegion iomem; +    qemu_irq irq; +    /* Controller state */ +    QEMUTimer *result_timer; +    int dma_chann; +    uint8_t phase; +    /* Controller's identification */ +    uint8_t version; +    /* HW */ +    uint8_t sra; +    uint8_t srb; +    uint8_t dor; +    uint8_t dor_vmstate; /* only used as temp during vmstate */ +    uint8_t tdr; +    uint8_t dsr; +    uint8_t msr; +    uint8_t cur_drv; +    uint8_t status0; +    uint8_t status1; +    uint8_t status2; +    /* Command FIFO */ +    uint8_t *fifo; +    int32_t fifo_size; +    uint32_t data_pos; +    uint32_t data_len; +    uint8_t data_state; +    uint8_t data_dir; +    uint8_t eot; /* last wanted sector */ +    /* States kept only to be returned back */ +    /* precompensation */ +    uint8_t precomp_trk; +    uint8_t config; +    uint8_t lock; +    /* Power down config (also with status regB access mode */ +    uint8_t pwrd; +    /* Floppy drives */ +    uint8_t num_floppies; +    FDrive drives[MAX_FD]; +    int reset_sensei; +    uint32_t check_media_rate; +    /* Timers state */ +    uint8_t timer0; +    uint8_t timer1; +}; + +#define TYPE_SYSBUS_FDC "base-sysbus-fdc" +#define SYSBUS_FDC(obj) OBJECT_CHECK(FDCtrlSysBus, (obj), TYPE_SYSBUS_FDC) + +typedef struct FDCtrlSysBus { +    /*< private >*/ +    SysBusDevice parent_obj; +    /*< public >*/ + +    struct FDCtrl state; +} FDCtrlSysBus; + +#define ISA_FDC(obj) OBJECT_CHECK(FDCtrlISABus, (obj), TYPE_ISA_FDC) + +typedef struct FDCtrlISABus { +    ISADevice parent_obj; + +    uint32_t iobase; +    uint32_t irq; +    uint32_t dma; +    struct FDCtrl state; +    int32_t bootindexA; +    int32_t bootindexB; +} FDCtrlISABus; + +static uint32_t fdctrl_read (void *opaque, uint32_t reg) +{ +    FDCtrl *fdctrl = opaque; +    uint32_t retval; + +    reg &= 7; +    switch (reg) { +    case FD_REG_SRA: +        retval = fdctrl_read_statusA(fdctrl); +        break; +    case FD_REG_SRB: +        retval = fdctrl_read_statusB(fdctrl); +        break; +    case FD_REG_DOR: +        retval = fdctrl_read_dor(fdctrl); +        break; +    case FD_REG_TDR: +        retval = fdctrl_read_tape(fdctrl); +        break; +    case FD_REG_MSR: +        retval = fdctrl_read_main_status(fdctrl); +        break; +    case FD_REG_FIFO: +        retval = fdctrl_read_data(fdctrl); +        break; +    case FD_REG_DIR: +        retval = fdctrl_read_dir(fdctrl); +        break; +    default: +        retval = (uint32_t)(-1); +        break; +    } +    FLOPPY_DPRINTF("read reg%d: 0x%02x\n", reg & 7, retval); + +    return retval; +} + +static void fdctrl_write (void *opaque, uint32_t reg, uint32_t value) +{ +    FDCtrl *fdctrl = opaque; + +    FLOPPY_DPRINTF("write reg%d: 0x%02x\n", reg & 7, value); + +    reg &= 7; +    switch (reg) { +    case FD_REG_DOR: +        fdctrl_write_dor(fdctrl, value); +        break; +    case FD_REG_TDR: +        fdctrl_write_tape(fdctrl, value); +        break; +    case FD_REG_DSR: +        fdctrl_write_rate(fdctrl, value); +        break; +    case FD_REG_FIFO: +        fdctrl_write_data(fdctrl, value); +        break; +    case FD_REG_CCR: +        fdctrl_write_ccr(fdctrl, value); +        break; +    
default: +        break; +    } +} + +static uint64_t fdctrl_read_mem (void *opaque, hwaddr reg, +                                 unsigned ize) +{ +    return fdctrl_read(opaque, (uint32_t)reg); +} + +static void fdctrl_write_mem (void *opaque, hwaddr reg, +                              uint64_t value, unsigned size) +{ +    fdctrl_write(opaque, (uint32_t)reg, value); +} + +static const MemoryRegionOps fdctrl_mem_ops = { +    .read = fdctrl_read_mem, +    .write = fdctrl_write_mem, +    .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps fdctrl_mem_strict_ops = { +    .read = fdctrl_read_mem, +    .write = fdctrl_write_mem, +    .endianness = DEVICE_NATIVE_ENDIAN, +    .valid = { +        .min_access_size = 1, +        .max_access_size = 1, +    }, +}; + +static bool fdrive_media_changed_needed(void *opaque) +{ +    FDrive *drive = opaque; + +    return (drive->blk != NULL && drive->media_changed != 1); +} + +static const VMStateDescription vmstate_fdrive_media_changed = { +    .name = "fdrive/media_changed", +    .version_id = 1, +    .minimum_version_id = 1, +    .needed = fdrive_media_changed_needed, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(media_changed, FDrive), +        VMSTATE_END_OF_LIST() +    } +}; + +static bool fdrive_media_rate_needed(void *opaque) +{ +    FDrive *drive = opaque; + +    return drive->fdctrl->check_media_rate; +} + +static const VMStateDescription vmstate_fdrive_media_rate = { +    .name = "fdrive/media_rate", +    .version_id = 1, +    .minimum_version_id = 1, +    .needed = fdrive_media_rate_needed, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(media_rate, FDrive), +        VMSTATE_END_OF_LIST() +    } +}; + +static bool fdrive_perpendicular_needed(void *opaque) +{ +    FDrive *drive = opaque; + +    return drive->perpendicular != 0; +} + +static const VMStateDescription vmstate_fdrive_perpendicular = { +    .name = "fdrive/perpendicular", +    .version_id = 1, +    .minimum_version_id = 1, +    .needed = fdrive_perpendicular_needed, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(perpendicular, FDrive), +        VMSTATE_END_OF_LIST() +    } +}; + +static int fdrive_post_load(void *opaque, int version_id) +{ +    fd_revalidate(opaque); +    return 0; +} + +static const VMStateDescription vmstate_fdrive = { +    .name = "fdrive", +    .version_id = 1, +    .minimum_version_id = 1, +    .post_load = fdrive_post_load, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(head, FDrive), +        VMSTATE_UINT8(track, FDrive), +        VMSTATE_UINT8(sect, FDrive), +        VMSTATE_END_OF_LIST() +    }, +    .subsections = (const VMStateDescription*[]) { +        &vmstate_fdrive_media_changed, +        &vmstate_fdrive_media_rate, +        &vmstate_fdrive_perpendicular, +        NULL +    } +}; + +/* + * Reconstructs the phase from register values according to the logic that was + * implemented in qemu 2.3. This is the default value that is used if the phase + * subsection is not present on migration. + * + * Don't change this function to reflect newer qemu versions, it is part of + * the migration ABI. 
+ */ +static int reconstruct_phase(FDCtrl *fdctrl) +{ +    if (fdctrl->msr & FD_MSR_NONDMA) { +        return FD_PHASE_EXECUTION; +    } else if ((fdctrl->msr & FD_MSR_RQM) == 0) { +        /* qemu 2.3 disabled RQM only during DMA transfers */ +        return FD_PHASE_EXECUTION; +    } else if (fdctrl->msr & FD_MSR_DIO) { +        return FD_PHASE_RESULT; +    } else { +        return FD_PHASE_COMMAND; +    } +} + +static void fdc_pre_save(void *opaque) +{ +    FDCtrl *s = opaque; + +    s->dor_vmstate = s->dor | GET_CUR_DRV(s); +} + +static int fdc_pre_load(void *opaque) +{ +    FDCtrl *s = opaque; +    s->phase = FD_PHASE_RECONSTRUCT; +    return 0; +} + +static int fdc_post_load(void *opaque, int version_id) +{ +    FDCtrl *s = opaque; + +    SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK); +    s->dor = s->dor_vmstate & ~FD_DOR_SELMASK; + +    if (s->phase == FD_PHASE_RECONSTRUCT) { +        s->phase = reconstruct_phase(s); +    } + +    return 0; +} + +static bool fdc_reset_sensei_needed(void *opaque) +{ +    FDCtrl *s = opaque; + +    return s->reset_sensei != 0; +} + +static const VMStateDescription vmstate_fdc_reset_sensei = { +    .name = "fdc/reset_sensei", +    .version_id = 1, +    .minimum_version_id = 1, +    .needed = fdc_reset_sensei_needed, +    .fields = (VMStateField[]) { +        VMSTATE_INT32(reset_sensei, FDCtrl), +        VMSTATE_END_OF_LIST() +    } +}; + +static bool fdc_result_timer_needed(void *opaque) +{ +    FDCtrl *s = opaque; + +    return timer_pending(s->result_timer); +} + +static const VMStateDescription vmstate_fdc_result_timer = { +    .name = "fdc/result_timer", +    .version_id = 1, +    .minimum_version_id = 1, +    .needed = fdc_result_timer_needed, +    .fields = (VMStateField[]) { +        VMSTATE_TIMER_PTR(result_timer, FDCtrl), +        VMSTATE_END_OF_LIST() +    } +}; + +static bool fdc_phase_needed(void *opaque) +{ +    FDCtrl *fdctrl = opaque; + +    return reconstruct_phase(fdctrl) != fdctrl->phase; +} + +static const VMStateDescription vmstate_fdc_phase = { +    .name = "fdc/phase", +    .version_id = 1, +    .minimum_version_id = 1, +    .needed = fdc_phase_needed, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(phase, FDCtrl), +        VMSTATE_END_OF_LIST() +    } +}; + +static const VMStateDescription vmstate_fdc = { +    .name = "fdc", +    .version_id = 2, +    .minimum_version_id = 2, +    .pre_save = fdc_pre_save, +    .pre_load = fdc_pre_load, +    .post_load = fdc_post_load, +    .fields = (VMStateField[]) { +        /* Controller State */ +        VMSTATE_UINT8(sra, FDCtrl), +        VMSTATE_UINT8(srb, FDCtrl), +        VMSTATE_UINT8(dor_vmstate, FDCtrl), +        VMSTATE_UINT8(tdr, FDCtrl), +        VMSTATE_UINT8(dsr, FDCtrl), +        VMSTATE_UINT8(msr, FDCtrl), +        VMSTATE_UINT8(status0, FDCtrl), +        VMSTATE_UINT8(status1, FDCtrl), +        VMSTATE_UINT8(status2, FDCtrl), +        /* Command FIFO */ +        VMSTATE_VARRAY_INT32(fifo, FDCtrl, fifo_size, 0, vmstate_info_uint8, +                             uint8_t), +        VMSTATE_UINT32(data_pos, FDCtrl), +        VMSTATE_UINT32(data_len, FDCtrl), +        VMSTATE_UINT8(data_state, FDCtrl), +        VMSTATE_UINT8(data_dir, FDCtrl), +        VMSTATE_UINT8(eot, FDCtrl), +        /* States kept only to be returned back */ +        VMSTATE_UINT8(timer0, FDCtrl), +        VMSTATE_UINT8(timer1, FDCtrl), +        VMSTATE_UINT8(precomp_trk, FDCtrl), +        VMSTATE_UINT8(config, FDCtrl), +        VMSTATE_UINT8(lock, FDCtrl), +        VMSTATE_UINT8(pwrd, FDCtrl), +  
      VMSTATE_UINT8_EQUAL(num_floppies, FDCtrl), +        VMSTATE_STRUCT_ARRAY(drives, FDCtrl, MAX_FD, 1, +                             vmstate_fdrive, FDrive), +        VMSTATE_END_OF_LIST() +    }, +    .subsections = (const VMStateDescription*[]) { +        &vmstate_fdc_reset_sensei, +        &vmstate_fdc_result_timer, +        &vmstate_fdc_phase, +        NULL +    } +}; + +static void fdctrl_external_reset_sysbus(DeviceState *d) +{ +    FDCtrlSysBus *sys = SYSBUS_FDC(d); +    FDCtrl *s = &sys->state; + +    fdctrl_reset(s, 0); +} + +static void fdctrl_external_reset_isa(DeviceState *d) +{ +    FDCtrlISABus *isa = ISA_FDC(d); +    FDCtrl *s = &isa->state; + +    fdctrl_reset(s, 0); +} + +static void fdctrl_handle_tc(void *opaque, int irq, int level) +{ +    //FDCtrl *s = opaque; + +    if (level) { +        // XXX +        FLOPPY_DPRINTF("TC pulsed\n"); +    } +} + +/* Change IRQ state */ +static void fdctrl_reset_irq(FDCtrl *fdctrl) +{ +    fdctrl->status0 = 0; +    if (!(fdctrl->sra & FD_SRA_INTPEND)) +        return; +    FLOPPY_DPRINTF("Reset interrupt\n"); +    qemu_set_irq(fdctrl->irq, 0); +    fdctrl->sra &= ~FD_SRA_INTPEND; +} + +static void fdctrl_raise_irq(FDCtrl *fdctrl) +{ +    if (!(fdctrl->sra & FD_SRA_INTPEND)) { +        qemu_set_irq(fdctrl->irq, 1); +        fdctrl->sra |= FD_SRA_INTPEND; +    } + +    fdctrl->reset_sensei = 0; +    FLOPPY_DPRINTF("Set interrupt status to 0x%02x\n", fdctrl->status0); +} + +/* Reset controller */ +static void fdctrl_reset(FDCtrl *fdctrl, int do_irq) +{ +    int i; + +    FLOPPY_DPRINTF("reset controller\n"); +    fdctrl_reset_irq(fdctrl); +    /* Initialise controller */ +    fdctrl->sra = 0; +    fdctrl->srb = 0xc0; +    if (!fdctrl->drives[1].blk) { +        fdctrl->sra |= FD_SRA_nDRV2; +    } +    fdctrl->cur_drv = 0; +    fdctrl->dor = FD_DOR_nRESET; +    fdctrl->dor |= (fdctrl->dma_chann != -1) ? 
FD_DOR_DMAEN : 0; +    fdctrl->msr = FD_MSR_RQM; +    fdctrl->reset_sensei = 0; +    timer_del(fdctrl->result_timer); +    /* FIFO state */ +    fdctrl->data_pos = 0; +    fdctrl->data_len = 0; +    fdctrl->data_state = 0; +    fdctrl->data_dir = FD_DIR_WRITE; +    for (i = 0; i < MAX_FD; i++) +        fd_recalibrate(&fdctrl->drives[i]); +    fdctrl_to_command_phase(fdctrl); +    if (do_irq) { +        fdctrl->status0 |= FD_SR0_RDYCHG; +        fdctrl_raise_irq(fdctrl); +        fdctrl->reset_sensei = FD_RESET_SENSEI_COUNT; +    } +} + +static inline FDrive *drv0(FDCtrl *fdctrl) +{ +    return &fdctrl->drives[(fdctrl->tdr & FD_TDR_BOOTSEL) >> 2]; +} + +static inline FDrive *drv1(FDCtrl *fdctrl) +{ +    if ((fdctrl->tdr & FD_TDR_BOOTSEL) < (1 << 2)) +        return &fdctrl->drives[1]; +    else +        return &fdctrl->drives[0]; +} + +#if MAX_FD == 4 +static inline FDrive *drv2(FDCtrl *fdctrl) +{ +    if ((fdctrl->tdr & FD_TDR_BOOTSEL) < (2 << 2)) +        return &fdctrl->drives[2]; +    else +        return &fdctrl->drives[1]; +} + +static inline FDrive *drv3(FDCtrl *fdctrl) +{ +    if ((fdctrl->tdr & FD_TDR_BOOTSEL) < (3 << 2)) +        return &fdctrl->drives[3]; +    else +        return &fdctrl->drives[2]; +} +#endif + +static FDrive *get_cur_drv(FDCtrl *fdctrl) +{ +    switch (fdctrl->cur_drv) { +        case 0: return drv0(fdctrl); +        case 1: return drv1(fdctrl); +#if MAX_FD == 4 +        case 2: return drv2(fdctrl); +        case 3: return drv3(fdctrl); +#endif +        default: return NULL; +    } +} + +/* Status A register : 0x00 (read-only) */ +static uint32_t fdctrl_read_statusA(FDCtrl *fdctrl) +{ +    uint32_t retval = fdctrl->sra; + +    FLOPPY_DPRINTF("status register A: 0x%02x\n", retval); + +    return retval; +} + +/* Status B register : 0x01 (read-only) */ +static uint32_t fdctrl_read_statusB(FDCtrl *fdctrl) +{ +    uint32_t retval = fdctrl->srb; + +    FLOPPY_DPRINTF("status register B: 0x%02x\n", retval); + +    return retval; +} + +/* Digital output register : 0x02 */ +static uint32_t fdctrl_read_dor(FDCtrl *fdctrl) +{ +    uint32_t retval = fdctrl->dor; + +    /* Selected drive */ +    retval |= fdctrl->cur_drv; +    FLOPPY_DPRINTF("digital output register: 0x%02x\n", retval); + +    return retval; +} + +static void fdctrl_write_dor(FDCtrl *fdctrl, uint32_t value) +{ +    FLOPPY_DPRINTF("digital output register set to 0x%02x\n", value); + +    /* Motors */ +    if (value & FD_DOR_MOTEN0) +        fdctrl->srb |= FD_SRB_MTR0; +    else +        fdctrl->srb &= ~FD_SRB_MTR0; +    if (value & FD_DOR_MOTEN1) +        fdctrl->srb |= FD_SRB_MTR1; +    else +        fdctrl->srb &= ~FD_SRB_MTR1; + +    /* Drive */ +    if (value & 1) +        fdctrl->srb |= FD_SRB_DR0; +    else +        fdctrl->srb &= ~FD_SRB_DR0; + +    /* Reset */ +    if (!(value & FD_DOR_nRESET)) { +        if (fdctrl->dor & FD_DOR_nRESET) { +            FLOPPY_DPRINTF("controller enter RESET state\n"); +        } +    } else { +        if (!(fdctrl->dor & FD_DOR_nRESET)) { +            FLOPPY_DPRINTF("controller out of RESET state\n"); +            fdctrl_reset(fdctrl, 1); +            fdctrl->dsr &= ~FD_DSR_PWRDOWN; +        } +    } +    /* Selected drive */ +    fdctrl->cur_drv = value & FD_DOR_SELMASK; + +    fdctrl->dor = value; +} + +/* Tape drive register : 0x03 */ +static uint32_t fdctrl_read_tape(FDCtrl *fdctrl) +{ +    uint32_t retval = fdctrl->tdr; + +    FLOPPY_DPRINTF("tape drive register: 0x%02x\n", retval); + +    return retval; +} + +static void fdctrl_write_tape(FDCtrl *fdctrl, 
uint32_t value)
+{
+    /* Reset mode */
+    if (!(fdctrl->dor & FD_DOR_nRESET)) {
+        FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
+        return;
+    }
+    FLOPPY_DPRINTF("tape drive register set to 0x%02x\n", value);
+    /* Disk boot selection indicator */
+    fdctrl->tdr = value & FD_TDR_BOOTSEL;
+    /* Tape indicators: never allow */
+}
+
+/* Main status register : 0x04 (read) */
+static uint32_t fdctrl_read_main_status(FDCtrl *fdctrl)
+{
+    uint32_t retval = fdctrl->msr;
+
+    fdctrl->dsr &= ~FD_DSR_PWRDOWN;
+    fdctrl->dor |= FD_DOR_nRESET;
+
+    FLOPPY_DPRINTF("main status register: 0x%02x\n", retval);
+
+    return retval;
+}
+
+/* Data select rate register : 0x04 (write) */
+static void fdctrl_write_rate(FDCtrl *fdctrl, uint32_t value)
+{
+    /* Reset mode */
+    if (!(fdctrl->dor & FD_DOR_nRESET)) {
+        FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
+        return;
+    }
+    FLOPPY_DPRINTF("select rate register set to 0x%02x\n", value);
+    /* Reset: autoclear */
+    if (value & FD_DSR_SWRESET) {
+        fdctrl->dor &= ~FD_DOR_nRESET;
+        fdctrl_reset(fdctrl, 1);
+        fdctrl->dor |= FD_DOR_nRESET;
+    }
+    if (value & FD_DSR_PWRDOWN) {
+        fdctrl_reset(fdctrl, 1);
+    }
+    fdctrl->dsr = value;
+}
+
+/* Configuration control register: 0x07 (write) */
+static void fdctrl_write_ccr(FDCtrl *fdctrl, uint32_t value)
+{
+    /* Reset mode */
+    if (!(fdctrl->dor & FD_DOR_nRESET)) {
+        FLOPPY_DPRINTF("Floppy controller in RESET state !\n");
+        return;
+    }
+    FLOPPY_DPRINTF("configuration control register set to 0x%02x\n", value);
+
+    /* Only the rate selection bits are used in AT mode, and we
+     * store those in the DSR.
+     */
+    fdctrl->dsr = (fdctrl->dsr & ~FD_DSR_DRATEMASK) |
+                  (value & FD_DSR_DRATEMASK);
+}
+
+static int fdctrl_media_changed(FDrive *drv)
+{
+    return drv->media_changed;
+}
+
+/* Digital input register : 0x07 (read-only) */
+static uint32_t fdctrl_read_dir(FDCtrl *fdctrl)
+{
+    uint32_t retval = 0;
+
+    if (fdctrl_media_changed(get_cur_drv(fdctrl))) {
+        retval |= FD_DIR_DSKCHG;
+    }
+    if (retval != 0) {
+        FLOPPY_DPRINTF("Floppy digital input register: 0x%02x\n", retval);
+    }
+
+    return retval;
+}
+
+/* Clear the FIFO and update the state for receiving the next command */
+static void fdctrl_to_command_phase(FDCtrl *fdctrl)
+{
+    fdctrl->phase = FD_PHASE_COMMAND;
+    fdctrl->data_dir = FD_DIR_WRITE;
+    fdctrl->data_pos = 0;
+    fdctrl->data_len = 1; /* Accept command byte, adjust for params later */
+    fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
+    fdctrl->msr |= FD_MSR_RQM;
+}
+
+/* Update the state to allow the guest to read out the command status.
+ * @fifo_len is the number of result bytes to be read out.
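+ * On entering the result phase, RQM and DIO are both set in the MSR so
+ * the guest knows it must now read the result bytes from the FIFO.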
*/ +static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len) +{ +    fdctrl->phase = FD_PHASE_RESULT; +    fdctrl->data_dir = FD_DIR_READ; +    fdctrl->data_len = fifo_len; +    fdctrl->data_pos = 0; +    fdctrl->msr |= FD_MSR_CMDBUSY | FD_MSR_RQM | FD_MSR_DIO; +} + +/* Set an error: unimplemented/unknown command */ +static void fdctrl_unimplemented(FDCtrl *fdctrl, int direction) +{ +    qemu_log_mask(LOG_UNIMP, "fdc: unimplemented command 0x%02x\n", +                  fdctrl->fifo[0]); +    fdctrl->fifo[0] = FD_SR0_INVCMD; +    fdctrl_to_result_phase(fdctrl, 1); +} + +/* Seek to next sector + * returns 0 when end of track reached (for DBL_SIDES on head 1) + * otherwise returns 1 + */ +static int fdctrl_seek_to_next_sect(FDCtrl *fdctrl, FDrive *cur_drv) +{ +    FLOPPY_DPRINTF("seek to next sector (%d %02x %02x => %d)\n", +                   cur_drv->head, cur_drv->track, cur_drv->sect, +                   fd_sector(cur_drv)); +    /* XXX: cur_drv->sect >= cur_drv->last_sect should be an +       error in fact */ +    uint8_t new_head = cur_drv->head; +    uint8_t new_track = cur_drv->track; +    uint8_t new_sect = cur_drv->sect; + +    int ret = 1; + +    if (new_sect >= cur_drv->last_sect || +        new_sect == fdctrl->eot) { +        new_sect = 1; +        if (FD_MULTI_TRACK(fdctrl->data_state)) { +            if (new_head == 0 && +                (cur_drv->flags & FDISK_DBL_SIDES) != 0) { +                new_head = 1; +            } else { +                new_head = 0; +                new_track++; +                fdctrl->status0 |= FD_SR0_SEEK; +                if ((cur_drv->flags & FDISK_DBL_SIDES) == 0) { +                    ret = 0; +                } +            } +        } else { +            fdctrl->status0 |= FD_SR0_SEEK; +            new_track++; +            ret = 0; +        } +        if (ret == 1) { +            FLOPPY_DPRINTF("seek to next track (%d %02x %02x => %d)\n", +                    new_head, new_track, new_sect, fd_sector(cur_drv)); +        } +    } else { +        new_sect++; +    } +    fd_seek(cur_drv, new_head, new_track, new_sect, 1); +    return ret; +} + +/* Callback for transfer end (stop or abort) */ +static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0, +                                 uint8_t status1, uint8_t status2) +{ +    FDrive *cur_drv; +    cur_drv = get_cur_drv(fdctrl); + +    fdctrl->status0 &= ~(FD_SR0_DS0 | FD_SR0_DS1 | FD_SR0_HEAD); +    fdctrl->status0 |= GET_CUR_DRV(fdctrl); +    if (cur_drv->head) { +        fdctrl->status0 |= FD_SR0_HEAD; +    } +    fdctrl->status0 |= status0; + +    FLOPPY_DPRINTF("transfer status: %02x %02x %02x (%02x)\n", +                   status0, status1, status2, fdctrl->status0); +    fdctrl->fifo[0] = fdctrl->status0; +    fdctrl->fifo[1] = status1; +    fdctrl->fifo[2] = status2; +    fdctrl->fifo[3] = cur_drv->track; +    fdctrl->fifo[4] = cur_drv->head; +    fdctrl->fifo[5] = cur_drv->sect; +    fdctrl->fifo[6] = FD_SECTOR_SC; +    fdctrl->data_dir = FD_DIR_READ; +    if (!(fdctrl->msr & FD_MSR_NONDMA)) { +        DMA_release_DREQ(fdctrl->dma_chann); +    } +    fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO; +    fdctrl->msr &= ~FD_MSR_NONDMA; + +    fdctrl_to_result_phase(fdctrl, 7); +    fdctrl_raise_irq(fdctrl); +} + +/* Prepare a data transfer (either DMA or FIFO) */ +static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv; +    uint8_t kh, kt, ks; + +    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); +    cur_drv = get_cur_drv(fdctrl); +    
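+    /* Command bytes 2..4 hold the target cylinder, head and sector.
+     * fd_sector_calc() turns that CHS triple into a linear sector
+     * number; as a sketch (not the helper's verbatim code), the usual
+     * mapping is:
+     *
+     *     lba = (cyl * num_sides + head) * last_sect + (sect - 1);
+     */
+    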
kt = fdctrl->fifo[2];
+    kh = fdctrl->fifo[3];
+    ks = fdctrl->fifo[4];
+    FLOPPY_DPRINTF("Start transfer at %d %d %02x %02x (%d)\n",
+                   GET_CUR_DRV(fdctrl), kh, kt, ks,
+                   fd_sector_calc(kh, kt, ks, cur_drv->last_sect,
+                                  NUM_SIDES(cur_drv)));
+    switch (fd_seek(cur_drv, kh, kt, ks, fdctrl->config & FD_CONFIG_EIS)) {
+    case 2:
+        /* sect too big */
+        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 3:
+        /* track too big */
+        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_EC, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 4:
+        /* No seek enabled */
+        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    case 1:
+        fdctrl->status0 |= FD_SR0_SEEK;
+        break;
+    default:
+        break;
+    }
+
+    /* Check the data rate. If the programmed data rate does not match
+     * the currently inserted medium, the operation has to fail. */
+    if (fdctrl->check_media_rate &&
+        (fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) {
+        FLOPPY_DPRINTF("data rate mismatch (fdc=%d, media=%d)\n",
+                       fdctrl->dsr & FD_DSR_DRATEMASK, cur_drv->media_rate);
+        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
+        fdctrl->fifo[3] = kt;
+        fdctrl->fifo[4] = kh;
+        fdctrl->fifo[5] = ks;
+        return;
+    }
+
+    /* Set the FIFO state */
+    fdctrl->data_dir = direction;
+    fdctrl->data_pos = 0;
+    assert(fdctrl->msr & FD_MSR_CMDBUSY);
+    if (fdctrl->fifo[0] & 0x80)
+        fdctrl->data_state |= FD_STATE_MULTI;
+    else
+        fdctrl->data_state &= ~FD_STATE_MULTI;
+    if (fdctrl->fifo[5] == 0) {
+        fdctrl->data_len = fdctrl->fifo[8];
+    } else {
+        int tmp;
+        fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
+        tmp = (fdctrl->fifo[6] - ks + 1);
+        if (fdctrl->fifo[0] & 0x80)
+            tmp += fdctrl->fifo[6];
+        fdctrl->data_len *= tmp;
+    }
+    fdctrl->eot = fdctrl->fifo[6];
+    if (fdctrl->dor & FD_DOR_DMAEN) {
+        int dma_mode;
+        /* DMA transfers are enabled. Check that the DMA channel is
+         * programmed consistently with the requested direction. */
+        dma_mode = DMA_get_channel_mode(fdctrl->dma_chann);
+        dma_mode = (dma_mode >> 2) & 3;
+        FLOPPY_DPRINTF("dma_mode=%d direction=%d (%d - %d)\n",
+                       dma_mode, direction,
+                       (128 << fdctrl->fifo[5]) *
+                       (cur_drv->last_sect - ks + 1), fdctrl->data_len);
+        if (((direction == FD_DIR_SCANE || direction == FD_DIR_SCANL ||
+              direction == FD_DIR_SCANH) && dma_mode == 0) ||
+            (direction == FD_DIR_WRITE && dma_mode == 2) ||
+            (direction == FD_DIR_READ && dma_mode == 1) ||
+            (direction == FD_DIR_VERIFY)) {
+            /* No access is allowed until DMA transfer has completed */
+            fdctrl->msr &= ~FD_MSR_RQM;
+            if (direction != FD_DIR_VERIFY) {
+                /* Now, we just have to wait for the DMA controller to
+                 * recall us...
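+                 * fdctrl_transfer_handler() was registered for this channel
+                 * in fdctrl_realize_common(), so holding DREQ below is what
+                 * eventually triggers the actual sector I/O.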
+                 */ +                DMA_hold_DREQ(fdctrl->dma_chann); +                DMA_schedule(fdctrl->dma_chann); +            } else { +                /* Start transfer */ +                fdctrl_transfer_handler(fdctrl, fdctrl->dma_chann, 0, +                                        fdctrl->data_len); +            } +            return; +        } else { +            FLOPPY_DPRINTF("bad dma_mode=%d direction=%d\n", dma_mode, +                           direction); +        } +    } +    FLOPPY_DPRINTF("start non-DMA transfer\n"); +    fdctrl->msr |= FD_MSR_NONDMA | FD_MSR_RQM; +    if (direction != FD_DIR_WRITE) +        fdctrl->msr |= FD_MSR_DIO; +    /* IO based transfer: calculate len */ +    fdctrl_raise_irq(fdctrl); +} + +/* Prepare a transfer of deleted data */ +static void fdctrl_start_transfer_del(FDCtrl *fdctrl, int direction) +{ +    qemu_log_mask(LOG_UNIMP, "fdctrl_start_transfer_del() unimplemented\n"); + +    /* We don't handle deleted data, +     * so we don't return *ANYTHING* +     */ +    fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); +} + +/* handlers for DMA transfers */ +static int fdctrl_transfer_handler (void *opaque, int nchan, +                                    int dma_pos, int dma_len) +{ +    FDCtrl *fdctrl; +    FDrive *cur_drv; +    int len, start_pos, rel_pos; +    uint8_t status0 = 0x00, status1 = 0x00, status2 = 0x00; + +    fdctrl = opaque; +    if (fdctrl->msr & FD_MSR_RQM) { +        FLOPPY_DPRINTF("Not in DMA transfer mode !\n"); +        return 0; +    } +    cur_drv = get_cur_drv(fdctrl); +    if (fdctrl->data_dir == FD_DIR_SCANE || fdctrl->data_dir == FD_DIR_SCANL || +        fdctrl->data_dir == FD_DIR_SCANH) +        status2 = FD_SR2_SNS; +    if (dma_len > fdctrl->data_len) +        dma_len = fdctrl->data_len; +    if (cur_drv->blk == NULL) { +        if (fdctrl->data_dir == FD_DIR_WRITE) +            fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); +        else +            fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); +        len = 0; +        goto transfer_error; +    } +    rel_pos = fdctrl->data_pos % FD_SECTOR_LEN; +    for (start_pos = fdctrl->data_pos; fdctrl->data_pos < dma_len;) { +        len = dma_len - fdctrl->data_pos; +        if (len + rel_pos > FD_SECTOR_LEN) +            len = FD_SECTOR_LEN - rel_pos; +        FLOPPY_DPRINTF("copy %d bytes (%d %d %d) %d pos %d %02x " +                       "(%d-0x%08x 0x%08x)\n", len, dma_len, fdctrl->data_pos, +                       fdctrl->data_len, GET_CUR_DRV(fdctrl), cur_drv->head, +                       cur_drv->track, cur_drv->sect, fd_sector(cur_drv), +                       fd_sector(cur_drv) * FD_SECTOR_LEN); +        if (fdctrl->data_dir != FD_DIR_WRITE || +            len < FD_SECTOR_LEN || rel_pos != 0) { +            /* READ & SCAN commands and realign to a sector for WRITE */ +            if (blk_read(cur_drv->blk, fd_sector(cur_drv), +                         fdctrl->fifo, 1) < 0) { +                FLOPPY_DPRINTF("Floppy: error getting sector %d\n", +                               fd_sector(cur_drv)); +                /* Sure, image size is too small... 
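+                 * (blk_read() failed, most likely because the image is
+                 * smaller than the requested sector; hand the guest a
+                 * zero-filled sector rather than stale FIFO contents)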
*/ +                memset(fdctrl->fifo, 0, FD_SECTOR_LEN); +            } +        } +        switch (fdctrl->data_dir) { +        case FD_DIR_READ: +            /* READ commands */ +            DMA_write_memory (nchan, fdctrl->fifo + rel_pos, +                              fdctrl->data_pos, len); +            break; +        case FD_DIR_WRITE: +            /* WRITE commands */ +            if (cur_drv->ro) { +                /* Handle readonly medium early, no need to do DMA, touch the +                 * LED or attempt any writes. A real floppy doesn't attempt +                 * to write to readonly media either. */ +                fdctrl_stop_transfer(fdctrl, +                                     FD_SR0_ABNTERM | FD_SR0_SEEK, FD_SR1_NW, +                                     0x00); +                goto transfer_error; +            } + +            DMA_read_memory (nchan, fdctrl->fifo + rel_pos, +                             fdctrl->data_pos, len); +            if (blk_write(cur_drv->blk, fd_sector(cur_drv), +                          fdctrl->fifo, 1) < 0) { +                FLOPPY_DPRINTF("error writing sector %d\n", +                               fd_sector(cur_drv)); +                fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); +                goto transfer_error; +            } +            break; +        case FD_DIR_VERIFY: +            /* VERIFY commands */ +            break; +        default: +            /* SCAN commands */ +            { +                uint8_t tmpbuf[FD_SECTOR_LEN]; +                int ret; +                DMA_read_memory (nchan, tmpbuf, fdctrl->data_pos, len); +                ret = memcmp(tmpbuf, fdctrl->fifo + rel_pos, len); +                if (ret == 0) { +                    status2 = FD_SR2_SEH; +                    goto end_transfer; +                } +                if ((ret < 0 && fdctrl->data_dir == FD_DIR_SCANL) || +                    (ret > 0 && fdctrl->data_dir == FD_DIR_SCANH)) { +                    status2 = 0x00; +                    goto end_transfer; +                } +            } +            break; +        } +        fdctrl->data_pos += len; +        rel_pos = fdctrl->data_pos % FD_SECTOR_LEN; +        if (rel_pos == 0) { +            /* Seek to next sector */ +            if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) +                break; +        } +    } + end_transfer: +    len = fdctrl->data_pos - start_pos; +    FLOPPY_DPRINTF("end transfer %d %d %d\n", +                   fdctrl->data_pos, len, fdctrl->data_len); +    if (fdctrl->data_dir == FD_DIR_SCANE || +        fdctrl->data_dir == FD_DIR_SCANL || +        fdctrl->data_dir == FD_DIR_SCANH) +        status2 = FD_SR2_SEH; +    fdctrl->data_len -= len; +    fdctrl_stop_transfer(fdctrl, status0, status1, status2); + transfer_error: + +    return len; +} + +/* Data register : 0x05 */ +static uint32_t fdctrl_read_data(FDCtrl *fdctrl) +{ +    FDrive *cur_drv; +    uint32_t retval = 0; +    uint32_t pos; + +    cur_drv = get_cur_drv(fdctrl); +    fdctrl->dsr &= ~FD_DSR_PWRDOWN; +    if (!(fdctrl->msr & FD_MSR_RQM) || !(fdctrl->msr & FD_MSR_DIO)) { +        FLOPPY_DPRINTF("error: controller not ready for reading\n"); +        return 0; +    } + +    /* If data_len spans multiple sectors, the current position in the FIFO +     * wraps around while fdctrl->data_pos is the real position in the whole +     * request. 
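+     * For example, with 512-byte sectors a data_pos of 513 reads FIFO
+     * offset 1, i.e. the second byte of the second sector.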
*/ +    pos = fdctrl->data_pos; +    pos %= FD_SECTOR_LEN; + +    switch (fdctrl->phase) { +    case FD_PHASE_EXECUTION: +        assert(fdctrl->msr & FD_MSR_NONDMA); +        if (pos == 0) { +            if (fdctrl->data_pos != 0) +                if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) { +                    FLOPPY_DPRINTF("error seeking to next sector %d\n", +                                   fd_sector(cur_drv)); +                    return 0; +                } +            if (blk_read(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) +                < 0) { +                FLOPPY_DPRINTF("error getting sector %d\n", +                               fd_sector(cur_drv)); +                /* Sure, image size is too small... */ +                memset(fdctrl->fifo, 0, FD_SECTOR_LEN); +            } +        } + +        if (++fdctrl->data_pos == fdctrl->data_len) { +            fdctrl->msr &= ~FD_MSR_RQM; +            fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); +        } +        break; + +    case FD_PHASE_RESULT: +        assert(!(fdctrl->msr & FD_MSR_NONDMA)); +        if (++fdctrl->data_pos == fdctrl->data_len) { +            fdctrl->msr &= ~FD_MSR_RQM; +            fdctrl_to_command_phase(fdctrl); +            fdctrl_reset_irq(fdctrl); +        } +        break; + +    case FD_PHASE_COMMAND: +    default: +        abort(); +    } + +    retval = fdctrl->fifo[pos]; +    FLOPPY_DPRINTF("data register: 0x%02x\n", retval); + +    return retval; +} + +static void fdctrl_format_sector(FDCtrl *fdctrl) +{ +    FDrive *cur_drv; +    uint8_t kh, kt, ks; + +    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); +    cur_drv = get_cur_drv(fdctrl); +    kt = fdctrl->fifo[6]; +    kh = fdctrl->fifo[7]; +    ks = fdctrl->fifo[8]; +    FLOPPY_DPRINTF("format sector at %d %d %02x %02x (%d)\n", +                   GET_CUR_DRV(fdctrl), kh, kt, ks, +                   fd_sector_calc(kh, kt, ks, cur_drv->last_sect, +                                  NUM_SIDES(cur_drv))); +    switch (fd_seek(cur_drv, kh, kt, ks, fdctrl->config & FD_CONFIG_EIS)) { +    case 2: +        /* sect too big */ +        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); +        fdctrl->fifo[3] = kt; +        fdctrl->fifo[4] = kh; +        fdctrl->fifo[5] = ks; +        return; +    case 3: +        /* track too big */ +        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_EC, 0x00); +        fdctrl->fifo[3] = kt; +        fdctrl->fifo[4] = kh; +        fdctrl->fifo[5] = ks; +        return; +    case 4: +        /* No seek enabled */ +        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, 0x00, 0x00); +        fdctrl->fifo[3] = kt; +        fdctrl->fifo[4] = kh; +        fdctrl->fifo[5] = ks; +        return; +    case 1: +        fdctrl->status0 |= FD_SR0_SEEK; +        break; +    default: +        break; +    } +    memset(fdctrl->fifo, 0, FD_SECTOR_LEN); +    if (cur_drv->blk == NULL || +        blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) < 0) { +        FLOPPY_DPRINTF("error formatting sector %d\n", fd_sector(cur_drv)); +        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM | FD_SR0_SEEK, 0x00, 0x00); +    } else { +        if (cur_drv->sect == cur_drv->last_sect) { +            fdctrl->data_state &= ~FD_STATE_FORMAT; +            /* Last sector done */ +            fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); +        } else { +            /* More to do */ +            fdctrl->data_pos = 0; +            fdctrl->data_len = 4; +        } +    } +} + +static void 
fdctrl_handle_lock(FDCtrl *fdctrl, int direction) +{ +    fdctrl->lock = (fdctrl->fifo[0] & 0x80) ? 1 : 0; +    fdctrl->fifo[0] = fdctrl->lock << 4; +    fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv = get_cur_drv(fdctrl); + +    /* Drives position */ +    fdctrl->fifo[0] = drv0(fdctrl)->track; +    fdctrl->fifo[1] = drv1(fdctrl)->track; +#if MAX_FD == 4 +    fdctrl->fifo[2] = drv2(fdctrl)->track; +    fdctrl->fifo[3] = drv3(fdctrl)->track; +#else +    fdctrl->fifo[2] = 0; +    fdctrl->fifo[3] = 0; +#endif +    /* timers */ +    fdctrl->fifo[4] = fdctrl->timer0; +    fdctrl->fifo[5] = (fdctrl->timer1 << 1) | (fdctrl->dor & FD_DOR_DMAEN ? 1 : 0); +    fdctrl->fifo[6] = cur_drv->last_sect; +    fdctrl->fifo[7] = (fdctrl->lock << 7) | +        (cur_drv->perpendicular << 2); +    fdctrl->fifo[8] = fdctrl->config; +    fdctrl->fifo[9] = fdctrl->precomp_trk; +    fdctrl_to_result_phase(fdctrl, 10); +} + +static void fdctrl_handle_version(FDCtrl *fdctrl, int direction) +{ +    /* Controller's version */ +    fdctrl->fifo[0] = fdctrl->version; +    fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_partid(FDCtrl *fdctrl, int direction) +{ +    fdctrl->fifo[0] = 0x41; /* Stepping 1 */ +    fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv = get_cur_drv(fdctrl); + +    /* Drives position */ +    drv0(fdctrl)->track = fdctrl->fifo[3]; +    drv1(fdctrl)->track = fdctrl->fifo[4]; +#if MAX_FD == 4 +    drv2(fdctrl)->track = fdctrl->fifo[5]; +    drv3(fdctrl)->track = fdctrl->fifo[6]; +#endif +    /* timers */ +    fdctrl->timer0 = fdctrl->fifo[7]; +    fdctrl->timer1 = fdctrl->fifo[8]; +    cur_drv->last_sect = fdctrl->fifo[9]; +    fdctrl->lock = fdctrl->fifo[10] >> 7; +    cur_drv->perpendicular = (fdctrl->fifo[10] >> 2) & 0xF; +    fdctrl->config = fdctrl->fifo[11]; +    fdctrl->precomp_trk = fdctrl->fifo[12]; +    fdctrl->pwrd = fdctrl->fifo[13]; +    fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_save(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv = get_cur_drv(fdctrl); + +    fdctrl->fifo[0] = 0; +    fdctrl->fifo[1] = 0; +    /* Drives position */ +    fdctrl->fifo[2] = drv0(fdctrl)->track; +    fdctrl->fifo[3] = drv1(fdctrl)->track; +#if MAX_FD == 4 +    fdctrl->fifo[4] = drv2(fdctrl)->track; +    fdctrl->fifo[5] = drv3(fdctrl)->track; +#else +    fdctrl->fifo[4] = 0; +    fdctrl->fifo[5] = 0; +#endif +    /* timers */ +    fdctrl->fifo[6] = fdctrl->timer0; +    fdctrl->fifo[7] = fdctrl->timer1; +    fdctrl->fifo[8] = cur_drv->last_sect; +    fdctrl->fifo[9] = (fdctrl->lock << 7) | +        (cur_drv->perpendicular << 2); +    fdctrl->fifo[10] = fdctrl->config; +    fdctrl->fifo[11] = fdctrl->precomp_trk; +    fdctrl->fifo[12] = fdctrl->pwrd; +    fdctrl->fifo[13] = 0; +    fdctrl->fifo[14] = 0; +    fdctrl_to_result_phase(fdctrl, 15); +} + +static void fdctrl_handle_readid(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv = get_cur_drv(fdctrl); + +    cur_drv->head = (fdctrl->fifo[1] >> 2) & 1; +    timer_mod(fdctrl->result_timer, +                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (get_ticks_per_sec() / 50)); +} + +static void fdctrl_handle_format_track(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv; + +    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); +    cur_drv = get_cur_drv(fdctrl); +    fdctrl->data_state |= FD_STATE_FORMAT; +    if (fdctrl->fifo[0] & 0x80) 
+        fdctrl->data_state |= FD_STATE_MULTI;
+    else
+        fdctrl->data_state &= ~FD_STATE_MULTI;
+    cur_drv->bps =
+        fdctrl->fifo[2] > 7 ? 16384 : 128 << fdctrl->fifo[2];
+#if 0
+    cur_drv->last_sect =
+        cur_drv->flags & FDISK_DBL_SIDES ? fdctrl->fifo[3] :
+        fdctrl->fifo[3] / 2;
+#else
+    cur_drv->last_sect = fdctrl->fifo[3];
+#endif
+    /* TODO: implement format using DMA expected by the Bochs BIOS
+     * and Linux fdformat (read 3 bytes per sector via DMA and fill
+     * the sector with the specified fill byte).
+     */
+    fdctrl->data_state &= ~FD_STATE_FORMAT;
+    fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
+}
+
+static void fdctrl_handle_specify(FDCtrl *fdctrl, int direction)
+{
+    fdctrl->timer0 = (fdctrl->fifo[1] >> 4) & 0xF;
+    fdctrl->timer1 = fdctrl->fifo[2] >> 1;
+    if (fdctrl->fifo[2] & 1)
+        fdctrl->dor &= ~FD_DOR_DMAEN;
+    else
+        fdctrl->dor |= FD_DOR_DMAEN;
+    /* No result back */
+    fdctrl_to_command_phase(fdctrl);
+}
+
+static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction)
+{
+    FDrive *cur_drv;
+
+    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
+    cur_drv = get_cur_drv(fdctrl);
+    cur_drv->head = (fdctrl->fifo[1] >> 2) & 1;
+    /* One status byte back */
+    fdctrl->fifo[0] = (cur_drv->ro << 6) |
+        (cur_drv->track == 0 ? 0x10 : 0x00) |
+        (cur_drv->head << 2) |
+        GET_CUR_DRV(fdctrl) |
+        0x28;
+    fdctrl_to_result_phase(fdctrl, 1);
+}
+
+static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction)
+{
+    FDrive *cur_drv;
+
+    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
+    cur_drv = get_cur_drv(fdctrl);
+    fd_recalibrate(cur_drv);
+    fdctrl_to_command_phase(fdctrl);
+    /* Raise Interrupt */
+    fdctrl->status0 |= FD_SR0_SEEK;
+    fdctrl_raise_irq(fdctrl);
+}
+
+static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
+{
+    FDrive *cur_drv = get_cur_drv(fdctrl);
+
+    if (fdctrl->reset_sensei > 0) {
+        fdctrl->fifo[0] =
+            FD_SR0_RDYCHG + FD_RESET_SENSEI_COUNT - fdctrl->reset_sensei;
+        fdctrl->reset_sensei--;
+    } else if (!(fdctrl->sra & FD_SRA_INTPEND)) {
+        fdctrl->fifo[0] = FD_SR0_INVCMD;
+        fdctrl_to_result_phase(fdctrl, 1);
+        return;
+    } else {
+        fdctrl->fifo[0] =
+                (fdctrl->status0 & ~(FD_SR0_HEAD | FD_SR0_DS1 | FD_SR0_DS0))
+                | GET_CUR_DRV(fdctrl);
+    }
+
+    fdctrl->fifo[1] = cur_drv->track;
+    fdctrl_to_result_phase(fdctrl, 2);
+    fdctrl_reset_irq(fdctrl);
+    fdctrl->status0 = FD_SR0_RDYCHG;
+}
+
+static void fdctrl_handle_seek(FDCtrl *fdctrl, int direction)
+{
+    FDrive *cur_drv;
+
+    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
+    cur_drv = get_cur_drv(fdctrl);
+    fdctrl_to_command_phase(fdctrl);
+    /* The seek command just sends step pulses to the drive and doesn't care if
+     * there is a medium inserted or if it's banging the head against the drive.
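+     * Only the track is updated here; head and sector are passed through
+     * unchanged.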
+     */ +    fd_seek(cur_drv, cur_drv->head, fdctrl->fifo[2], cur_drv->sect, 1); +    /* Raise Interrupt */ +    fdctrl->status0 |= FD_SR0_SEEK; +    fdctrl_raise_irq(fdctrl); +} + +static void fdctrl_handle_perpendicular_mode(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv = get_cur_drv(fdctrl); + +    if (fdctrl->fifo[1] & 0x80) +        cur_drv->perpendicular = fdctrl->fifo[1] & 0x7; +    /* No result back */ +    fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction) +{ +    fdctrl->config = fdctrl->fifo[2]; +    fdctrl->precomp_trk =  fdctrl->fifo[3]; +    /* No result back */ +    fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction) +{ +    fdctrl->pwrd = fdctrl->fifo[1]; +    fdctrl->fifo[0] = fdctrl->fifo[1]; +    fdctrl_to_result_phase(fdctrl, 1); +} + +static void fdctrl_handle_option(FDCtrl *fdctrl, int direction) +{ +    /* No result back */ +    fdctrl_to_command_phase(fdctrl); +} + +static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv = get_cur_drv(fdctrl); +    uint32_t pos; + +    pos = fdctrl->data_pos - 1; +    pos %= FD_SECTOR_LEN; +    if (fdctrl->fifo[pos] & 0x80) { +        /* Command parameters done */ +        if (fdctrl->fifo[pos] & 0x40) { +            fdctrl->fifo[0] = fdctrl->fifo[1]; +            fdctrl->fifo[2] = 0; +            fdctrl->fifo[3] = 0; +            fdctrl_to_result_phase(fdctrl, 4); +        } else { +            fdctrl_to_command_phase(fdctrl); +        } +    } else if (fdctrl->data_len > 7) { +        /* ERROR */ +        fdctrl->fifo[0] = 0x80 | +            (cur_drv->head << 2) | GET_CUR_DRV(fdctrl); +        fdctrl_to_result_phase(fdctrl, 1); +    } +} + +static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv; + +    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); +    cur_drv = get_cur_drv(fdctrl); +    if (fdctrl->fifo[2] + cur_drv->track >= cur_drv->max_track) { +        fd_seek(cur_drv, cur_drv->head, cur_drv->max_track - 1, +                cur_drv->sect, 1); +    } else { +        fd_seek(cur_drv, cur_drv->head, +                cur_drv->track + fdctrl->fifo[2], cur_drv->sect, 1); +    } +    fdctrl_to_command_phase(fdctrl); +    /* Raise Interrupt */ +    fdctrl->status0 |= FD_SR0_SEEK; +    fdctrl_raise_irq(fdctrl); +} + +static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction) +{ +    FDrive *cur_drv; + +    SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK); +    cur_drv = get_cur_drv(fdctrl); +    if (fdctrl->fifo[2] > cur_drv->track) { +        fd_seek(cur_drv, cur_drv->head, 0, cur_drv->sect, 1); +    } else { +        fd_seek(cur_drv, cur_drv->head, +                cur_drv->track - fdctrl->fifo[2], cur_drv->sect, 1); +    } +    fdctrl_to_command_phase(fdctrl); +    /* Raise Interrupt */ +    fdctrl->status0 |= FD_SR0_SEEK; +    fdctrl_raise_irq(fdctrl); +} + +/* + * Handlers for the execution phase of each command + */ +typedef struct FDCtrlCommand { +    uint8_t value; +    uint8_t mask; +    const char* name; +    int parameters; +    void (*handler)(FDCtrl *fdctrl, int direction); +    int direction; +} FDCtrlCommand; + +static const FDCtrlCommand handlers[] = { +    { FD_CMD_READ, 0x1f, "READ", 8, fdctrl_start_transfer, FD_DIR_READ }, +    { FD_CMD_WRITE, 0x3f, "WRITE", 8, fdctrl_start_transfer, FD_DIR_WRITE }, +    { FD_CMD_SEEK, 0xff, "SEEK", 2, fdctrl_handle_seek }, +    { 
FD_CMD_SENSE_INTERRUPT_STATUS, 0xff, "SENSE INTERRUPT STATUS", 0, fdctrl_handle_sense_interrupt_status }, +    { FD_CMD_RECALIBRATE, 0xff, "RECALIBRATE", 1, fdctrl_handle_recalibrate }, +    { FD_CMD_FORMAT_TRACK, 0xbf, "FORMAT TRACK", 5, fdctrl_handle_format_track }, +    { FD_CMD_READ_TRACK, 0xbf, "READ TRACK", 8, fdctrl_start_transfer, FD_DIR_READ }, +    { FD_CMD_RESTORE, 0xff, "RESTORE", 17, fdctrl_handle_restore }, /* part of READ DELETED DATA */ +    { FD_CMD_SAVE, 0xff, "SAVE", 0, fdctrl_handle_save }, /* part of READ DELETED DATA */ +    { FD_CMD_READ_DELETED, 0x1f, "READ DELETED DATA", 8, fdctrl_start_transfer_del, FD_DIR_READ }, +    { FD_CMD_SCAN_EQUAL, 0x1f, "SCAN EQUAL", 8, fdctrl_start_transfer, FD_DIR_SCANE }, +    { FD_CMD_VERIFY, 0x1f, "VERIFY", 8, fdctrl_start_transfer, FD_DIR_VERIFY }, +    { FD_CMD_SCAN_LOW_OR_EQUAL, 0x1f, "SCAN LOW OR EQUAL", 8, fdctrl_start_transfer, FD_DIR_SCANL }, +    { FD_CMD_SCAN_HIGH_OR_EQUAL, 0x1f, "SCAN HIGH OR EQUAL", 8, fdctrl_start_transfer, FD_DIR_SCANH }, +    { FD_CMD_WRITE_DELETED, 0x3f, "WRITE DELETED DATA", 8, fdctrl_start_transfer_del, FD_DIR_WRITE }, +    { FD_CMD_READ_ID, 0xbf, "READ ID", 1, fdctrl_handle_readid }, +    { FD_CMD_SPECIFY, 0xff, "SPECIFY", 2, fdctrl_handle_specify }, +    { FD_CMD_SENSE_DRIVE_STATUS, 0xff, "SENSE DRIVE STATUS", 1, fdctrl_handle_sense_drive_status }, +    { FD_CMD_PERPENDICULAR_MODE, 0xff, "PERPENDICULAR MODE", 1, fdctrl_handle_perpendicular_mode }, +    { FD_CMD_CONFIGURE, 0xff, "CONFIGURE", 3, fdctrl_handle_configure }, +    { FD_CMD_POWERDOWN_MODE, 0xff, "POWERDOWN MODE", 2, fdctrl_handle_powerdown_mode }, +    { FD_CMD_OPTION, 0xff, "OPTION", 1, fdctrl_handle_option }, +    { FD_CMD_DRIVE_SPECIFICATION_COMMAND, 0xff, "DRIVE SPECIFICATION COMMAND", 5, fdctrl_handle_drive_specification_command }, +    { FD_CMD_RELATIVE_SEEK_OUT, 0xff, "RELATIVE SEEK OUT", 2, fdctrl_handle_relative_seek_out }, +    { FD_CMD_FORMAT_AND_WRITE, 0xff, "FORMAT AND WRITE", 10, fdctrl_unimplemented }, +    { FD_CMD_RELATIVE_SEEK_IN, 0xff, "RELATIVE SEEK IN", 2, fdctrl_handle_relative_seek_in }, +    { FD_CMD_LOCK, 0x7f, "LOCK", 0, fdctrl_handle_lock }, +    { FD_CMD_DUMPREG, 0xff, "DUMPREG", 0, fdctrl_handle_dumpreg }, +    { FD_CMD_VERSION, 0xff, "VERSION", 0, fdctrl_handle_version }, +    { FD_CMD_PART_ID, 0xff, "PART ID", 0, fdctrl_handle_partid }, +    { FD_CMD_WRITE, 0x1f, "WRITE (BeOS)", 8, fdctrl_start_transfer, FD_DIR_WRITE }, /* not in specification ; BeOS 4.5 bug */ +    { 0, 0, "unknown", 0, fdctrl_unimplemented }, /* default handler */ +}; +/* Associate command to an index in the 'handlers' array */ +static uint8_t command_to_handler[256]; + +static const FDCtrlCommand *get_command(uint8_t cmd) +{ +    int idx; + +    idx = command_to_handler[cmd]; +    FLOPPY_DPRINTF("%s command\n", handlers[idx].name); +    return &handlers[idx]; +} + +static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value) +{ +    FDrive *cur_drv; +    const FDCtrlCommand *cmd; +    uint32_t pos; + +    /* Reset mode */ +    if (!(fdctrl->dor & FD_DOR_nRESET)) { +        FLOPPY_DPRINTF("Floppy controller in RESET state !\n"); +        return; +    } +    if (!(fdctrl->msr & FD_MSR_RQM) || (fdctrl->msr & FD_MSR_DIO)) { +        FLOPPY_DPRINTF("error: controller not ready for writing\n"); +        return; +    } +    fdctrl->dsr &= ~FD_DSR_PWRDOWN; + +    FLOPPY_DPRINTF("%s: %02x\n", __func__, value); + +    /* If data_len spans multiple sectors, the current position in the FIFO +     * wraps around while fdctrl->data_pos is the real 
position in the whole +     * request. */ +    pos = fdctrl->data_pos++; +    pos %= FD_SECTOR_LEN; +    fdctrl->fifo[pos] = value; + +    if (fdctrl->data_pos == fdctrl->data_len) { +        fdctrl->msr &= ~FD_MSR_RQM; +    } + +    switch (fdctrl->phase) { +    case FD_PHASE_EXECUTION: +        /* For DMA requests, RQM should be cleared during execution phase, so +         * we would have errored out above. */ +        assert(fdctrl->msr & FD_MSR_NONDMA); + +        /* FIFO data write */ +        if (pos == FD_SECTOR_LEN - 1 || +            fdctrl->data_pos == fdctrl->data_len) { +            cur_drv = get_cur_drv(fdctrl); +            if (blk_write(cur_drv->blk, fd_sector(cur_drv), fdctrl->fifo, 1) +                < 0) { +                FLOPPY_DPRINTF("error writing sector %d\n", +                               fd_sector(cur_drv)); +                break; +            } +            if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) { +                FLOPPY_DPRINTF("error seeking to next sector %d\n", +                               fd_sector(cur_drv)); +                break; +            } +        } + +        /* Switch to result phase when done with the transfer */ +        if (fdctrl->data_pos == fdctrl->data_len) { +            fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); +        } +        break; + +    case FD_PHASE_COMMAND: +        assert(!(fdctrl->msr & FD_MSR_NONDMA)); +        assert(fdctrl->data_pos < FD_SECTOR_LEN); + +        if (pos == 0) { +            /* The first byte specifies the command. Now we start reading +             * as many parameters as this command requires. */ +            cmd = get_command(value); +            fdctrl->data_len = cmd->parameters + 1; +            if (cmd->parameters) { +                fdctrl->msr |= FD_MSR_RQM; +            } +            fdctrl->msr |= FD_MSR_CMDBUSY; +        } + +        if (fdctrl->data_pos == fdctrl->data_len) { +            /* We have all parameters now, execute the command */ +            fdctrl->phase = FD_PHASE_EXECUTION; + +            if (fdctrl->data_state & FD_STATE_FORMAT) { +                fdctrl_format_sector(fdctrl); +                break; +            } + +            cmd = get_command(fdctrl->fifo[0]); +            FLOPPY_DPRINTF("Calling handler for '%s'\n", cmd->name); +            cmd->handler(fdctrl, cmd->direction); +        } +        break; + +    case FD_PHASE_RESULT: +    default: +        abort(); +    } +} + +static void fdctrl_result_timer(void *opaque) +{ +    FDCtrl *fdctrl = opaque; +    FDrive *cur_drv = get_cur_drv(fdctrl); + +    /* Pretend we are spinning. +     * This is needed for Coherent, which uses READ ID to check for +     * sector interleaving. +     */ +    if (cur_drv->last_sect != 0) { +        cur_drv->sect = (cur_drv->sect % cur_drv->last_sect) + 1; +    } +    /* READ_ID can't automatically succeed! 
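+     * It still has to honour the data rate check, exactly like a normal
+     * transfer: a mismatch below terminates abnormally with FD_SR1_MA.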
*/ +    if (fdctrl->check_media_rate && +        (fdctrl->dsr & FD_DSR_DRATEMASK) != cur_drv->media_rate) { +        FLOPPY_DPRINTF("read id rate mismatch (fdc=%d, media=%d)\n", +                       fdctrl->dsr & FD_DSR_DRATEMASK, cur_drv->media_rate); +        fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); +    } else { +        fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00); +    } +} + +static void fdctrl_change_cb(void *opaque, bool load) +{ +    FDrive *drive = opaque; + +    drive->media_changed = 1; +    fd_revalidate(drive); +} + +static const BlockDevOps fdctrl_block_ops = { +    .change_media_cb = fdctrl_change_cb, +}; + +/* Init functions */ +static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp) +{ +    unsigned int i; +    FDrive *drive; + +    for (i = 0; i < MAX_FD; i++) { +        drive = &fdctrl->drives[i]; +        drive->fdctrl = fdctrl; + +        if (drive->blk) { +            if (blk_get_on_error(drive->blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { +                error_setg(errp, "fdc doesn't support drive option werror"); +                return; +            } +            if (blk_get_on_error(drive->blk, 1) != BLOCKDEV_ON_ERROR_REPORT) { +                error_setg(errp, "fdc doesn't support drive option rerror"); +                return; +            } +        } + +        fd_init(drive); +        fdctrl_change_cb(drive, 0); +        if (drive->blk) { +            blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive); +        } +    } +} + +ISADevice *fdctrl_init_isa(ISABus *bus, DriveInfo **fds) +{ +    DeviceState *dev; +    ISADevice *isadev; + +    isadev = isa_try_create(bus, TYPE_ISA_FDC); +    if (!isadev) { +        return NULL; +    } +    dev = DEVICE(isadev); + +    if (fds[0]) { +        qdev_prop_set_drive_nofail(dev, "driveA", blk_by_legacy_dinfo(fds[0])); +    } +    if (fds[1]) { +        qdev_prop_set_drive_nofail(dev, "driveB", blk_by_legacy_dinfo(fds[1])); +    } +    qdev_init_nofail(dev); + +    return isadev; +} + +void fdctrl_init_sysbus(qemu_irq irq, int dma_chann, +                        hwaddr mmio_base, DriveInfo **fds) +{ +    FDCtrl *fdctrl; +    DeviceState *dev; +    SysBusDevice *sbd; +    FDCtrlSysBus *sys; + +    dev = qdev_create(NULL, "sysbus-fdc"); +    sys = SYSBUS_FDC(dev); +    fdctrl = &sys->state; +    fdctrl->dma_chann = dma_chann; /* FIXME */ +    if (fds[0]) { +        qdev_prop_set_drive_nofail(dev, "driveA", blk_by_legacy_dinfo(fds[0])); +    } +    if (fds[1]) { +        qdev_prop_set_drive_nofail(dev, "driveB", blk_by_legacy_dinfo(fds[1])); +    } +    qdev_init_nofail(dev); +    sbd = SYS_BUS_DEVICE(dev); +    sysbus_connect_irq(sbd, 0, irq); +    sysbus_mmio_map(sbd, 0, mmio_base); +} + +void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, +                       DriveInfo **fds, qemu_irq *fdc_tc) +{ +    DeviceState *dev; +    FDCtrlSysBus *sys; + +    dev = qdev_create(NULL, "SUNW,fdtwo"); +    if (fds[0]) { +        qdev_prop_set_drive_nofail(dev, "drive", blk_by_legacy_dinfo(fds[0])); +    } +    qdev_init_nofail(dev); +    sys = SYSBUS_FDC(dev); +    sysbus_connect_irq(SYS_BUS_DEVICE(sys), 0, irq); +    sysbus_mmio_map(SYS_BUS_DEVICE(sys), 0, io_base); +    *fdc_tc = qdev_get_gpio_in(dev, 0); +} + +static void fdctrl_realize_common(FDCtrl *fdctrl, Error **errp) +{ +    int i, j; +    static int command_tables_inited = 0; + +    /* Fill 'command_to_handler' lookup table */ +    if (!command_tables_inited) { +        command_tables_inited = 1; +        for (i = ARRAY_SIZE(handlers) - 
1; i >= 0; i--) { +            for (j = 0; j < sizeof(command_to_handler); j++) { +                if ((j & handlers[i].mask) == handlers[i].value) { +                    command_to_handler[j] = i; +                } +            } +        } +    } + +    FLOPPY_DPRINTF("init controller\n"); +    fdctrl->fifo = qemu_memalign(512, FD_SECTOR_LEN); +    fdctrl->fifo_size = 512; +    fdctrl->result_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, +                                             fdctrl_result_timer, fdctrl); + +    fdctrl->version = 0x90; /* Intel 82078 controller */ +    fdctrl->config = FD_CONFIG_EIS | FD_CONFIG_EFIFO; /* Implicit seek, polling & FIFO enabled */ +    fdctrl->num_floppies = MAX_FD; + +    if (fdctrl->dma_chann != -1) { +        DMA_register_channel(fdctrl->dma_chann, &fdctrl_transfer_handler, fdctrl); +    } +    fdctrl_connect_drives(fdctrl, errp); +} + +static const MemoryRegionPortio fdc_portio_list[] = { +    { 1, 5, 1, .read = fdctrl_read, .write = fdctrl_write }, +    { 7, 1, 1, .read = fdctrl_read, .write = fdctrl_write }, +    PORTIO_END_OF_LIST(), +}; + +static void isabus_fdc_realize(DeviceState *dev, Error **errp) +{ +    ISADevice *isadev = ISA_DEVICE(dev); +    FDCtrlISABus *isa = ISA_FDC(dev); +    FDCtrl *fdctrl = &isa->state; +    Error *err = NULL; + +    isa_register_portio_list(isadev, isa->iobase, fdc_portio_list, fdctrl, +                             "fdc"); + +    isa_init_irq(isadev, &fdctrl->irq, isa->irq); +    fdctrl->dma_chann = isa->dma; + +    qdev_set_legacy_instance_id(dev, isa->iobase, 2); +    fdctrl_realize_common(fdctrl, &err); +    if (err != NULL) { +        error_propagate(errp, err); +        return; +    } +} + +static void sysbus_fdc_initfn(Object *obj) +{ +    SysBusDevice *sbd = SYS_BUS_DEVICE(obj); +    FDCtrlSysBus *sys = SYSBUS_FDC(obj); +    FDCtrl *fdctrl = &sys->state; + +    fdctrl->dma_chann = -1; + +    memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_ops, fdctrl, +                          "fdc", 0x08); +    sysbus_init_mmio(sbd, &fdctrl->iomem); +} + +static void sun4m_fdc_initfn(Object *obj) +{ +    SysBusDevice *sbd = SYS_BUS_DEVICE(obj); +    FDCtrlSysBus *sys = SYSBUS_FDC(obj); +    FDCtrl *fdctrl = &sys->state; + +    memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_strict_ops, +                          fdctrl, "fdctrl", 0x08); +    sysbus_init_mmio(sbd, &fdctrl->iomem); +} + +static void sysbus_fdc_common_initfn(Object *obj) +{ +    DeviceState *dev = DEVICE(obj); +    SysBusDevice *sbd = SYS_BUS_DEVICE(dev); +    FDCtrlSysBus *sys = SYSBUS_FDC(obj); +    FDCtrl *fdctrl = &sys->state; + +    qdev_set_legacy_instance_id(dev, 0 /* io */, 2); /* FIXME */ + +    sysbus_init_irq(sbd, &fdctrl->irq); +    qdev_init_gpio_in(dev, fdctrl_handle_tc, 1); +} + +static void sysbus_fdc_common_realize(DeviceState *dev, Error **errp) +{ +    FDCtrlSysBus *sys = SYSBUS_FDC(dev); +    FDCtrl *fdctrl = &sys->state; + +    fdctrl_realize_common(fdctrl, errp); +} + +FDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) +{ +    FDCtrlISABus *isa = ISA_FDC(fdc); + +    return isa->state.drives[i].drive; +} + +static const VMStateDescription vmstate_isa_fdc ={ +    .name = "fdc", +    .version_id = 2, +    .minimum_version_id = 2, +    .fields = (VMStateField[]) { +        VMSTATE_STRUCT(state, FDCtrlISABus, 0, vmstate_fdc, FDCtrl), +        VMSTATE_END_OF_LIST() +    } +}; + +static Property isa_fdc_properties[] = { +    DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), +    DEFINE_PROP_UINT32("irq", 
FDCtrlISABus, irq, 6), +    DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), +    DEFINE_PROP_DRIVE("driveA", FDCtrlISABus, state.drives[0].blk), +    DEFINE_PROP_DRIVE("driveB", FDCtrlISABus, state.drives[1].blk), +    DEFINE_PROP_BIT("check_media_rate", FDCtrlISABus, state.check_media_rate, +                    0, true), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void isabus_fdc_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->realize = isabus_fdc_realize; +    dc->fw_name = "fdc"; +    dc->reset = fdctrl_external_reset_isa; +    dc->vmsd = &vmstate_isa_fdc; +    dc->props = isa_fdc_properties; +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static void isabus_fdc_instance_init(Object *obj) +{ +    FDCtrlISABus *isa = ISA_FDC(obj); + +    device_add_bootindex_property(obj, &isa->bootindexA, +                                  "bootindexA", "/floppy@0", +                                  DEVICE(obj), NULL); +    device_add_bootindex_property(obj, &isa->bootindexB, +                                  "bootindexB", "/floppy@1", +                                  DEVICE(obj), NULL); +} + +static const TypeInfo isa_fdc_info = { +    .name          = TYPE_ISA_FDC, +    .parent        = TYPE_ISA_DEVICE, +    .instance_size = sizeof(FDCtrlISABus), +    .class_init    = isabus_fdc_class_init, +    .instance_init = isabus_fdc_instance_init, +}; + +static const VMStateDescription vmstate_sysbus_fdc ={ +    .name = "fdc", +    .version_id = 2, +    .minimum_version_id = 2, +    .fields = (VMStateField[]) { +        VMSTATE_STRUCT(state, FDCtrlSysBus, 0, vmstate_fdc, FDCtrl), +        VMSTATE_END_OF_LIST() +    } +}; + +static Property sysbus_fdc_properties[] = { +    DEFINE_PROP_DRIVE("driveA", FDCtrlSysBus, state.drives[0].blk), +    DEFINE_PROP_DRIVE("driveB", FDCtrlSysBus, state.drives[1].blk), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void sysbus_fdc_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->props = sysbus_fdc_properties; +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sysbus_fdc_info = { +    .name          = "sysbus-fdc", +    .parent        = TYPE_SYSBUS_FDC, +    .instance_init = sysbus_fdc_initfn, +    .class_init    = sysbus_fdc_class_init, +}; + +static Property sun4m_fdc_properties[] = { +    DEFINE_PROP_DRIVE("drive", FDCtrlSysBus, state.drives[0].blk), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void sun4m_fdc_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->props = sun4m_fdc_properties; +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo sun4m_fdc_info = { +    .name          = "SUNW,fdtwo", +    .parent        = TYPE_SYSBUS_FDC, +    .instance_init = sun4m_fdc_initfn, +    .class_init    = sun4m_fdc_class_init, +}; + +static void sysbus_fdc_common_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->realize = sysbus_fdc_common_realize; +    dc->reset = fdctrl_external_reset_sysbus; +    dc->vmsd = &vmstate_sysbus_fdc; +} + +static const TypeInfo sysbus_fdc_type_info = { +    .name          = TYPE_SYSBUS_FDC, +    .parent        = TYPE_SYS_BUS_DEVICE, +    .instance_size = sizeof(FDCtrlSysBus), +    .instance_init = sysbus_fdc_common_initfn, +    .abstract      = true, +    .class_init    = sysbus_fdc_common_class_init, +}; + +static void fdc_register_types(void) +{ +    
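+    /* Four QOM types are registered: the ISA controller, the abstract
+     * sysbus base type, and its two concrete subtypes, "sysbus-fdc" and
+     * the sun4m "SUNW,fdtwo".
+     */
+    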
type_register_static(&isa_fdc_info); +    type_register_static(&sysbus_fdc_type_info); +    type_register_static(&sysbus_fdc_info); +    type_register_static(&sun4m_fdc_info); +} + +type_init(fdc_register_types) diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c new file mode 100644 index 00000000..b187878f --- /dev/null +++ b/hw/block/hd-geometry.c @@ -0,0 +1,165 @@ +/* + * Hard disk geometry utilities + * + * Copyright (C) 2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * This file incorporates work covered by the following copyright and + * permission notice: + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "sysemu/block-backend.h" +#include "hw/block/block.h" +#include "trace.h" + +struct partition { +        uint8_t boot_ind;           /* 0x80 - active */ +        uint8_t head;               /* starting head */ +        uint8_t sector;             /* starting sector */ +        uint8_t cyl;                /* starting cylinder */ +        uint8_t sys_ind;            /* What partition type */ +        uint8_t end_head;           /* end head */ +        uint8_t end_sector;         /* end sector */ +        uint8_t end_cyl;            /* end cylinder */ +        uint32_t start_sect;        /* starting sector counting from 0 */ +        uint32_t nr_sects;          /* nr of sectors in partition */ +} QEMU_PACKED; + +/* try to guess the disk logical geometry from the MSDOS partition table. +   Return 0 if OK, -1 if could not guess */ +static int guess_disk_lchs(BlockBackend *blk, +                           int *pcylinders, int *pheads, int *psectors) +{ +    uint8_t buf[BDRV_SECTOR_SIZE]; +    int i, heads, sectors, cylinders; +    struct partition *p; +    uint32_t nr_sects; +    uint64_t nb_sectors; + +    blk_get_geometry(blk, &nb_sectors); + +    /** +     * The function will be invoked during startup not only in sync I/O mode, +     * but also in async I/O mode. So the I/O throttling function has to +     * be disabled temporarily here, not permanently. 
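+     * blk_read_unthrottled() below does exactly that for this single
+     * sector-sized probe read of the partition table.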
+     */ +    if (blk_read_unthrottled(blk, 0, buf, 1) < 0) { +        return -1; +    } +    /* test msdos magic */ +    if (buf[510] != 0x55 || buf[511] != 0xaa) { +        return -1; +    } +    for (i = 0; i < 4; i++) { +        p = ((struct partition *)(buf + 0x1be)) + i; +        nr_sects = le32_to_cpu(p->nr_sects); +        if (nr_sects && p->end_head) { +            /* We make the assumption that the partition terminates on +               a cylinder boundary */ +            heads = p->end_head + 1; +            sectors = p->end_sector & 63; +            if (sectors == 0) { +                continue; +            } +            cylinders = nb_sectors / (heads * sectors); +            if (cylinders < 1 || cylinders > 16383) { +                continue; +            } +            *pheads = heads; +            *psectors = sectors; +            *pcylinders = cylinders; +            trace_hd_geometry_lchs_guess(blk, cylinders, heads, sectors); +            return 0; +        } +    } +    return -1; +} + +static void guess_chs_for_size(BlockBackend *blk, +                uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs) +{ +    uint64_t nb_sectors; +    int cylinders; + +    blk_get_geometry(blk, &nb_sectors); + +    cylinders = nb_sectors / (16 * 63); +    if (cylinders > 16383) { +        cylinders = 16383; +    } else if (cylinders < 2) { +        cylinders = 2; +    } +    *pcyls = cylinders; +    *pheads = 16; +    *psecs = 63; +} + +void hd_geometry_guess(BlockBackend *blk, +                       uint32_t *pcyls, uint32_t *pheads, uint32_t *psecs, +                       int *ptrans) +{ +    int cylinders, heads, secs, translation; +    HDGeometry geo; + +    /* Try to probe the backing device geometry, otherwise fallback +       to the old logic. (as of 12/2014 probing only succeeds on DASDs) */ +    if (blk_probe_geometry(blk, &geo) == 0) { +        *pcyls = geo.cylinders; +        *psecs = geo.sectors; +        *pheads = geo.heads; +        translation = BIOS_ATA_TRANSLATION_NONE; +    } else if (guess_disk_lchs(blk, &cylinders, &heads, &secs) < 0) { +        /* no LCHS guess: use a standard physical disk geometry  */ +        guess_chs_for_size(blk, pcyls, pheads, psecs); +        translation = hd_bios_chs_auto_trans(*pcyls, *pheads, *psecs); +    } else if (heads > 16) { +        /* LCHS guess with heads > 16 means that a BIOS LBA +           translation was active, so a standard physical disk +           geometry is OK */ +        guess_chs_for_size(blk, pcyls, pheads, psecs); +        translation = *pcyls * *pheads <= 131072 +            ? BIOS_ATA_TRANSLATION_LARGE +            : BIOS_ATA_TRANSLATION_LBA; +    } else { +        /* LCHS guess with heads <= 16: use as physical geometry */ +        *pcyls = cylinders; +        *pheads = heads; +        *psecs = secs; +        /* disable any translation to be in sync with +           the logical geometry */ +        translation = BIOS_ATA_TRANSLATION_NONE; +    } +    if (ptrans) { +        *ptrans = translation; +    } +    trace_hd_geometry_guess(blk, *pcyls, *pheads, *psecs, translation); +} + +int hd_bios_chs_auto_trans(uint32_t cyls, uint32_t heads, uint32_t secs) +{ +    return cyls <= 1024 && heads <= 16 && secs <= 63 +        ? BIOS_ATA_TRANSLATION_NONE +        : BIOS_ATA_TRANSLATION_LBA; +} diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c new file mode 100644 index 00000000..efc43dde --- /dev/null +++ b/hw/block/m25p80.c @@ -0,0 +1,711 @@ +/* + * ST M25P80 emulator. 
Emulate all SPI flash devices based on the m25p80 command + * set. Known devices table current as of Jun/2012 and taken from linux. + * See drivers/mtd/devices/m25p80.c. + * + * Copyright (C) 2011 Edgar E. Iglesias <edgar.iglesias@gmail.com> + * Copyright (C) 2012 Peter A. G. Crosthwaite <peter.crosthwaite@petalogix.com> + * Copyright (C) 2012 PetaLogix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) a later version of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/hw.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/ssi.h" + +#ifndef M25P80_ERR_DEBUG +#define M25P80_ERR_DEBUG 0 +#endif + +#define DB_PRINT_L(level, ...) do { \ +    if (M25P80_ERR_DEBUG > (level)) { \ +        fprintf(stderr,  ": %s: ", __func__); \ +        fprintf(stderr, ## __VA_ARGS__); \ +    } \ +} while (0); + +/* Fields for FlashPartInfo->flags */ + +/* erase capabilities */ +#define ER_4K 1 +#define ER_32K 2 +/* set to allow the page program command to write 0s back to 1. Useful for + * modelling EEPROM with SPI flash command set + */ +#define WR_1 0x100 + +typedef struct FlashPartInfo { +    const char *part_name; +    /* jedec code. (jedec >> 16) & 0xff is the 1st byte, >> 8 the 2nd etc */ +    uint32_t jedec; +    /* extended jedec code */ +    uint16_t ext_jedec; +    /* there is confusion between manufacturers as to what a sector is. In this +     * device model, a "sector" is the size that is erased by the ERASE_SECTOR +     * command (opcode 0xd8). 
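+     * Parts flagged ER_4K and/or ER_32K additionally honour the smaller
+     * 4 KiB (opcode 0x20) and 32 KiB (opcode 0x52) erase commands.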
+     */ +    uint32_t sector_size; +    uint32_t n_sectors; +    uint32_t page_size; +    uint8_t flags; +} FlashPartInfo; + +/* adapted from linux */ + +#define INFO(_part_name, _jedec, _ext_jedec, _sector_size, _n_sectors, _flags)\ +    .part_name = (_part_name),\ +    .jedec = (_jedec),\ +    .ext_jedec = (_ext_jedec),\ +    .sector_size = (_sector_size),\ +    .n_sectors = (_n_sectors),\ +    .page_size = 256,\ +    .flags = (_flags),\ + +#define JEDEC_NUMONYX 0x20 +#define JEDEC_WINBOND 0xEF +#define JEDEC_SPANSION 0x01 + +static const FlashPartInfo known_devices[] = { +    /* Atmel -- some are (confusingly) marketed as "DataFlash" */ +    { INFO("at25fs010",   0x1f6601,      0,  32 << 10,   4, ER_4K) }, +    { INFO("at25fs040",   0x1f6604,      0,  64 << 10,   8, ER_4K) }, + +    { INFO("at25df041a",  0x1f4401,      0,  64 << 10,   8, ER_4K) }, +    { INFO("at25df321a",  0x1f4701,      0,  64 << 10,  64, ER_4K) }, +    { INFO("at25df641",   0x1f4800,      0,  64 << 10, 128, ER_4K) }, + +    { INFO("at26f004",    0x1f0400,      0,  64 << 10,   8, ER_4K) }, +    { INFO("at26df081a",  0x1f4501,      0,  64 << 10,  16, ER_4K) }, +    { INFO("at26df161a",  0x1f4601,      0,  64 << 10,  32, ER_4K) }, +    { INFO("at26df321",   0x1f4700,      0,  64 << 10,  64, ER_4K) }, + +    { INFO("at45db081d",  0x1f2500,      0,  64 << 10,  16, ER_4K) }, + +    /* EON -- en25xxx */ +    { INFO("en25f32",     0x1c3116,      0,  64 << 10,  64, ER_4K) }, +    { INFO("en25p32",     0x1c2016,      0,  64 << 10,  64, 0) }, +    { INFO("en25q32b",    0x1c3016,      0,  64 << 10,  64, 0) }, +    { INFO("en25p64",     0x1c2017,      0,  64 << 10, 128, 0) }, +    { INFO("en25q64",     0x1c3017,      0,  64 << 10, 128, ER_4K) }, + +    /* GigaDevice */ +    { INFO("gd25q32",     0xc84016,      0,  64 << 10,  64, ER_4K) }, +    { INFO("gd25q64",     0xc84017,      0,  64 << 10, 128, ER_4K) }, + +    /* Intel/Numonyx -- xxxs33b */ +    { INFO("160s33b",     0x898911,      0,  64 << 10,  32, 0) }, +    { INFO("320s33b",     0x898912,      0,  64 << 10,  64, 0) }, +    { INFO("640s33b",     0x898913,      0,  64 << 10, 128, 0) }, +    { INFO("n25q064",     0x20ba17,      0,  64 << 10, 128, 0) }, + +    /* Macronix */ +    { INFO("mx25l2005a",  0xc22012,      0,  64 << 10,   4, ER_4K) }, +    { INFO("mx25l4005a",  0xc22013,      0,  64 << 10,   8, ER_4K) }, +    { INFO("mx25l8005",   0xc22014,      0,  64 << 10,  16, 0) }, +    { INFO("mx25l1606e",  0xc22015,      0,  64 << 10,  32, ER_4K) }, +    { INFO("mx25l3205d",  0xc22016,      0,  64 << 10,  64, 0) }, +    { INFO("mx25l6405d",  0xc22017,      0,  64 << 10, 128, 0) }, +    { INFO("mx25l12805d", 0xc22018,      0,  64 << 10, 256, 0) }, +    { INFO("mx25l12855e", 0xc22618,      0,  64 << 10, 256, 0) }, +    { INFO("mx25l25635e", 0xc22019,      0,  64 << 10, 512, 0) }, +    { INFO("mx25l25655e", 0xc22619,      0,  64 << 10, 512, 0) }, + +    /* Micron */ +    { INFO("n25q032a11",  0x20bb16,      0,  64 << 10,  64, ER_4K) }, +    { INFO("n25q032a13",  0x20ba16,      0,  64 << 10,  64, ER_4K) }, +    { INFO("n25q064a11",  0x20bb17,      0,  64 << 10, 128, ER_4K) }, +    { INFO("n25q064a13",  0x20ba17,      0,  64 << 10, 128, ER_4K) }, +    { INFO("n25q128a11",  0x20bb18,      0,  64 << 10, 256, ER_4K) }, +    { INFO("n25q128a13",  0x20ba18,      0,  64 << 10, 256, ER_4K) }, +    { INFO("n25q256a11",  0x20bb19,      0,  64 << 10, 512, ER_4K) }, +    { INFO("n25q256a13",  0x20ba19,      0,  64 << 10, 512, ER_4K) }, + +    /* Spansion -- single (large) sector size only, 
at least +     * for the chips listed here (without boot sectors). +     */ +    { INFO("s25sl032p",   0x010215, 0x4d00,  64 << 10,  64, ER_4K) }, +    { INFO("s25sl064p",   0x010216, 0x4d00,  64 << 10, 128, ER_4K) }, +    { INFO("s25fl256s0",  0x010219, 0x4d00, 256 << 10, 128, 0) }, +    { INFO("s25fl256s1",  0x010219, 0x4d01,  64 << 10, 512, 0) }, +    { INFO("s25fl512s",   0x010220, 0x4d00, 256 << 10, 256, 0) }, +    { INFO("s70fl01gs",   0x010221, 0x4d00, 256 << 10, 256, 0) }, +    { INFO("s25sl12800",  0x012018, 0x0300, 256 << 10,  64, 0) }, +    { INFO("s25sl12801",  0x012018, 0x0301,  64 << 10, 256, 0) }, +    { INFO("s25fl129p0",  0x012018, 0x4d00, 256 << 10,  64, 0) }, +    { INFO("s25fl129p1",  0x012018, 0x4d01,  64 << 10, 256, 0) }, +    { INFO("s25sl004a",   0x010212,      0,  64 << 10,   8, 0) }, +    { INFO("s25sl008a",   0x010213,      0,  64 << 10,  16, 0) }, +    { INFO("s25sl016a",   0x010214,      0,  64 << 10,  32, 0) }, +    { INFO("s25sl032a",   0x010215,      0,  64 << 10,  64, 0) }, +    { INFO("s25sl064a",   0x010216,      0,  64 << 10, 128, 0) }, +    { INFO("s25fl016k",   0xef4015,      0,  64 << 10,  32, ER_4K | ER_32K) }, +    { INFO("s25fl064k",   0xef4017,      0,  64 << 10, 128, ER_4K | ER_32K) }, + +    /* SST -- large erase sizes are "overlays", "sectors" are 4<< 10 */ +    { INFO("sst25vf040b", 0xbf258d,      0,  64 << 10,   8, ER_4K) }, +    { INFO("sst25vf080b", 0xbf258e,      0,  64 << 10,  16, ER_4K) }, +    { INFO("sst25vf016b", 0xbf2541,      0,  64 << 10,  32, ER_4K) }, +    { INFO("sst25vf032b", 0xbf254a,      0,  64 << 10,  64, ER_4K) }, +    { INFO("sst25wf512",  0xbf2501,      0,  64 << 10,   1, ER_4K) }, +    { INFO("sst25wf010",  0xbf2502,      0,  64 << 10,   2, ER_4K) }, +    { INFO("sst25wf020",  0xbf2503,      0,  64 << 10,   4, ER_4K) }, +    { INFO("sst25wf040",  0xbf2504,      0,  64 << 10,   8, ER_4K) }, + +    /* ST Microelectronics -- newer production may have feature updates */ +    { INFO("m25p05",      0x202010,      0,  32 << 10,   2, 0) }, +    { INFO("m25p10",      0x202011,      0,  32 << 10,   4, 0) }, +    { INFO("m25p20",      0x202012,      0,  64 << 10,   4, 0) }, +    { INFO("m25p40",      0x202013,      0,  64 << 10,   8, 0) }, +    { INFO("m25p80",      0x202014,      0,  64 << 10,  16, 0) }, +    { INFO("m25p16",      0x202015,      0,  64 << 10,  32, 0) }, +    { INFO("m25p32",      0x202016,      0,  64 << 10,  64, 0) }, +    { INFO("m25p64",      0x202017,      0,  64 << 10, 128, 0) }, +    { INFO("m25p128",     0x202018,      0, 256 << 10,  64, 0) }, +    { INFO("n25q032",     0x20ba16,      0,  64 << 10,  64, 0) }, + +    { INFO("m45pe10",     0x204011,      0,  64 << 10,   2, 0) }, +    { INFO("m45pe80",     0x204014,      0,  64 << 10,  16, 0) }, +    { INFO("m45pe16",     0x204015,      0,  64 << 10,  32, 0) }, + +    { INFO("m25pe20",     0x208012,      0,  64 << 10,   4, 0) }, +    { INFO("m25pe80",     0x208014,      0,  64 << 10,  16, 0) }, +    { INFO("m25pe16",     0x208015,      0,  64 << 10,  32, ER_4K) }, + +    { INFO("m25px32",     0x207116,      0,  64 << 10,  64, ER_4K) }, +    { INFO("m25px32-s0",  0x207316,      0,  64 << 10,  64, ER_4K) }, +    { INFO("m25px32-s1",  0x206316,      0,  64 << 10,  64, ER_4K) }, +    { INFO("m25px64",     0x207117,      0,  64 << 10, 128, 0) }, + +    /* Winbond -- w25x "blocks" are 64k, "sectors" are 4KiB */ +    { INFO("w25x10",      0xef3011,      0,  64 << 10,   2, ER_4K) }, +    { INFO("w25x20",      0xef3012,      0,  64 << 10,   4, ER_4K) }, +    { 
INFO("w25x40",      0xef3013,      0,  64 << 10,   8, ER_4K) }, +    { INFO("w25x80",      0xef3014,      0,  64 << 10,  16, ER_4K) }, +    { INFO("w25x16",      0xef3015,      0,  64 << 10,  32, ER_4K) }, +    { INFO("w25x32",      0xef3016,      0,  64 << 10,  64, ER_4K) }, +    { INFO("w25q32",      0xef4016,      0,  64 << 10,  64, ER_4K) }, +    { INFO("w25q32dw",    0xef6016,      0,  64 << 10,  64, ER_4K) }, +    { INFO("w25x64",      0xef3017,      0,  64 << 10, 128, ER_4K) }, +    { INFO("w25q64",      0xef4017,      0,  64 << 10, 128, ER_4K) }, +    { INFO("w25q80",      0xef5014,      0,  64 << 10,  16, ER_4K) }, +    { INFO("w25q80bl",    0xef4014,      0,  64 << 10,  16, ER_4K) }, +    { INFO("w25q256",     0xef4019,      0,  64 << 10, 512, ER_4K) }, + +    /* Numonyx -- n25q128 */ +    { INFO("n25q128",      0x20ba18,      0,  64 << 10, 256, 0) }, +}; + +typedef enum { +    NOP = 0, +    WRSR = 0x1, +    WRDI = 0x4, +    RDSR = 0x5, +    WREN = 0x6, +    JEDEC_READ = 0x9f, +    BULK_ERASE = 0xc7, + +    READ = 0x3, +    FAST_READ = 0xb, +    DOR = 0x3b, +    QOR = 0x6b, +    DIOR = 0xbb, +    QIOR = 0xeb, + +    PP = 0x2, +    DPP = 0xa2, +    QPP = 0x32, + +    ERASE_4K = 0x20, +    ERASE_32K = 0x52, +    ERASE_SECTOR = 0xd8, +} FlashCMD; + +typedef enum { +    STATE_IDLE, +    STATE_PAGE_PROGRAM, +    STATE_READ, +    STATE_COLLECTING_DATA, +    STATE_READING_DATA, +} CMDState; + +typedef struct Flash { +    SSISlave parent_obj; + +    uint32_t r; + +    BlockBackend *blk; + +    uint8_t *storage; +    uint32_t size; +    int page_size; + +    uint8_t state; +    uint8_t data[16]; +    uint32_t len; +    uint32_t pos; +    uint8_t needed_bytes; +    uint8_t cmd_in_progress; +    uint64_t cur_addr; +    bool write_enable; + +    int64_t dirty_page; + +    const FlashPartInfo *pi; + +} Flash; + +typedef struct M25P80Class { +    SSISlaveClass parent_class; +    FlashPartInfo *pi; +} M25P80Class; + +#define TYPE_M25P80 "m25p80-generic" +#define M25P80(obj) \ +     OBJECT_CHECK(Flash, (obj), TYPE_M25P80) +#define M25P80_CLASS(klass) \ +     OBJECT_CLASS_CHECK(M25P80Class, (klass), TYPE_M25P80) +#define M25P80_GET_CLASS(obj) \ +     OBJECT_GET_CLASS(M25P80Class, (obj), TYPE_M25P80) + +static void blk_sync_complete(void *opaque, int ret) +{ +    /* do nothing. Masters do not directly interact with the backing store, +     * only the working copy so no mutexing required. 
+     */ +} + +static void flash_sync_page(Flash *s, int page) +{ +    int blk_sector, nb_sectors; +    QEMUIOVector iov; + +    if (!s->blk || blk_is_read_only(s->blk)) { +        return; +    } + +    blk_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE; +    nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE); +    qemu_iovec_init(&iov, 1); +    qemu_iovec_add(&iov, s->storage + blk_sector * BDRV_SECTOR_SIZE, +                   nb_sectors * BDRV_SECTOR_SIZE); +    blk_aio_writev(s->blk, blk_sector, &iov, nb_sectors, blk_sync_complete, +                   NULL); +} + +static inline void flash_sync_area(Flash *s, int64_t off, int64_t len) +{ +    int64_t start, end, nb_sectors; +    QEMUIOVector iov; + +    if (!s->blk || blk_is_read_only(s->blk)) { +        return; +    } + +    assert(!(len % BDRV_SECTOR_SIZE)); +    start = off / BDRV_SECTOR_SIZE; +    end = (off + len) / BDRV_SECTOR_SIZE; +    nb_sectors = end - start; +    qemu_iovec_init(&iov, 1); +    qemu_iovec_add(&iov, s->storage + (start * BDRV_SECTOR_SIZE), +                                        nb_sectors * BDRV_SECTOR_SIZE); +    blk_aio_writev(s->blk, start, &iov, nb_sectors, blk_sync_complete, NULL); +} + +static void flash_erase(Flash *s, int offset, FlashCMD cmd) +{ +    uint32_t len; +    uint8_t capa_to_assert = 0; + +    switch (cmd) { +    case ERASE_4K: +        len = 4 << 10; +        capa_to_assert = ER_4K; +        break; +    case ERASE_32K: +        len = 32 << 10; +        capa_to_assert = ER_32K; +        break; +    case ERASE_SECTOR: +        len = s->pi->sector_size; +        break; +    case BULK_ERASE: +        len = s->size; +        break; +    default: +        abort(); +    } + +    DB_PRINT_L(0, "offset = %#x, len = %d\n", offset, len); +    if ((s->pi->flags & capa_to_assert) != capa_to_assert) { +        qemu_log_mask(LOG_GUEST_ERROR, "M25P80: %d erase size not supported by" +                      " device\n", len); +    } + +    if (!s->write_enable) { +        qemu_log_mask(LOG_GUEST_ERROR, "M25P80: erase with write protect!\n"); +        return; +    } +    memset(s->storage + offset, 0xff, len); +    flash_sync_area(s, offset, len); +} + +static inline void flash_sync_dirty(Flash *s, int64_t newpage) +{ +    if (s->dirty_page >= 0 && s->dirty_page != newpage) { +        flash_sync_page(s, s->dirty_page); +        s->dirty_page = newpage; +    } +} + +static inline +void flash_write8(Flash *s, uint64_t addr, uint8_t data) +{ +    int64_t page = addr / s->pi->page_size; +    uint8_t prev = s->storage[s->cur_addr]; + +    if (!s->write_enable) { +        qemu_log_mask(LOG_GUEST_ERROR, "M25P80: write with write protect!\n"); +    } + +    if ((prev ^ data) & data) { +        DB_PRINT_L(1, "programming zero to one! 
addr=%" PRIx64 "  %" PRIx8 +                   " -> %" PRIx8 "\n", addr, prev, data); +    } + +    if (s->pi->flags & WR_1) { +        s->storage[s->cur_addr] = data; +    } else { +        s->storage[s->cur_addr] &= data; +    } + +    flash_sync_dirty(s, page); +    s->dirty_page = page; +} + +static void complete_collecting_data(Flash *s) +{ +    s->cur_addr = s->data[0] << 16; +    s->cur_addr |= s->data[1] << 8; +    s->cur_addr |= s->data[2]; + +    s->state = STATE_IDLE; + +    switch (s->cmd_in_progress) { +    case DPP: +    case QPP: +    case PP: +        s->state = STATE_PAGE_PROGRAM; +        break; +    case READ: +    case FAST_READ: +    case DOR: +    case QOR: +    case DIOR: +    case QIOR: +        s->state = STATE_READ; +        break; +    case ERASE_4K: +    case ERASE_32K: +    case ERASE_SECTOR: +        flash_erase(s, s->cur_addr, s->cmd_in_progress); +        break; +    case WRSR: +        if (s->write_enable) { +            s->write_enable = false; +        } +        break; +    default: +        break; +    } +} + +static void decode_new_cmd(Flash *s, uint32_t value) +{ +    s->cmd_in_progress = value; +    DB_PRINT_L(0, "decoded new command:%x\n", value); + +    switch (value) { + +    case ERASE_4K: +    case ERASE_32K: +    case ERASE_SECTOR: +    case READ: +    case DPP: +    case QPP: +    case PP: +        s->needed_bytes = 3; +        s->pos = 0; +        s->len = 0; +        s->state = STATE_COLLECTING_DATA; +        break; + +    case FAST_READ: +    case DOR: +    case QOR: +        s->needed_bytes = 4; +        s->pos = 0; +        s->len = 0; +        s->state = STATE_COLLECTING_DATA; +        break; + +    case DIOR: +        switch ((s->pi->jedec >> 16) & 0xFF) { +        case JEDEC_WINBOND: +        case JEDEC_SPANSION: +            s->needed_bytes = 4; +            break; +        case JEDEC_NUMONYX: +        default: +            s->needed_bytes = 5; +        } +        s->pos = 0; +        s->len = 0; +        s->state = STATE_COLLECTING_DATA; +        break; + +    case QIOR: +        switch ((s->pi->jedec >> 16) & 0xFF) { +        case JEDEC_WINBOND: +        case JEDEC_SPANSION: +            s->needed_bytes = 6; +            break; +        case JEDEC_NUMONYX: +        default: +            s->needed_bytes = 8; +        } +        s->pos = 0; +        s->len = 0; +        s->state = STATE_COLLECTING_DATA; +        break; + +    case WRSR: +        if (s->write_enable) { +            s->needed_bytes = 1; +            s->pos = 0; +            s->len = 0; +            s->state = STATE_COLLECTING_DATA; +        } +        break; + +    case WRDI: +        s->write_enable = false; +        break; +    case WREN: +        s->write_enable = true; +        break; + +    case RDSR: +        s->data[0] = (!!s->write_enable) << 1; +        s->pos = 0; +        s->len = 1; +        s->state = STATE_READING_DATA; +        break; + +    case JEDEC_READ: +        DB_PRINT_L(0, "populated jedec code\n"); +        s->data[0] = (s->pi->jedec >> 16) & 0xff; +        s->data[1] = (s->pi->jedec >> 8) & 0xff; +        s->data[2] = s->pi->jedec & 0xff; +        if (s->pi->ext_jedec) { +            s->data[3] = (s->pi->ext_jedec >> 8) & 0xff; +            s->data[4] = s->pi->ext_jedec & 0xff; +            s->len = 5; +        } else { +            s->len = 3; +        } +        s->pos = 0; +        s->state = STATE_READING_DATA; +        break; + +    case BULK_ERASE: +        if (s->write_enable) { +            DB_PRINT_L(0, "chip erase\n"); +            
flash_erase(s, 0, BULK_ERASE); +        } else { +            qemu_log_mask(LOG_GUEST_ERROR, "M25P80: chip erase with write " +                          "protect!\n"); +        } +        break; +    case NOP: +        break; +    default: +        qemu_log_mask(LOG_GUEST_ERROR, "M25P80: Unknown cmd %x\n", value); +        break; +    } +} + +static int m25p80_cs(SSISlave *ss, bool select) +{ +    Flash *s = M25P80(ss); + +    if (select) { +        s->len = 0; +        s->pos = 0; +        s->state = STATE_IDLE; +        flash_sync_dirty(s, -1); +    } + +    DB_PRINT_L(0, "%sselect\n", select ? "de" : ""); + +    return 0; +} + +static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx) +{ +    Flash *s = M25P80(ss); +    uint32_t r = 0; + +    switch (s->state) { + +    case STATE_PAGE_PROGRAM: +        DB_PRINT_L(1, "page program cur_addr=%#" PRIx64 " data=%" PRIx8 "\n", +                   s->cur_addr, (uint8_t)tx); +        flash_write8(s, s->cur_addr, (uint8_t)tx); +        s->cur_addr++; +        break; + +    case STATE_READ: +        r = s->storage[s->cur_addr]; +        DB_PRINT_L(1, "READ 0x%" PRIx64 "=%" PRIx8 "\n", s->cur_addr, +                   (uint8_t)r); +        s->cur_addr = (s->cur_addr + 1) % s->size; +        break; + +    case STATE_COLLECTING_DATA: +        s->data[s->len] = (uint8_t)tx; +        s->len++; + +        if (s->len == s->needed_bytes) { +            complete_collecting_data(s); +        } +        break; + +    case STATE_READING_DATA: +        r = s->data[s->pos]; +        s->pos++; +        if (s->pos == s->len) { +            s->pos = 0; +            s->state = STATE_IDLE; +        } +        break; + +    default: +    case STATE_IDLE: +        decode_new_cmd(s, (uint8_t)tx); +        break; +    } + +    return r; +} + +static int m25p80_init(SSISlave *ss) +{ +    DriveInfo *dinfo; +    Flash *s = M25P80(ss); +    M25P80Class *mc = M25P80_GET_CLASS(s); + +    s->pi = mc->pi; + +    s->size = s->pi->sector_size * s->pi->n_sectors; +    s->dirty_page = -1; + +    /* FIXME use a qdev drive property instead of drive_get_next() */ +    dinfo = drive_get_next(IF_MTD); + +    if (dinfo) { +        DB_PRINT_L(0, "Binding to IF_MTD drive\n"); +        s->blk = blk_by_legacy_dinfo(dinfo); +        blk_attach_dev_nofail(s->blk, s); + +        s->storage = blk_blockalign(s->blk, s->size); + +        /* FIXME: Move to late init */ +        if (blk_read(s->blk, 0, s->storage, +                     DIV_ROUND_UP(s->size, BDRV_SECTOR_SIZE))) { +            fprintf(stderr, "Failed to initialize SPI flash!\n"); +            return 1; +        } +    } else { +        DB_PRINT_L(0, "No BDRV - binding to RAM\n"); +        s->storage = blk_blockalign(NULL, s->size); +        memset(s->storage, 0xFF, s->size); +    } + +    return 0; +} + +static void m25p80_pre_save(void *opaque) +{ +    flash_sync_dirty((Flash *)opaque, -1); +} + +static const VMStateDescription vmstate_m25p80 = { +    .name = "xilinx_spi", +    .version_id = 1, +    .minimum_version_id = 1, +    .pre_save = m25p80_pre_save, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(state, Flash), +        VMSTATE_UINT8_ARRAY(data, Flash, 16), +        VMSTATE_UINT32(len, Flash), +        VMSTATE_UINT32(pos, Flash), +        VMSTATE_UINT8(needed_bytes, Flash), +        VMSTATE_UINT8(cmd_in_progress, Flash), +        VMSTATE_UINT64(cur_addr, Flash), +        VMSTATE_BOOL(write_enable, Flash), +        VMSTATE_END_OF_LIST() +    } +}; + +static void m25p80_class_init(ObjectClass *klass, void *data) +{ 
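+    /* class_init runs once per generated flash type; data is the matching
+     * known_devices[] entry, handed in through TypeInfo.class_data in
+     * m25p80_register_types() below.
+     */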
+    DeviceClass *dc = DEVICE_CLASS(klass); +    SSISlaveClass *k = SSI_SLAVE_CLASS(klass); +    M25P80Class *mc = M25P80_CLASS(klass); + +    k->init = m25p80_init; +    k->transfer = m25p80_transfer8; +    k->set_cs = m25p80_cs; +    k->cs_polarity = SSI_CS_LOW; +    dc->vmsd = &vmstate_m25p80; +    mc->pi = data; +} + +static const TypeInfo m25p80_info = { +    .name           = TYPE_M25P80, +    .parent         = TYPE_SSI_SLAVE, +    .instance_size  = sizeof(Flash), +    .class_size     = sizeof(M25P80Class), +    .abstract       = true, +}; + +static void m25p80_register_types(void) +{ +    int i; + +    type_register_static(&m25p80_info); +    for (i = 0; i < ARRAY_SIZE(known_devices); ++i) { +        TypeInfo ti = { +            .name       = known_devices[i].part_name, +            .parent     = TYPE_M25P80, +            .class_init = m25p80_class_init, +            .class_data = (void *)&known_devices[i], +        }; +        type_register(&ti); +    } +} + +type_init(m25p80_register_types) diff --git a/hw/block/nand.c b/hw/block/nand.c new file mode 100644 index 00000000..61d2cec0 --- /dev/null +++ b/hw/block/nand.c @@ -0,0 +1,799 @@ +/* + * Flash NAND memory emulation.  Based on "16M x 8 Bit NAND Flash + * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from + * Samsung Electronic. + * + * Copyright (c) 2006 Openedhand Ltd. + * Written by Andrzej Zaborowski <balrog@zabor.org> + * + * Support for additional features based on "MT29F2G16ABCWP 2Gx16" + * datasheet from Micron Technology and "NAND02G-B2C" datasheet + * from ST Microelectronics. + * + * This code is licensed under the GNU GPL v2. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef NAND_IO + +# include "hw/hw.h" +# include "hw/block/flash.h" +#include "sysemu/block-backend.h" +#include "hw/qdev.h" +#include "qemu/error-report.h" + +# define NAND_CMD_READ0		0x00 +# define NAND_CMD_READ1		0x01 +# define NAND_CMD_READ2		0x50 +# define NAND_CMD_LPREAD2	0x30 +# define NAND_CMD_NOSERIALREAD2	0x35 +# define NAND_CMD_RANDOMREAD1	0x05 +# define NAND_CMD_RANDOMREAD2	0xe0 +# define NAND_CMD_READID	0x90 +# define NAND_CMD_RESET		0xff +# define NAND_CMD_PAGEPROGRAM1	0x80 +# define NAND_CMD_PAGEPROGRAM2	0x10 +# define NAND_CMD_CACHEPROGRAM2	0x15 +# define NAND_CMD_BLOCKERASE1	0x60 +# define NAND_CMD_BLOCKERASE2	0xd0 +# define NAND_CMD_READSTATUS	0x70 +# define NAND_CMD_COPYBACKPRG1	0x85 + +# define NAND_IOSTATUS_ERROR	(1 << 0) +# define NAND_IOSTATUS_PLANE0	(1 << 1) +# define NAND_IOSTATUS_PLANE1	(1 << 2) +# define NAND_IOSTATUS_PLANE2	(1 << 3) +# define NAND_IOSTATUS_PLANE3	(1 << 4) +# define NAND_IOSTATUS_READY    (1 << 6) +# define NAND_IOSTATUS_UNPROTCT	(1 << 7) + +# define MAX_PAGE		0x800 +# define MAX_OOB		0x40 + +typedef struct NANDFlashState NANDFlashState; +struct NANDFlashState { +    DeviceState parent_obj; + +    uint8_t manf_id, chip_id; +    uint8_t buswidth; /* in BYTES */ +    int size, pages; +    int page_shift, oob_shift, erase_shift, addr_shift; +    uint8_t *storage; +    BlockBackend *blk; +    int mem_oob; + +    uint8_t cle, ale, ce, wp, gnd; + +    uint8_t io[MAX_PAGE + MAX_OOB + 0x400]; +    uint8_t *ioaddr; +    int iolen; + +    uint32_t cmd; +    uint64_t addr; +    int addrlen; +    int status; +    int offset; + +    void (*blk_write)(NANDFlashState *s); +    void (*blk_erase)(NANDFlashState *s); +    void (*blk_load)(NANDFlashState *s, uint64_t addr, int offset); + +    uint32_t ioaddr_vmstate; 
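+    /* Byte offset of ioaddr within io[]: nand_pre_save() computes it and
+     * nand_post_load() uses it to rebuild the ioaddr pointer after
+     * migration.
+     */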
+}; + +#define TYPE_NAND "nand" + +#define NAND(obj) \ +    OBJECT_CHECK(NANDFlashState, (obj), TYPE_NAND) + +static void mem_and(uint8_t *dest, const uint8_t *src, size_t n) +{ +    /* Like memcpy() but we logical-AND the data into the destination */ +    int i; +    for (i = 0; i < n; i++) { +        dest[i] &= src[i]; +    } +} + +# define NAND_NO_AUTOINCR	0x00000001 +# define NAND_BUSWIDTH_16	0x00000002 +# define NAND_NO_PADDING	0x00000004 +# define NAND_CACHEPRG		0x00000008 +# define NAND_COPYBACK		0x00000010 +# define NAND_IS_AND		0x00000020 +# define NAND_4PAGE_ARRAY	0x00000040 +# define NAND_NO_READRDY	0x00000100 +# define NAND_SAMSUNG_LP	(NAND_NO_PADDING | NAND_COPYBACK) + +# define NAND_IO + +# define PAGE(addr)		((addr) >> ADDR_SHIFT) +# define PAGE_START(page)	(PAGE(page) * (PAGE_SIZE + OOB_SIZE)) +# define PAGE_MASK		((1 << ADDR_SHIFT) - 1) +# define OOB_SHIFT		(PAGE_SHIFT - 5) +# define OOB_SIZE		(1 << OOB_SHIFT) +# define SECTOR(addr)		((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT)) +# define SECTOR_OFFSET(addr)	((addr) & ((511 >> PAGE_SHIFT) << 8)) + +# define PAGE_SIZE		256 +# define PAGE_SHIFT		8 +# define PAGE_SECTORS		1 +# define ADDR_SHIFT		8 +# include "nand.c" +# define PAGE_SIZE		512 +# define PAGE_SHIFT		9 +# define PAGE_SECTORS		1 +# define ADDR_SHIFT		8 +# include "nand.c" +# define PAGE_SIZE		2048 +# define PAGE_SHIFT		11 +# define PAGE_SECTORS		4 +# define ADDR_SHIFT		16 +# include "nand.c" + +/* Information based on Linux drivers/mtd/nand/nand_ids.c */ +static const struct { +    int size; +    int width; +    int page_shift; +    int erase_shift; +    uint32_t options; +} nand_flash_ids[0x100] = { +    [0 ... 0xff] = { 0 }, + +    [0x6e] = { 1,	8,	8, 4, 0 }, +    [0x64] = { 2,	8,	8, 4, 0 }, +    [0x6b] = { 4,	8,	9, 4, 0 }, +    [0xe8] = { 1,	8,	8, 4, 0 }, +    [0xec] = { 1,	8,	8, 4, 0 }, +    [0xea] = { 2,	8,	8, 4, 0 }, +    [0xd5] = { 4,	8,	9, 4, 0 }, +    [0xe3] = { 4,	8,	9, 4, 0 }, +    [0xe5] = { 4,	8,	9, 4, 0 }, +    [0xd6] = { 8,	8,	9, 4, 0 }, + +    [0x39] = { 8,	8,	9, 4, 0 }, +    [0xe6] = { 8,	8,	9, 4, 0 }, +    [0x49] = { 8,	16,	9, 4, NAND_BUSWIDTH_16 }, +    [0x59] = { 8,	16,	9, 4, NAND_BUSWIDTH_16 }, + +    [0x33] = { 16,	8,	9, 5, 0 }, +    [0x73] = { 16,	8,	9, 5, 0 }, +    [0x43] = { 16,	16,	9, 5, NAND_BUSWIDTH_16 }, +    [0x53] = { 16,	16,	9, 5, NAND_BUSWIDTH_16 }, + +    [0x35] = { 32,	8,	9, 5, 0 }, +    [0x75] = { 32,	8,	9, 5, 0 }, +    [0x45] = { 32,	16,	9, 5, NAND_BUSWIDTH_16 }, +    [0x55] = { 32,	16,	9, 5, NAND_BUSWIDTH_16 }, + +    [0x36] = { 64,	8,	9, 5, 0 }, +    [0x76] = { 64,	8,	9, 5, 0 }, +    [0x46] = { 64,	16,	9, 5, NAND_BUSWIDTH_16 }, +    [0x56] = { 64,	16,	9, 5, NAND_BUSWIDTH_16 }, + +    [0x78] = { 128,	8,	9, 5, 0 }, +    [0x39] = { 128,	8,	9, 5, 0 }, +    [0x79] = { 128,	8,	9, 5, 0 }, +    [0x72] = { 128,	16,	9, 5, NAND_BUSWIDTH_16 }, +    [0x49] = { 128,	16,	9, 5, NAND_BUSWIDTH_16 }, +    [0x74] = { 128,	16,	9, 5, NAND_BUSWIDTH_16 }, +    [0x59] = { 128,	16,	9, 5, NAND_BUSWIDTH_16 }, + +    [0x71] = { 256,	8,	9, 5, 0 }, + +    /* +     * These are the new chips with large page size. 
The pagesize and the +     * erasesize is determined from the extended id bytes +     */ +# define LP_OPTIONS	(NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR) +# define LP_OPTIONS16	(LP_OPTIONS | NAND_BUSWIDTH_16) + +    /* 512 Megabit */ +    [0xa2] = { 64,	8,	0, 0, LP_OPTIONS }, +    [0xf2] = { 64,	8,	0, 0, LP_OPTIONS }, +    [0xb2] = { 64,	16,	0, 0, LP_OPTIONS16 }, +    [0xc2] = { 64,	16,	0, 0, LP_OPTIONS16 }, + +    /* 1 Gigabit */ +    [0xa1] = { 128,	8,	0, 0, LP_OPTIONS }, +    [0xf1] = { 128,	8,	0, 0, LP_OPTIONS }, +    [0xb1] = { 128,	16,	0, 0, LP_OPTIONS16 }, +    [0xc1] = { 128,	16,	0, 0, LP_OPTIONS16 }, + +    /* 2 Gigabit */ +    [0xaa] = { 256,	8,	0, 0, LP_OPTIONS }, +    [0xda] = { 256,	8,	0, 0, LP_OPTIONS }, +    [0xba] = { 256,	16,	0, 0, LP_OPTIONS16 }, +    [0xca] = { 256,	16,	0, 0, LP_OPTIONS16 }, + +    /* 4 Gigabit */ +    [0xac] = { 512,	8,	0, 0, LP_OPTIONS }, +    [0xdc] = { 512,	8,	0, 0, LP_OPTIONS }, +    [0xbc] = { 512,	16,	0, 0, LP_OPTIONS16 }, +    [0xcc] = { 512,	16,	0, 0, LP_OPTIONS16 }, + +    /* 8 Gigabit */ +    [0xa3] = { 1024,	8,	0, 0, LP_OPTIONS }, +    [0xd3] = { 1024,	8,	0, 0, LP_OPTIONS }, +    [0xb3] = { 1024,	16,	0, 0, LP_OPTIONS16 }, +    [0xc3] = { 1024,	16,	0, 0, LP_OPTIONS16 }, + +    /* 16 Gigabit */ +    [0xa5] = { 2048,	8,	0, 0, LP_OPTIONS }, +    [0xd5] = { 2048,	8,	0, 0, LP_OPTIONS }, +    [0xb5] = { 2048,	16,	0, 0, LP_OPTIONS16 }, +    [0xc5] = { 2048,	16,	0, 0, LP_OPTIONS16 }, +}; + +static void nand_reset(DeviceState *dev) +{ +    NANDFlashState *s = NAND(dev); +    s->cmd = NAND_CMD_READ0; +    s->addr = 0; +    s->addrlen = 0; +    s->iolen = 0; +    s->offset = 0; +    s->status &= NAND_IOSTATUS_UNPROTCT; +    s->status |= NAND_IOSTATUS_READY; +} + +static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value) +{ +    s->ioaddr[s->iolen++] = value; +    for (value = s->buswidth; --value;) { +        s->ioaddr[s->iolen++] = 0; +    } +} + +static void nand_command(NANDFlashState *s) +{ +    unsigned int offset; +    switch (s->cmd) { +    case NAND_CMD_READ0: +        s->iolen = 0; +        break; + +    case NAND_CMD_READID: +        s->ioaddr = s->io; +        s->iolen = 0; +        nand_pushio_byte(s, s->manf_id); +        nand_pushio_byte(s, s->chip_id); +        nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */ +        if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { +            /* Page Size, Block Size, Spare Size; bit 6 indicates +             * 8 vs 16 bit width NAND. +             */ +            nand_pushio_byte(s, (s->buswidth == 2) ? 
0x55 : 0x15); +        } else { +            nand_pushio_byte(s, 0xc0); /* Multi-plane */ +        } +        break; + +    case NAND_CMD_RANDOMREAD2: +    case NAND_CMD_NOSERIALREAD2: +        if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP)) +            break; +        offset = s->addr & ((1 << s->addr_shift) - 1); +        s->blk_load(s, s->addr, offset); +        if (s->gnd) +            s->iolen = (1 << s->page_shift) - offset; +        else +            s->iolen = (1 << s->page_shift) + (1 << s->oob_shift) - offset; +        break; + +    case NAND_CMD_RESET: +        nand_reset(DEVICE(s)); +        break; + +    case NAND_CMD_PAGEPROGRAM1: +        s->ioaddr = s->io; +        s->iolen = 0; +        break; + +    case NAND_CMD_PAGEPROGRAM2: +        if (s->wp) { +            s->blk_write(s); +        } +        break; + +    case NAND_CMD_BLOCKERASE1: +        break; + +    case NAND_CMD_BLOCKERASE2: +        s->addr &= (1ull << s->addrlen * 8) - 1; +        s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ? +                                                                    16 : 8; + +        if (s->wp) { +            s->blk_erase(s); +        } +        break; + +    case NAND_CMD_READSTATUS: +        s->ioaddr = s->io; +        s->iolen = 0; +        nand_pushio_byte(s, s->status); +        break; + +    default: +        printf("%s: Unknown NAND command 0x%02x\n", __FUNCTION__, s->cmd); +    } +} + +static void nand_pre_save(void *opaque) +{ +    NANDFlashState *s = NAND(opaque); + +    s->ioaddr_vmstate = s->ioaddr - s->io; +} + +static int nand_post_load(void *opaque, int version_id) +{ +    NANDFlashState *s = NAND(opaque); + +    if (s->ioaddr_vmstate > sizeof(s->io)) { +        return -EINVAL; +    } +    s->ioaddr = s->io + s->ioaddr_vmstate; + +    return 0; +} + +static const VMStateDescription vmstate_nand = { +    .name = "nand", +    .version_id = 1, +    .minimum_version_id = 1, +    .pre_save = nand_pre_save, +    .post_load = nand_post_load, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(cle, NANDFlashState), +        VMSTATE_UINT8(ale, NANDFlashState), +        VMSTATE_UINT8(ce, NANDFlashState), +        VMSTATE_UINT8(wp, NANDFlashState), +        VMSTATE_UINT8(gnd, NANDFlashState), +        VMSTATE_BUFFER(io, NANDFlashState), +        VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState), +        VMSTATE_INT32(iolen, NANDFlashState), +        VMSTATE_UINT32(cmd, NANDFlashState), +        VMSTATE_UINT64(addr, NANDFlashState), +        VMSTATE_INT32(addrlen, NANDFlashState), +        VMSTATE_INT32(status, NANDFlashState), +        VMSTATE_INT32(offset, NANDFlashState), +        /* XXX: do we want to save s->storage too? 
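+         * (It is only allocated when the drive does not back all data,
+         * i.e. with no drive at all or with OOB kept in RAM, so that
+         * data is currently not migrated.)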
*/ +        VMSTATE_END_OF_LIST() +    } +}; + +static void nand_realize(DeviceState *dev, Error **errp) +{ +    int pagesize; +    NANDFlashState *s = NAND(dev); + +    s->buswidth = nand_flash_ids[s->chip_id].width >> 3; +    s->size = nand_flash_ids[s->chip_id].size << 20; +    if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { +        s->page_shift = 11; +        s->erase_shift = 6; +    } else { +        s->page_shift = nand_flash_ids[s->chip_id].page_shift; +        s->erase_shift = nand_flash_ids[s->chip_id].erase_shift; +    } + +    switch (1 << s->page_shift) { +    case 256: +        nand_init_256(s); +        break; +    case 512: +        nand_init_512(s); +        break; +    case 2048: +        nand_init_2048(s); +        break; +    default: +        error_setg(errp, "Unsupported NAND block size %#x", +                   1 << s->page_shift); +        return; +    } + +    pagesize = 1 << s->oob_shift; +    s->mem_oob = 1; +    if (s->blk) { +        if (blk_is_read_only(s->blk)) { +            error_setg(errp, "Can't use a read-only drive"); +            return; +        } +        if (blk_getlength(s->blk) >= +                (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { +            pagesize = 0; +            s->mem_oob = 0; +        } +    } else { +        pagesize += 1 << s->page_shift; +    } +    if (pagesize) { +        s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize), +                        0xff, s->pages * pagesize); +    } +    /* Give s->ioaddr a sane value in case we save state before it is used. */ +    s->ioaddr = s->io; +} + +static Property nand_properties[] = { +    DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0), +    DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0), +    DEFINE_PROP_DRIVE("drive", NANDFlashState, blk), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void nand_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->realize = nand_realize; +    dc->reset = nand_reset; +    dc->vmsd = &vmstate_nand; +    dc->props = nand_properties; +} + +static const TypeInfo nand_info = { +    .name          = TYPE_NAND, +    .parent        = TYPE_DEVICE, +    .instance_size = sizeof(NANDFlashState), +    .class_init    = nand_class_init, +}; + +static void nand_register_types(void) +{ +    type_register_static(&nand_info); +} + +/* + * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins.  Chip + * outputs are R/B and eight I/O pins. + * + * CE, WP and R/B are active low. 
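+ *
+ * In this model a non-zero wp argument to nand_setpins() means "not
+ * protected" (it sets NAND_IOSTATUS_UNPROTCT), and nand_getpins()
+ * always reports R/B as ready.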
+ */ +void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, +                  uint8_t ce, uint8_t wp, uint8_t gnd) +{ +    NANDFlashState *s = NAND(dev); + +    s->cle = cle; +    s->ale = ale; +    s->ce = ce; +    s->wp = wp; +    s->gnd = gnd; +    if (wp) { +        s->status |= NAND_IOSTATUS_UNPROTCT; +    } else { +        s->status &= ~NAND_IOSTATUS_UNPROTCT; +    } +} + +void nand_getpins(DeviceState *dev, int *rb) +{ +    *rb = 1; +} + +void nand_setio(DeviceState *dev, uint32_t value) +{ +    int i; +    NANDFlashState *s = NAND(dev); + +    if (!s->ce && s->cle) { +        if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { +            if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2) +                return; +            if (value == NAND_CMD_RANDOMREAD1) { +                s->addr &= ~((1 << s->addr_shift) - 1); +                s->addrlen = 0; +                return; +            } +        } +        if (value == NAND_CMD_READ0) { +            s->offset = 0; +        } else if (value == NAND_CMD_READ1) { +            s->offset = 0x100; +            value = NAND_CMD_READ0; +        } else if (value == NAND_CMD_READ2) { +            s->offset = 1 << s->page_shift; +            value = NAND_CMD_READ0; +        } + +        s->cmd = value; + +        if (s->cmd == NAND_CMD_READSTATUS || +                s->cmd == NAND_CMD_PAGEPROGRAM2 || +                s->cmd == NAND_CMD_BLOCKERASE1 || +                s->cmd == NAND_CMD_BLOCKERASE2 || +                s->cmd == NAND_CMD_NOSERIALREAD2 || +                s->cmd == NAND_CMD_RANDOMREAD2 || +                s->cmd == NAND_CMD_RESET) { +            nand_command(s); +        } + +        if (s->cmd != NAND_CMD_RANDOMREAD2) { +            s->addrlen = 0; +        } +    } + +    if (s->ale) { +        unsigned int shift = s->addrlen * 8; +        unsigned int mask = ~(0xff << shift); +        unsigned int v = value << shift; + +        s->addr = (s->addr & mask) | v; +        s->addrlen ++; + +        switch (s->addrlen) { +        case 1: +            if (s->cmd == NAND_CMD_READID) { +                nand_command(s); +            } +            break; +        case 2: /* fix cache address as a byte address */ +            s->addr <<= (s->buswidth - 1); +            break; +        case 3: +            if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && +                    (s->cmd == NAND_CMD_READ0 || +                     s->cmd == NAND_CMD_PAGEPROGRAM1)) { +                nand_command(s); +            } +            break; +        case 4: +            if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && +                    nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */ +                    (s->cmd == NAND_CMD_READ0 || +                     s->cmd == NAND_CMD_PAGEPROGRAM1)) { +                nand_command(s); +            } +            break; +        case 5: +            if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && +                    nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */ +                    (s->cmd == NAND_CMD_READ0 || +                     s->cmd == NAND_CMD_PAGEPROGRAM1)) { +                nand_command(s); +            } +            break; +        default: +            break; +        } +    } + +    if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) { +        if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) { +            for (i = s->buswidth; i--; value >>= 8) { +                s->io[s->iolen ++] = 
(uint8_t) (value & 0xff); +            } +        } +    } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) { +        if ((s->addr & ((1 << s->addr_shift) - 1)) < +                (1 << s->page_shift) + (1 << s->oob_shift)) { +            for (i = s->buswidth; i--; s->addr++, value >>= 8) { +                s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] = +                    (uint8_t) (value & 0xff); +            } +        } +    } +} + +uint32_t nand_getio(DeviceState *dev) +{ +    int offset; +    uint32_t x = 0; +    NANDFlashState *s = NAND(dev); + +    /* Allow sequential reading */ +    if (!s->iolen && s->cmd == NAND_CMD_READ0) { +        offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset; +        s->offset = 0; + +        s->blk_load(s, s->addr, offset); +        if (s->gnd) +            s->iolen = (1 << s->page_shift) - offset; +        else +            s->iolen = (1 << s->page_shift) + (1 << s->oob_shift) - offset; +    } + +    if (s->ce || s->iolen <= 0) { +        return 0; +    } + +    for (offset = s->buswidth; offset--;) { +        x |= s->ioaddr[offset] << (offset << 3); +    } +    /* after receiving READ STATUS command all subsequent reads will +     * return the status register value until another command is issued +     */ +    if (s->cmd != NAND_CMD_READSTATUS) { +        s->addr   += s->buswidth; +        s->ioaddr += s->buswidth; +        s->iolen  -= s->buswidth; +    } +    return x; +} + +uint32_t nand_getbuswidth(DeviceState *dev) +{ +    NANDFlashState *s = (NANDFlashState *) dev; +    return s->buswidth << 3; +} + +DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id) +{ +    DeviceState *dev; + +    if (nand_flash_ids[chip_id].size == 0) { +        hw_error("%s: Unsupported NAND chip ID.\n", __FUNCTION__); +    } +    dev = DEVICE(object_new(TYPE_NAND)); +    qdev_prop_set_uint8(dev, "manufacturer_id", manf_id); +    qdev_prop_set_uint8(dev, "chip_id", chip_id); +    if (blk) { +        qdev_prop_set_drive_nofail(dev, "drive", blk); +    } + +    qdev_init_nofail(dev); +    return dev; +} + +type_init(nand_register_types) + +#else + +/* Program a single page */ +static void glue(nand_blk_write_, PAGE_SIZE)(NANDFlashState *s) +{ +    uint64_t off, page, sector, soff; +    uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200]; +    if (PAGE(s->addr) >= s->pages) +        return; + +    if (!s->blk) { +        mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) + +                        s->offset, s->io, s->iolen); +    } else if (s->mem_oob) { +        sector = SECTOR(s->addr); +        off = (s->addr & PAGE_MASK) + s->offset; +        soff = SECTOR_OFFSET(s->addr); +        if (blk_read(s->blk, sector, iobuf, PAGE_SECTORS) < 0) { +            printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); +            return; +        } + +        mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, PAGE_SIZE - off)); +        if (off + s->iolen > PAGE_SIZE) { +            page = PAGE(s->addr); +            mem_and(s->storage + (page << OOB_SHIFT), s->io + PAGE_SIZE - off, +                            MIN(OOB_SIZE, off + s->iolen - PAGE_SIZE)); +        } + +        if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS) < 0) { +            printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); +        } +    } else { +        off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset; +        sector = off >> 9; +        soff = off & 0x1ff; +        if (blk_read(s->blk, sector, 
iobuf, PAGE_SECTORS + 2) < 0) { +            printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); +            return; +        } + +        mem_and(iobuf + soff, s->io, s->iolen); + +        if (blk_write(s->blk, sector, iobuf, PAGE_SECTORS + 2) < 0) { +            printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); +        } +    } +    s->offset = 0; +} + +/* Erase a single block */ +static void glue(nand_blk_erase_, PAGE_SIZE)(NANDFlashState *s) +{ +    uint64_t i, page, addr; +    uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, }; +    addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1); + +    if (PAGE(addr) >= s->pages) { +        return; +    } + +    if (!s->blk) { +        memset(s->storage + PAGE_START(addr), +                        0xff, (PAGE_SIZE + OOB_SIZE) << s->erase_shift); +    } else if (s->mem_oob) { +        memset(s->storage + (PAGE(addr) << OOB_SHIFT), +                        0xff, OOB_SIZE << s->erase_shift); +        i = SECTOR(addr); +        page = SECTOR(addr + (ADDR_SHIFT + s->erase_shift)); +        for (; i < page; i ++) +            if (blk_write(s->blk, i, iobuf, 1) < 0) { +                printf("%s: write error in sector %" PRIu64 "\n", __func__, i); +            } +    } else { +        addr = PAGE_START(addr); +        page = addr >> 9; +        if (blk_read(s->blk, page, iobuf, 1) < 0) { +            printf("%s: read error in sector %" PRIu64 "\n", __func__, page); +        } +        memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1); +        if (blk_write(s->blk, page, iobuf, 1) < 0) { +            printf("%s: write error in sector %" PRIu64 "\n", __func__, page); +        } + +        memset(iobuf, 0xff, 0x200); +        i = (addr & ~0x1ff) + 0x200; +        for (addr += ((PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200; +                        i < addr; i += 0x200) { +            if (blk_write(s->blk, i >> 9, iobuf, 1) < 0) { +                printf("%s: write error in sector %" PRIu64 "\n", +                       __func__, i >> 9); +            } +        } + +        page = i >> 9; +        if (blk_read(s->blk, page, iobuf, 1) < 0) { +            printf("%s: read error in sector %" PRIu64 "\n", __func__, page); +        } +        memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1); +        if (blk_write(s->blk, page, iobuf, 1) < 0) { +            printf("%s: write error in sector %" PRIu64 "\n", __func__, page); +        } +    } +} + +static void glue(nand_blk_load_, PAGE_SIZE)(NANDFlashState *s, +                uint64_t addr, int offset) +{ +    if (PAGE(addr) >= s->pages) { +        return; +    } + +    if (s->blk) { +        if (s->mem_oob) { +            if (blk_read(s->blk, SECTOR(addr), s->io, PAGE_SECTORS) < 0) { +                printf("%s: read error in sector %" PRIu64 "\n", +                                __func__, SECTOR(addr)); +            } +            memcpy(s->io + SECTOR_OFFSET(s->addr) + PAGE_SIZE, +                            s->storage + (PAGE(s->addr) << OOB_SHIFT), +                            OOB_SIZE); +            s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset; +        } else { +            if (blk_read(s->blk, PAGE_START(addr) >> 9, +                         s->io, (PAGE_SECTORS + 2)) < 0) { +                printf("%s: read error in sector %" PRIu64 "\n", +                                __func__, PAGE_START(addr) >> 9); +            } +            s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset; +        } +    } else { +        memcpy(s->io, 
s->storage + PAGE_START(s->addr) + +                        offset, PAGE_SIZE + OOB_SIZE - offset); +        s->ioaddr = s->io; +    } +} + +static void glue(nand_init_, PAGE_SIZE)(NANDFlashState *s) +{ +    s->oob_shift = PAGE_SHIFT - 5; +    s->pages = s->size >> PAGE_SHIFT; +    s->addr_shift = ADDR_SHIFT; + +    s->blk_erase = glue(nand_blk_erase_, PAGE_SIZE); +    s->blk_write = glue(nand_blk_write_, PAGE_SIZE); +    s->blk_load = glue(nand_blk_load_, PAGE_SIZE); +} + +# undef PAGE_SIZE +# undef PAGE_SHIFT +# undef PAGE_SECTORS +# undef ADDR_SHIFT +#endif	/* NAND_IO */ diff --git a/hw/block/nvme.c b/hw/block/nvme.c new file mode 100644 index 00000000..40d48803 --- /dev/null +++ b/hw/block/nvme.c @@ -0,0 +1,967 @@ +/* + * QEMU NVM Express Controller + * + * Copyright (c) 2012, Intel Corporation + * + * Written by Keith Busch <keith.busch@intel.com> + * + * This code is licensed under the GNU GPL v2 or later. + */ + +/** + * Reference Specs: http://www.nvmexpress.org, 1.1, 1.0e + * + *  http://www.nvmexpress.org/resources/ + */ + +/** + * Usage: add options: + *      -drive file=<file>,if=none,id=<drive_id> + *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]> + */ + +#include <hw/block/block.h> +#include <hw/hw.h> +#include <hw/pci/msix.h> +#include <hw/pci/pci.h> +#include "sysemu/sysemu.h" +#include "qapi/visitor.h" +#include "sysemu/block-backend.h" + +#include "nvme.h" + +static void nvme_process_sq(void *opaque); + +static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) +{ +    return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; +} + +static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) +{ +    return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1; +} + +static void nvme_inc_cq_tail(NvmeCQueue *cq) +{ +    cq->tail++; +    if (cq->tail >= cq->size) { +        cq->tail = 0; +        cq->phase = !cq->phase; +    } +} + +static void nvme_inc_sq_head(NvmeSQueue *sq) +{ +    sq->head = (sq->head + 1) % sq->size; +} + +static uint8_t nvme_cq_full(NvmeCQueue *cq) +{ +    return (cq->tail + 1) % cq->size == cq->head; +} + +static uint8_t nvme_sq_empty(NvmeSQueue *sq) +{ +    return sq->head == sq->tail; +} + +static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq) +{ +    if (cq->irq_enabled) { +        if (msix_enabled(&(n->parent_obj))) { +            msix_notify(&(n->parent_obj), cq->vector); +        } else { +            pci_irq_pulse(&n->parent_obj); +        } +    } +} + +static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2, +    uint32_t len, NvmeCtrl *n) +{ +    hwaddr trans_len = n->page_size - (prp1 % n->page_size); +    trans_len = MIN(len, trans_len); +    int num_prps = (len >> n->page_bits) + 1; + +    if (!prp1) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } + +    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps); +    qemu_sglist_add(qsg, prp1, trans_len); +    len -= trans_len; +    if (len) { +        if (!prp2) { +            goto unmap; +        } +        if (len > n->page_size) { +            uint64_t prp_list[n->max_prp_ents]; +            uint32_t nents, prp_trans; +            int i = 0; + +            nents = (len + n->page_size - 1) >> n->page_bits; +            prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); +            pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans); +            while (len != 0) { +                uint64_t prp_ent = le64_to_cpu(prp_list[i]); + +                if (i == n->max_prp_ents - 1 && len > n->page_size) { +                    if 
(!prp_ent || prp_ent & (n->page_size - 1)) { +                        goto unmap; +                    } + +                    i = 0; +                    nents = (len + n->page_size - 1) >> n->page_bits; +                    prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t); +                    pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list, +                        prp_trans); +                    prp_ent = le64_to_cpu(prp_list[i]); +                } + +                if (!prp_ent || prp_ent & (n->page_size - 1)) { +                    goto unmap; +                } + +                trans_len = MIN(len, n->page_size); +                qemu_sglist_add(qsg, prp_ent, trans_len); +                len -= trans_len; +                i++; +            } +        } else { +            if (prp2 & (n->page_size - 1)) { +                goto unmap; +            } +            qemu_sglist_add(qsg, prp2, len); +        } +    } +    return NVME_SUCCESS; + + unmap: +    qemu_sglist_destroy(qsg); +    return NVME_INVALID_FIELD | NVME_DNR; +} + +static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, +    uint64_t prp1, uint64_t prp2) +{ +    QEMUSGList qsg; + +    if (nvme_map_prp(&qsg, prp1, prp2, len, n)) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    if (dma_buf_read(ptr, len, &qsg)) { +        qemu_sglist_destroy(&qsg); +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    qemu_sglist_destroy(&qsg); +    return NVME_SUCCESS; +} + +static void nvme_post_cqes(void *opaque) +{ +    NvmeCQueue *cq = opaque; +    NvmeCtrl *n = cq->ctrl; +    NvmeRequest *req, *next; + +    QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { +        NvmeSQueue *sq; +        hwaddr addr; + +        if (nvme_cq_full(cq)) { +            break; +        } + +        QTAILQ_REMOVE(&cq->req_list, req, entry); +        sq = req->sq; +        req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase); +        req->cqe.sq_id = cpu_to_le16(sq->sqid); +        req->cqe.sq_head = cpu_to_le16(sq->head); +        addr = cq->dma_addr + cq->tail * n->cqe_size; +        nvme_inc_cq_tail(cq); +        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe, +            sizeof(req->cqe)); +        QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); +    } +    nvme_isr_notify(n, cq); +} + +static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) +{ +    assert(cq->cqid == req->sq->cqid); +    QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); +    QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); +    timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + +static void nvme_rw_cb(void *opaque, int ret) +{ +    NvmeRequest *req = opaque; +    NvmeSQueue *sq = req->sq; +    NvmeCtrl *n = sq->ctrl; +    NvmeCQueue *cq = n->cq[sq->cqid]; + +    block_acct_done(blk_get_stats(n->conf.blk), &req->acct); +    if (!ret) { +        req->status = NVME_SUCCESS; +    } else { +        req->status = NVME_INTERNAL_DEV_ERROR; +    } +    if (req->has_sg) { +        qemu_sglist_destroy(&req->qsg); +    } +    nvme_enqueue_req_completion(cq, req); +} + +static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, +    NvmeRequest *req) +{ +    req->has_sg = false; +    block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, +         BLOCK_ACCT_FLUSH); +    req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); + +    return NVME_NO_COMPLETE; +} + +static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, +    NvmeRequest *req) +{ +    
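/* Translate NVMe LBAs (2^ds bytes each, per the active LBA format)
+     * into the block layer's 512-byte sectors; hence the aio_slba shift
+     * by (data_shift - BDRV_SECTOR_BITS) below. */
+    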
NvmeRwCmd *rw = (NvmeRwCmd *)cmd; +    uint32_t nlb  = le32_to_cpu(rw->nlb) + 1; +    uint64_t slba = le64_to_cpu(rw->slba); +    uint64_t prp1 = le64_to_cpu(rw->prp1); +    uint64_t prp2 = le64_to_cpu(rw->prp2); + +    uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); +    uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds; +    uint64_t data_size = (uint64_t)nlb << data_shift; +    uint64_t aio_slba  = slba << (data_shift - BDRV_SECTOR_BITS); +    int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; + +    if ((slba + nlb) > ns->id_ns.nsze) { +        return NVME_LBA_RANGE | NVME_DNR; +    } +    if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    assert((nlb << data_shift) == req->qsg.size); + +    req->has_sg = true; +    dma_acct_start(n->conf.blk, &req->acct, &req->qsg, +                   is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); +    req->aiocb = is_write ? +        dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) : +        dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req); + +    return NVME_NO_COMPLETE; +} + +static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ +    NvmeNamespace *ns; +    uint32_t nsid = le32_to_cpu(cmd->nsid); + +    if (nsid == 0 || nsid > n->num_namespaces) { +        return NVME_INVALID_NSID | NVME_DNR; +    } + +    ns = &n->namespaces[nsid - 1]; +    switch (cmd->opcode) { +    case NVME_CMD_FLUSH: +        return nvme_flush(n, ns, cmd, req); +    case NVME_CMD_WRITE: +    case NVME_CMD_READ: +        return nvme_rw(n, ns, cmd, req); +    default: +        return NVME_INVALID_OPCODE | NVME_DNR; +    } +} + +static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) +{ +    n->sq[sq->sqid] = NULL; +    timer_del(sq->timer); +    timer_free(sq->timer); +    g_free(sq->io_req); +    if (sq->sqid) { +        g_free(sq); +    } +} + +static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) +{ +    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd; +    NvmeRequest *req, *next; +    NvmeSQueue *sq; +    NvmeCQueue *cq; +    uint16_t qid = le16_to_cpu(c->qid); + +    if (!qid || nvme_check_sqid(n, qid)) { +        return NVME_INVALID_QID | NVME_DNR; +    } + +    sq = n->sq[qid]; +    while (!QTAILQ_EMPTY(&sq->out_req_list)) { +        req = QTAILQ_FIRST(&sq->out_req_list); +        assert(req->aiocb); +        blk_aio_cancel(req->aiocb); +    } +    if (!nvme_check_cqid(n, sq->cqid)) { +        cq = n->cq[sq->cqid]; +        QTAILQ_REMOVE(&cq->sq_list, sq, entry); + +        nvme_post_cqes(cq); +        QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) { +            if (req->sq == sq) { +                QTAILQ_REMOVE(&cq->req_list, req, entry); +                QTAILQ_INSERT_TAIL(&sq->req_list, req, entry); +            } +        } +    } + +    nvme_free_sq(sq, n); +    return NVME_SUCCESS; +} + +static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, +    uint16_t sqid, uint16_t cqid, uint16_t size) +{ +    int i; +    NvmeCQueue *cq; + +    sq->ctrl = n; +    sq->dma_addr = dma_addr; +    sq->sqid = sqid; +    sq->size = size; +    sq->cqid = cqid; +    sq->head = sq->tail = 0; +    sq->io_req = g_new(NvmeRequest, sq->size); + +    QTAILQ_INIT(&sq->req_list); +    QTAILQ_INIT(&sq->out_req_list); +    for (i = 0; i < sq->size; i++) { +        sq->io_req[i].sq = sq; +        QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); +    } +    sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); + +    
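/* The bound completion queue must already exist: nvme_create_sq()
+     * validates cqid with nvme_check_cqid() before calling here. */
+    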
assert(n->cq[cqid]); +    cq = n->cq[cqid]; +    QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry); +    n->sq[sqid] = sq; +} + +static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) +{ +    NvmeSQueue *sq; +    NvmeCreateSq *c = (NvmeCreateSq *)cmd; + +    uint16_t cqid = le16_to_cpu(c->cqid); +    uint16_t sqid = le16_to_cpu(c->sqid); +    uint16_t qsize = le16_to_cpu(c->qsize); +    uint16_t qflags = le16_to_cpu(c->sq_flags); +    uint64_t prp1 = le64_to_cpu(c->prp1); + +    if (!cqid || nvme_check_cqid(n, cqid)) { +        return NVME_INVALID_CQID | NVME_DNR; +    } +    if (!sqid || (sqid && !nvme_check_sqid(n, sqid))) { +        return NVME_INVALID_QID | NVME_DNR; +    } +    if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { +        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; +    } +    if (!prp1 || prp1 & (n->page_size - 1)) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    if (!(NVME_SQ_FLAGS_PC(qflags))) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    sq = g_malloc0(sizeof(*sq)); +    nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1); +    return NVME_SUCCESS; +} + +static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) +{ +    n->cq[cq->cqid] = NULL; +    timer_del(cq->timer); +    timer_free(cq->timer); +    msix_vector_unuse(&n->parent_obj, cq->vector); +    if (cq->cqid) { +        g_free(cq); +    } +} + +static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) +{ +    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd; +    NvmeCQueue *cq; +    uint16_t qid = le16_to_cpu(c->qid); + +    if (!qid || nvme_check_cqid(n, qid)) { +        return NVME_INVALID_CQID | NVME_DNR; +    } + +    cq = n->cq[qid]; +    if (!QTAILQ_EMPTY(&cq->sq_list)) { +        return NVME_INVALID_QUEUE_DEL; +    } +    nvme_free_cq(cq, n); +    return NVME_SUCCESS; +} + +static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, +    uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) +{ +    cq->ctrl = n; +    cq->cqid = cqid; +    cq->size = size; +    cq->dma_addr = dma_addr; +    cq->phase = 1; +    cq->irq_enabled = irq_enabled; +    cq->vector = vector; +    cq->head = cq->tail = 0; +    QTAILQ_INIT(&cq->req_list); +    QTAILQ_INIT(&cq->sq_list); +    msix_vector_use(&n->parent_obj, cq->vector); +    n->cq[cqid] = cq; +    cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); +} + +static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) +{ +    NvmeCQueue *cq; +    NvmeCreateCq *c = (NvmeCreateCq *)cmd; +    uint16_t cqid = le16_to_cpu(c->cqid); +    uint16_t vector = le16_to_cpu(c->irq_vector); +    uint16_t qsize = le16_to_cpu(c->qsize); +    uint16_t qflags = le16_to_cpu(c->cq_flags); +    uint64_t prp1 = le64_to_cpu(c->prp1); + +    if (!cqid || (cqid && !nvme_check_cqid(n, cqid))) { +        return NVME_INVALID_CQID | NVME_DNR; +    } +    if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { +        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; +    } +    if (!prp1) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    if (vector > n->num_queues) { +        return NVME_INVALID_IRQ_VECTOR | NVME_DNR; +    } +    if (!(NVME_CQ_FLAGS_PC(qflags))) { +        return NVME_INVALID_FIELD | NVME_DNR; +    } + +    cq = g_malloc0(sizeof(*cq)); +    nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1, +        NVME_CQ_FLAGS_IEN(qflags)); +    return NVME_SUCCESS; +} + +static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) +{ +    NvmeNamespace *ns; +    NvmeIdentify *c = (NvmeIdentify *)cmd; +    uint32_t cns  = le32_to_cpu(c->cns); +    uint32_t 
nsid = le32_to_cpu(c->nsid); +    uint64_t prp1 = le64_to_cpu(c->prp1); +    uint64_t prp2 = le64_to_cpu(c->prp2); + +    if (cns) { +        return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), +            prp1, prp2); +    } +    if (nsid == 0 || nsid > n->num_namespaces) { +        return NVME_INVALID_NSID | NVME_DNR; +    } + +    ns = &n->namespaces[nsid - 1]; +    return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), +        prp1, prp2); +} + +static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ +    uint32_t dw10 = le32_to_cpu(cmd->cdw10); +    uint32_t result; + +    switch (dw10) { +    case NVME_VOLATILE_WRITE_CACHE: +        result = blk_enable_write_cache(n->conf.blk); +        break; +    case NVME_NUMBER_OF_QUEUES: +        result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); +        break; +    default: +        return NVME_INVALID_FIELD | NVME_DNR; +    } + +    req->cqe.result = result; +    return NVME_SUCCESS; +} + +static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ +    uint32_t dw10 = le32_to_cpu(cmd->cdw10); +    uint32_t dw11 = le32_to_cpu(cmd->cdw11); + +    switch (dw10) { +    case NVME_VOLATILE_WRITE_CACHE: +        blk_set_enable_write_cache(n->conf.blk, dw11 & 1); +        break; +    case NVME_NUMBER_OF_QUEUES: +        req->cqe.result = +            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); +        break; +    default: +        return NVME_INVALID_FIELD | NVME_DNR; +    } +    return NVME_SUCCESS; +} + +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ +    switch (cmd->opcode) { +    case NVME_ADM_CMD_DELETE_SQ: +        return nvme_del_sq(n, cmd); +    case NVME_ADM_CMD_CREATE_SQ: +        return nvme_create_sq(n, cmd); +    case NVME_ADM_CMD_DELETE_CQ: +        return nvme_del_cq(n, cmd); +    case NVME_ADM_CMD_CREATE_CQ: +        return nvme_create_cq(n, cmd); +    case NVME_ADM_CMD_IDENTIFY: +        return nvme_identify(n, cmd); +    case NVME_ADM_CMD_SET_FEATURES: +        return nvme_set_feature(n, cmd, req); +    case NVME_ADM_CMD_GET_FEATURES: +        return nvme_get_feature(n, cmd, req); +    default: +        return NVME_INVALID_OPCODE | NVME_DNR; +    } +} + +static void nvme_process_sq(void *opaque) +{ +    NvmeSQueue *sq = opaque; +    NvmeCtrl *n = sq->ctrl; +    NvmeCQueue *cq = n->cq[sq->cqid]; + +    uint16_t status; +    hwaddr addr; +    NvmeCmd cmd; +    NvmeRequest *req; + +    while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) { +        addr = sq->dma_addr + sq->head * n->sqe_size; +        pci_dma_read(&n->parent_obj, addr, (void *)&cmd, sizeof(cmd)); +        nvme_inc_sq_head(sq); + +        req = QTAILQ_FIRST(&sq->req_list); +        QTAILQ_REMOVE(&sq->req_list, req, entry); +        QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry); +        memset(&req->cqe, 0, sizeof(req->cqe)); +        req->cqe.cid = cmd.cid; + +        status = sq->sqid ? 
nvme_io_cmd(n, &cmd, req) :
+            nvme_admin_cmd(n, &cmd, req);
+        if (status != NVME_NO_COMPLETE) {
+            req->status = status;
+            nvme_enqueue_req_completion(cq, req);
+        }
+    }
+}
+
+static void nvme_clear_ctrl(NvmeCtrl *n)
+{
+    int i;
+
+    for (i = 0; i < n->num_queues; i++) {
+        if (n->sq[i] != NULL) {
+            nvme_free_sq(n->sq[i], n);
+        }
+    }
+    for (i = 0; i < n->num_queues; i++) {
+        if (n->cq[i] != NULL) {
+            nvme_free_cq(n->cq[i], n);
+        }
+    }
+
+    blk_flush(n->conf.blk);
+    n->bar.cc = 0;
+}
+
+static int nvme_start_ctrl(NvmeCtrl *n)
+{
+    uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
+    uint32_t page_size = 1 << page_bits;
+
+    if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
+            n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
+            NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
+            NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
+            NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
+            NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
+            NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
+            NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
+            !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
+        return -1;
+    }
+
+    n->page_bits = page_bits;
+    n->page_size = page_size;
+    n->max_prp_ents = n->page_size / sizeof(uint64_t);
+    n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
+    n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
+    nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
+        NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
+    nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
+        NVME_AQA_ASQS(n->bar.aqa) + 1);
+
+    return 0;
+}
+
+static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
+    unsigned size)
+{
+    switch (offset) {
+    case 0xc:
+        n->bar.intms |= data & 0xffffffff;
+        n->bar.intmc = n->bar.intms;
+        break;
+    case 0x10:
+        n->bar.intms &= ~(data & 0xffffffff);
+        n->bar.intmc = n->bar.intms;
+        break;
+    case 0x14:
+        /* Windows first sends data, then sends enable bit */
+        if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) &&
+            !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) {
+            n->bar.cc = data;
+        }
+
+        if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
+            n->bar.cc = data;
+            if (nvme_start_ctrl(n)) {
+                n->bar.csts = NVME_CSTS_FAILED;
+            } else {
+                n->bar.csts = NVME_CSTS_READY;
+            }
+        } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
+            nvme_clear_ctrl(n);
+            n->bar.csts &= ~NVME_CSTS_READY;
+        }
+        if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
+            nvme_clear_ctrl(n);
+            n->bar.cc = data;
+            n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
+        } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
+            n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
+            n->bar.cc = data;
+        }
+        break;
+    case 0x24:
+        n->bar.aqa = data & 0xffffffff;
+        break;
+    case 0x28:
+        n->bar.asq = data;
+        break;
+    case 0x2c:
+        n->bar.asq |= data << 32;
+        break;
+    case 0x30:
+        n->bar.acq = data;
+        break;
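+    /*
+     * Offsets 0x2c and 0x34 hold the high dwords of ASQ and ACQ: a
+     * 32-bit guest programs each admin queue base as two dword writes,
+     * low half (0x28/0x30) first, and the |= below merges the high
+     * bits into the base stored just above.
+     */
+    case 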
0x34: +        n->bar.acq |= data << 32; +        break; +    default: +        break; +    } +} + +static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ +    NvmeCtrl *n = (NvmeCtrl *)opaque; +    uint8_t *ptr = (uint8_t *)&n->bar; +    uint64_t val = 0; + +    if (addr < sizeof(n->bar)) { +        memcpy(&val, ptr + addr, size); +    } +    return val; +} + +static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) +{ +    uint32_t qid; + +    if (addr & ((1 << 2) - 1)) { +        return; +    } + +    if (((addr - 0x1000) >> 2) & 1) { +        uint16_t new_head = val & 0xffff; +        int start_sqs; +        NvmeCQueue *cq; + +        qid = (addr - (0x1000 + (1 << 2))) >> 3; +        if (nvme_check_cqid(n, qid)) { +            return; +        } + +        cq = n->cq[qid]; +        if (new_head >= cq->size) { +            return; +        } + +        start_sqs = nvme_cq_full(cq) ? 1 : 0; +        cq->head = new_head; +        if (start_sqs) { +            NvmeSQueue *sq; +            QTAILQ_FOREACH(sq, &cq->sq_list, entry) { +                timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +            } +            timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +        } + +        if (cq->tail != cq->head) { +            nvme_isr_notify(n, cq); +        } +    } else { +        uint16_t new_tail = val & 0xffff; +        NvmeSQueue *sq; + +        qid = (addr - 0x1000) >> 3; +        if (nvme_check_sqid(n, qid)) { +            return; +        } + +        sq = n->sq[qid]; +        if (new_tail >= sq->size) { +            return; +        } + +        sq->tail = new_tail; +        timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +    } +} + +static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data, +    unsigned size) +{ +    NvmeCtrl *n = (NvmeCtrl *)opaque; +    if (addr < sizeof(n->bar)) { +        nvme_write_bar(n, addr, data, size); +    } else if (addr >= 0x1000) { +        nvme_process_db(n, addr, data); +    } +} + +static const MemoryRegionOps nvme_mmio_ops = { +    .read = nvme_mmio_read, +    .write = nvme_mmio_write, +    .endianness = DEVICE_LITTLE_ENDIAN, +    .impl = { +        .min_access_size = 2, +        .max_access_size = 8, +    }, +}; + +static int nvme_init(PCIDevice *pci_dev) +{ +    NvmeCtrl *n = NVME(pci_dev); +    NvmeIdCtrl *id = &n->id_ctrl; + +    int i; +    int64_t bs_size; +    uint8_t *pci_conf; + +    if (!n->conf.blk) { +        return -1; +    } + +    bs_size = blk_getlength(n->conf.blk); +    if (bs_size < 0) { +        return -1; +    } + +    blkconf_serial(&n->conf, &n->serial); +    if (!n->serial) { +        return -1; +    } +    blkconf_blocksizes(&n->conf); + +    pci_conf = pci_dev->config; +    pci_conf[PCI_INTERRUPT_PIN] = 1; +    pci_config_set_prog_interface(pci_dev->config, 0x2); +    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); +    pcie_endpoint_cap_init(&n->parent_obj, 0x80); + +    n->num_namespaces = 1; +    n->num_queues = 64; +    n->reg_size = 1 << qemu_fls(0x1004 + 2 * (n->num_queues + 1) * 4); +    n->ns_size = bs_size / (uint64_t)n->num_namespaces; + +    n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); +    n->sq = g_new0(NvmeSQueue *, n->num_queues); +    n->cq = g_new0(NvmeCQueue *, n->num_queues); + +    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, +                          "nvme", n->reg_size); +    pci_register_bar(&n->parent_obj, 0, +        PCI_BASE_ADDRESS_SPACE_MEMORY 
| PCI_BASE_ADDRESS_MEM_TYPE_64, +        &n->iomem); +    msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4); + +    id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); +    id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); +    strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); +    strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); +    strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' '); +    id->rab = 6; +    id->ieee[0] = 0x00; +    id->ieee[1] = 0x02; +    id->ieee[2] = 0xb3; +    id->oacs = cpu_to_le16(0); +    id->frmw = 7 << 1; +    id->lpa = 1 << 0; +    id->sqes = (0x6 << 4) | 0x6; +    id->cqes = (0x4 << 4) | 0x4; +    id->nn = cpu_to_le32(n->num_namespaces); +    id->psd[0].mp = cpu_to_le16(0x9c4); +    id->psd[0].enlat = cpu_to_le32(0x10); +    id->psd[0].exlat = cpu_to_le32(0x4); +    if (blk_enable_write_cache(n->conf.blk)) { +        id->vwc = 1; +    } + +    n->bar.cap = 0; +    NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); +    NVME_CAP_SET_CQR(n->bar.cap, 1); +    NVME_CAP_SET_AMS(n->bar.cap, 1); +    NVME_CAP_SET_TO(n->bar.cap, 0xf); +    NVME_CAP_SET_CSS(n->bar.cap, 1); +    NVME_CAP_SET_MPSMAX(n->bar.cap, 4); + +    n->bar.vs = 0x00010100; +    n->bar.intmc = n->bar.intms = 0; + +    for (i = 0; i < n->num_namespaces; i++) { +        NvmeNamespace *ns = &n->namespaces[i]; +        NvmeIdNs *id_ns = &ns->id_ns; +        id_ns->nsfeat = 0; +        id_ns->nlbaf = 0; +        id_ns->flbas = 0; +        id_ns->mc = 0; +        id_ns->dpc = 0; +        id_ns->dps = 0; +        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; +        id_ns->ncap  = id_ns->nuse = id_ns->nsze = +            cpu_to_le64(n->ns_size >> +                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); +    } +    return 0; +} + +static void nvme_exit(PCIDevice *pci_dev) +{ +    NvmeCtrl *n = NVME(pci_dev); + +    nvme_clear_ctrl(n); +    g_free(n->namespaces); +    g_free(n->cq); +    g_free(n->sq); +    msix_uninit_exclusive_bar(pci_dev); +} + +static Property nvme_props[] = { +    DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), +    DEFINE_PROP_STRING("serial", NvmeCtrl, serial), +    DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription nvme_vmstate = { +    .name = "nvme", +    .unmigratable = 1, +}; + +static void nvme_class_init(ObjectClass *oc, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(oc); +    PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + +    pc->init = nvme_init; +    pc->exit = nvme_exit; +    pc->class_id = PCI_CLASS_STORAGE_EXPRESS; +    pc->vendor_id = PCI_VENDOR_ID_INTEL; +    pc->device_id = 0x5845; +    pc->revision = 1; +    pc->is_express = 1; + +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +    dc->desc = "Non-Volatile Memory Express"; +    dc->props = nvme_props; +    dc->vmsd = &nvme_vmstate; +} + +static void nvme_get_bootindex(Object *obj, Visitor *v, void *opaque, +                                  const char *name, Error **errp) +{ +    NvmeCtrl *s = NVME(obj); + +    visit_type_int32(v, &s->conf.bootindex, name, errp); +} + +static void nvme_set_bootindex(Object *obj, Visitor *v, void *opaque, +                                  const char *name, Error **errp) +{ +    NvmeCtrl *s = NVME(obj); +    int32_t boot_index; +    Error *local_err = NULL; + +    visit_type_int32(v, &boot_index, name, &local_err); +    if (local_err) { +        goto out; +    } +    /* check whether bootindex is present in fw_boot_order list  */ +    check_boot_index(boot_index, &local_err); +    if (local_err) { + 
       goto out; +    } +    /* change bootindex to a new one */ +    s->conf.bootindex = boot_index; + +out: +    if (local_err) { +        error_propagate(errp, local_err); +    } +} + +static void nvme_instance_init(Object *obj) +{ +    object_property_add(obj, "bootindex", "int32", +                        nvme_get_bootindex, +                        nvme_set_bootindex, NULL, NULL, NULL); +    object_property_set_int(obj, -1, "bootindex", NULL); +} + +static const TypeInfo nvme_info = { +    .name          = "nvme", +    .parent        = TYPE_PCI_DEVICE, +    .instance_size = sizeof(NvmeCtrl), +    .class_init    = nvme_class_init, +    .instance_init = nvme_instance_init, +}; + +static void nvme_register_types(void) +{ +    type_register_static(&nvme_info); +} + +type_init(nvme_register_types) diff --git a/hw/block/nvme.h b/hw/block/nvme.h new file mode 100644 index 00000000..bf3a3cca --- /dev/null +++ b/hw/block/nvme.h @@ -0,0 +1,712 @@ +#ifndef HW_NVME_H +#define HW_NVME_H + +typedef struct NvmeBar { +    uint64_t    cap; +    uint32_t    vs; +    uint32_t    intms; +    uint32_t    intmc; +    uint32_t    cc; +    uint32_t    rsvd1; +    uint32_t    csts; +    uint32_t    nssrc; +    uint32_t    aqa; +    uint64_t    asq; +    uint64_t    acq; +} NvmeBar; + +enum NvmeCapShift { +    CAP_MQES_SHIFT     = 0, +    CAP_CQR_SHIFT      = 16, +    CAP_AMS_SHIFT      = 17, +    CAP_TO_SHIFT       = 24, +    CAP_DSTRD_SHIFT    = 32, +    CAP_NSSRS_SHIFT    = 33, +    CAP_CSS_SHIFT      = 37, +    CAP_MPSMIN_SHIFT   = 48, +    CAP_MPSMAX_SHIFT   = 52, +}; + +enum NvmeCapMask { +    CAP_MQES_MASK      = 0xffff, +    CAP_CQR_MASK       = 0x1, +    CAP_AMS_MASK       = 0x3, +    CAP_TO_MASK        = 0xff, +    CAP_DSTRD_MASK     = 0xf, +    CAP_NSSRS_MASK     = 0x1, +    CAP_CSS_MASK       = 0xff, +    CAP_MPSMIN_MASK    = 0xf, +    CAP_MPSMAX_MASK    = 0xf, +}; + +#define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK) +#define NVME_CAP_CQR(cap)   (((cap) >> CAP_CQR_SHIFT)    & CAP_CQR_MASK) +#define NVME_CAP_AMS(cap)   (((cap) >> CAP_AMS_SHIFT)    & CAP_AMS_MASK) +#define NVME_CAP_TO(cap)    (((cap) >> CAP_TO_SHIFT)     & CAP_TO_MASK) +#define NVME_CAP_DSTRD(cap) (((cap) >> CAP_DSTRD_SHIFT)  & CAP_DSTRD_MASK) +#define NVME_CAP_NSSRS(cap) (((cap) >> CAP_NSSRS_SHIFT)  & CAP_NSSRS_MASK) +#define NVME_CAP_CSS(cap)   (((cap) >> CAP_CSS_SHIFT)    & CAP_CSS_MASK) +#define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK) +#define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK) + +#define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  \ +                                                           << CAP_MQES_SHIFT) +#define NVME_CAP_SET_CQR(cap, val)    (cap |= (uint64_t)(val & CAP_CQR_MASK)   \ +                                                           << CAP_CQR_SHIFT) +#define NVME_CAP_SET_AMS(cap, val)    (cap |= (uint64_t)(val & CAP_AMS_MASK)   \ +                                                           << CAP_AMS_SHIFT) +#define NVME_CAP_SET_TO(cap, val)     (cap |= (uint64_t)(val & CAP_TO_MASK)    \ +                                                           << CAP_TO_SHIFT) +#define NVME_CAP_SET_DSTRD(cap, val)  (cap |= (uint64_t)(val & CAP_DSTRD_MASK) \ +                                                           << CAP_DSTRD_SHIFT) +#define NVME_CAP_SET_NSSRS(cap, val)  (cap |= (uint64_t)(val & CAP_NSSRS_MASK) \ +                                                           << CAP_NSSRS_SHIFT) +#define 
NVME_CAP_SET_CSS(cap, val)    (cap |= (uint64_t)(val & CAP_CSS_MASK)   \ +                                                           << CAP_CSS_SHIFT) +#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\ +                                                           << CAP_MPSMIN_SHIFT) +#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\ +                                                            << CAP_MPSMAX_SHIFT) + +enum NvmeCcShift { +    CC_EN_SHIFT     = 0, +    CC_CSS_SHIFT    = 4, +    CC_MPS_SHIFT    = 7, +    CC_AMS_SHIFT    = 11, +    CC_SHN_SHIFT    = 14, +    CC_IOSQES_SHIFT = 16, +    CC_IOCQES_SHIFT = 20, +}; + +enum NvmeCcMask { +    CC_EN_MASK      = 0x1, +    CC_CSS_MASK     = 0x7, +    CC_MPS_MASK     = 0xf, +    CC_AMS_MASK     = 0x7, +    CC_SHN_MASK     = 0x3, +    CC_IOSQES_MASK  = 0xf, +    CC_IOCQES_MASK  = 0xf, +}; + +#define NVME_CC_EN(cc)     ((cc >> CC_EN_SHIFT)     & CC_EN_MASK) +#define NVME_CC_CSS(cc)    ((cc >> CC_CSS_SHIFT)    & CC_CSS_MASK) +#define NVME_CC_MPS(cc)    ((cc >> CC_MPS_SHIFT)    & CC_MPS_MASK) +#define NVME_CC_AMS(cc)    ((cc >> CC_AMS_SHIFT)    & CC_AMS_MASK) +#define NVME_CC_SHN(cc)    ((cc >> CC_SHN_SHIFT)    & CC_SHN_MASK) +#define NVME_CC_IOSQES(cc) ((cc >> CC_IOSQES_SHIFT) & CC_IOSQES_MASK) +#define NVME_CC_IOCQES(cc) ((cc >> CC_IOCQES_SHIFT) & CC_IOCQES_MASK) + +enum NvmeCstsShift { +    CSTS_RDY_SHIFT      = 0, +    CSTS_CFS_SHIFT      = 1, +    CSTS_SHST_SHIFT     = 2, +    CSTS_NSSRO_SHIFT    = 4, +}; + +enum NvmeCstsMask { +    CSTS_RDY_MASK   = 0x1, +    CSTS_CFS_MASK   = 0x1, +    CSTS_SHST_MASK  = 0x3, +    CSTS_NSSRO_MASK = 0x1, +}; + +enum NvmeCsts { +    NVME_CSTS_READY         = 1 << CSTS_RDY_SHIFT, +    NVME_CSTS_FAILED        = 1 << CSTS_CFS_SHIFT, +    NVME_CSTS_SHST_NORMAL   = 0 << CSTS_SHST_SHIFT, +    NVME_CSTS_SHST_PROGRESS = 1 << CSTS_SHST_SHIFT, +    NVME_CSTS_SHST_COMPLETE = 2 << CSTS_SHST_SHIFT, +    NVME_CSTS_NSSRO         = 1 << CSTS_NSSRO_SHIFT, +}; + +#define NVME_CSTS_RDY(csts)     ((csts >> CSTS_RDY_SHIFT)   & CSTS_RDY_MASK) +#define NVME_CSTS_CFS(csts)     ((csts >> CSTS_CFS_SHIFT)   & CSTS_CFS_MASK) +#define NVME_CSTS_SHST(csts)    ((csts >> CSTS_SHST_SHIFT)  & CSTS_SHST_MASK) +#define NVME_CSTS_NSSRO(csts)   ((csts >> CSTS_NSSRO_SHIFT) & CSTS_NSSRO_MASK) + +enum NvmeAqaShift { +    AQA_ASQS_SHIFT  = 0, +    AQA_ACQS_SHIFT  = 16, +}; + +enum NvmeAqaMask { +    AQA_ASQS_MASK   = 0xfff, +    AQA_ACQS_MASK   = 0xfff, +}; + +#define NVME_AQA_ASQS(aqa) ((aqa >> AQA_ASQS_SHIFT) & AQA_ASQS_MASK) +#define NVME_AQA_ACQS(aqa) ((aqa >> AQA_ACQS_SHIFT) & AQA_ACQS_MASK) + +typedef struct NvmeCmd { +    uint8_t     opcode; +    uint8_t     fuse; +    uint16_t    cid; +    uint32_t    nsid; +    uint64_t    res1; +    uint64_t    mptr; +    uint64_t    prp1; +    uint64_t    prp2; +    uint32_t    cdw10; +    uint32_t    cdw11; +    uint32_t    cdw12; +    uint32_t    cdw13; +    uint32_t    cdw14; +    uint32_t    cdw15; +} NvmeCmd; + +enum NvmeAdminCommands { +    NVME_ADM_CMD_DELETE_SQ      = 0x00, +    NVME_ADM_CMD_CREATE_SQ      = 0x01, +    NVME_ADM_CMD_GET_LOG_PAGE   = 0x02, +    NVME_ADM_CMD_DELETE_CQ      = 0x04, +    NVME_ADM_CMD_CREATE_CQ      = 0x05, +    NVME_ADM_CMD_IDENTIFY       = 0x06, +    NVME_ADM_CMD_ABORT          = 0x08, +    NVME_ADM_CMD_SET_FEATURES   = 0x09, +    NVME_ADM_CMD_GET_FEATURES   = 0x0a, +    NVME_ADM_CMD_ASYNC_EV_REQ   = 0x0c, +    NVME_ADM_CMD_ACTIVATE_FW    = 0x10, +    NVME_ADM_CMD_DOWNLOAD_FW    = 0x11, +    
NVME_ADM_CMD_FORMAT_NVM     = 0x80, +    NVME_ADM_CMD_SECURITY_SEND  = 0x81, +    NVME_ADM_CMD_SECURITY_RECV  = 0x82, +}; + +enum NvmeIoCommands { +    NVME_CMD_FLUSH              = 0x00, +    NVME_CMD_WRITE              = 0x01, +    NVME_CMD_READ               = 0x02, +    NVME_CMD_WRITE_UNCOR        = 0x04, +    NVME_CMD_COMPARE            = 0x05, +    NVME_CMD_DSM                = 0x09, +}; + +typedef struct NvmeDeleteQ { +    uint8_t     opcode; +    uint8_t     flags; +    uint16_t    cid; +    uint32_t    rsvd1[9]; +    uint16_t    qid; +    uint16_t    rsvd10; +    uint32_t    rsvd11[5]; +} NvmeDeleteQ; + +typedef struct NvmeCreateCq { +    uint8_t     opcode; +    uint8_t     flags; +    uint16_t    cid; +    uint32_t    rsvd1[5]; +    uint64_t    prp1; +    uint64_t    rsvd8; +    uint16_t    cqid; +    uint16_t    qsize; +    uint16_t    cq_flags; +    uint16_t    irq_vector; +    uint32_t    rsvd12[4]; +} NvmeCreateCq; + +#define NVME_CQ_FLAGS_PC(cq_flags)  (cq_flags & 0x1) +#define NVME_CQ_FLAGS_IEN(cq_flags) ((cq_flags >> 1) & 0x1) + +typedef struct NvmeCreateSq { +    uint8_t     opcode; +    uint8_t     flags; +    uint16_t    cid; +    uint32_t    rsvd1[5]; +    uint64_t    prp1; +    uint64_t    rsvd8; +    uint16_t    sqid; +    uint16_t    qsize; +    uint16_t    sq_flags; +    uint16_t    cqid; +    uint32_t    rsvd12[4]; +} NvmeCreateSq; + +#define NVME_SQ_FLAGS_PC(sq_flags)      (sq_flags & 0x1) +#define NVME_SQ_FLAGS_QPRIO(sq_flags)   ((sq_flags >> 1) & 0x3) + +enum NvmeQueueFlags { +    NVME_Q_PC           = 1, +    NVME_Q_PRIO_URGENT  = 0, +    NVME_Q_PRIO_HIGH    = 1, +    NVME_Q_PRIO_NORMAL  = 2, +    NVME_Q_PRIO_LOW     = 3, +}; + +typedef struct NvmeIdentify { +    uint8_t     opcode; +    uint8_t     flags; +    uint16_t    cid; +    uint32_t    nsid; +    uint64_t    rsvd2[2]; +    uint64_t    prp1; +    uint64_t    prp2; +    uint32_t    cns; +    uint32_t    rsvd11[5]; +} NvmeIdentify; + +typedef struct NvmeRwCmd { +    uint8_t     opcode; +    uint8_t     flags; +    uint16_t    cid; +    uint32_t    nsid; +    uint64_t    rsvd2; +    uint64_t    mptr; +    uint64_t    prp1; +    uint64_t    prp2; +    uint64_t    slba; +    uint16_t    nlb; +    uint16_t    control; +    uint32_t    dsmgmt; +    uint32_t    reftag; +    uint16_t    apptag; +    uint16_t    appmask; +} NvmeRwCmd; + +enum { +    NVME_RW_LR                  = 1 << 15, +    NVME_RW_FUA                 = 1 << 14, +    NVME_RW_DSM_FREQ_UNSPEC     = 0, +    NVME_RW_DSM_FREQ_TYPICAL    = 1, +    NVME_RW_DSM_FREQ_RARE       = 2, +    NVME_RW_DSM_FREQ_READS      = 3, +    NVME_RW_DSM_FREQ_WRITES     = 4, +    NVME_RW_DSM_FREQ_RW         = 5, +    NVME_RW_DSM_FREQ_ONCE       = 6, +    NVME_RW_DSM_FREQ_PREFETCH   = 7, +    NVME_RW_DSM_FREQ_TEMP       = 8, +    NVME_RW_DSM_LATENCY_NONE    = 0 << 4, +    NVME_RW_DSM_LATENCY_IDLE    = 1 << 4, +    NVME_RW_DSM_LATENCY_NORM    = 2 << 4, +    NVME_RW_DSM_LATENCY_LOW     = 3 << 4, +    NVME_RW_DSM_SEQ_REQ         = 1 << 6, +    NVME_RW_DSM_COMPRESSED      = 1 << 7, +    NVME_RW_PRINFO_PRACT        = 1 << 13, +    NVME_RW_PRINFO_PRCHK_GUARD  = 1 << 12, +    NVME_RW_PRINFO_PRCHK_APP    = 1 << 11, +    NVME_RW_PRINFO_PRCHK_REF    = 1 << 10, +}; + +typedef struct NvmeDsmCmd { +    uint8_t     opcode; +    uint8_t     flags; +    uint16_t    cid; +    uint32_t    nsid; +    uint64_t    rsvd2[2]; +    uint64_t    prp1; +    uint64_t    prp2; +    uint32_t    nr; +    uint32_t    attributes; +    uint32_t    rsvd12[4]; +} NvmeDsmCmd; + +enum { +    NVME_DSMGMT_IDR 
= 1 << 0, +    NVME_DSMGMT_IDW = 1 << 1, +    NVME_DSMGMT_AD  = 1 << 2, +}; + +typedef struct NvmeDsmRange { +    uint32_t    cattr; +    uint32_t    nlb; +    uint64_t    slba; +} NvmeDsmRange; + +enum NvmeAsyncEventRequest { +    NVME_AER_TYPE_ERROR                     = 0, +    NVME_AER_TYPE_SMART                     = 1, +    NVME_AER_TYPE_IO_SPECIFIC               = 6, +    NVME_AER_TYPE_VENDOR_SPECIFIC           = 7, +    NVME_AER_INFO_ERR_INVALID_SQ            = 0, +    NVME_AER_INFO_ERR_INVALID_DB            = 1, +    NVME_AER_INFO_ERR_DIAG_FAIL             = 2, +    NVME_AER_INFO_ERR_PERS_INTERNAL_ERR     = 3, +    NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR    = 4, +    NVME_AER_INFO_ERR_FW_IMG_LOAD_ERR       = 5, +    NVME_AER_INFO_SMART_RELIABILITY         = 0, +    NVME_AER_INFO_SMART_TEMP_THRESH         = 1, +    NVME_AER_INFO_SMART_SPARE_THRESH        = 2, +}; + +typedef struct NvmeAerResult { +    uint8_t event_type; +    uint8_t event_info; +    uint8_t log_page; +    uint8_t resv; +} NvmeAerResult; + +typedef struct NvmeCqe { +    uint32_t    result; +    uint32_t    rsvd; +    uint16_t    sq_head; +    uint16_t    sq_id; +    uint16_t    cid; +    uint16_t    status; +} NvmeCqe; + +enum NvmeStatusCodes { +    NVME_SUCCESS                = 0x0000, +    NVME_INVALID_OPCODE         = 0x0001, +    NVME_INVALID_FIELD          = 0x0002, +    NVME_CID_CONFLICT           = 0x0003, +    NVME_DATA_TRAS_ERROR        = 0x0004, +    NVME_POWER_LOSS_ABORT       = 0x0005, +    NVME_INTERNAL_DEV_ERROR     = 0x0006, +    NVME_CMD_ABORT_REQ          = 0x0007, +    NVME_CMD_ABORT_SQ_DEL       = 0x0008, +    NVME_CMD_ABORT_FAILED_FUSE  = 0x0009, +    NVME_CMD_ABORT_MISSING_FUSE = 0x000a, +    NVME_INVALID_NSID           = 0x000b, +    NVME_CMD_SEQ_ERROR          = 0x000c, +    NVME_LBA_RANGE              = 0x0080, +    NVME_CAP_EXCEEDED           = 0x0081, +    NVME_NS_NOT_READY           = 0x0082, +    NVME_NS_RESV_CONFLICT       = 0x0083, +    NVME_INVALID_CQID           = 0x0100, +    NVME_INVALID_QID            = 0x0101, +    NVME_MAX_QSIZE_EXCEEDED     = 0x0102, +    NVME_ACL_EXCEEDED           = 0x0103, +    NVME_RESERVED               = 0x0104, +    NVME_AER_LIMIT_EXCEEDED     = 0x0105, +    NVME_INVALID_FW_SLOT        = 0x0106, +    NVME_INVALID_FW_IMAGE       = 0x0107, +    NVME_INVALID_IRQ_VECTOR     = 0x0108, +    NVME_INVALID_LOG_ID         = 0x0109, +    NVME_INVALID_FORMAT         = 0x010a, +    NVME_FW_REQ_RESET           = 0x010b, +    NVME_INVALID_QUEUE_DEL      = 0x010c, +    NVME_FID_NOT_SAVEABLE       = 0x010d, +    NVME_FID_NOT_NSID_SPEC      = 0x010f, +    NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110, +    NVME_CONFLICTING_ATTRS      = 0x0180, +    NVME_INVALID_PROT_INFO      = 0x0181, +    NVME_WRITE_TO_RO            = 0x0182, +    NVME_WRITE_FAULT            = 0x0280, +    NVME_UNRECOVERED_READ       = 0x0281, +    NVME_E2E_GUARD_ERROR        = 0x0282, +    NVME_E2E_APP_ERROR          = 0x0283, +    NVME_E2E_REF_ERROR          = 0x0284, +    NVME_CMP_FAILURE            = 0x0285, +    NVME_ACCESS_DENIED          = 0x0286, +    NVME_MORE                   = 0x2000, +    NVME_DNR                    = 0x4000, +    NVME_NO_COMPLETE            = 0xffff, +}; + +typedef struct NvmeFwSlotInfoLog { +    uint8_t     afi; +    uint8_t     reserved1[7]; +    uint8_t     frs1[8]; +    uint8_t     frs2[8]; +    uint8_t     frs3[8]; +    uint8_t     frs4[8]; +    uint8_t     frs5[8]; +    uint8_t     frs6[8]; +    uint8_t     frs7[8]; +    uint8_t     reserved2[448]; +} NvmeFwSlotInfoLog; + 
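How these codes travel back to the guest: in each completion entry the 16-bit status field carries the phase tag in bit 0 and the code above in bits 15:1 (the completion path posts (status << 1) | phase, which is why NVME_MORE and NVME_DNR fit below bit 15). A hedged guest-side sketch, with cqe standing in for a raw completion entry:

    uint16_t sf    = le16_to_cpu(cqe.status);
    bool     phase = sf & 0x1;            /* flips on every pass of the ring */
    uint16_t code  = sf >> 1;             /* aligns with NvmeStatusCodes */
    bool     retry = !(code & NVME_DNR);  /* DNR clear: worth retrying */
    uint16_t sc    = code & ~(NVME_DNR | NVME_MORE);  /* e.g. NVME_LBA_RANGE */
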
+typedef struct NvmeErrorLog { +    uint64_t    error_count; +    uint16_t    sqid; +    uint16_t    cid; +    uint16_t    status_field; +    uint16_t    param_error_location; +    uint64_t    lba; +    uint32_t    nsid; +    uint8_t     vs; +    uint8_t     resv[35]; +} NvmeErrorLog; + +typedef struct NvmeSmartLog { +    uint8_t     critical_warning; +    uint8_t     temperature[2]; +    uint8_t     available_spare; +    uint8_t     available_spare_threshold; +    uint8_t     percentage_used; +    uint8_t     reserved1[26]; +    uint64_t    data_units_read[2]; +    uint64_t    data_units_written[2]; +    uint64_t    host_read_commands[2]; +    uint64_t    host_write_commands[2]; +    uint64_t    controller_busy_time[2]; +    uint64_t    power_cycles[2]; +    uint64_t    power_on_hours[2]; +    uint64_t    unsafe_shutdowns[2]; +    uint64_t    media_errors[2]; +    uint64_t    number_of_error_log_entries[2]; +    uint8_t     reserved2[320]; +} NvmeSmartLog; + +enum NvmeSmartWarn { +    NVME_SMART_SPARE                  = 1 << 0, +    NVME_SMART_TEMPERATURE            = 1 << 1, +    NVME_SMART_RELIABILITY            = 1 << 2, +    NVME_SMART_MEDIA_READ_ONLY        = 1 << 3, +    NVME_SMART_FAILED_VOLATILE_MEDIA  = 1 << 4, +}; + +enum LogIdentifier { +    NVME_LOG_ERROR_INFO     = 0x01, +    NVME_LOG_SMART_INFO     = 0x02, +    NVME_LOG_FW_SLOT_INFO   = 0x03, +}; + +typedef struct NvmePSD { +    uint16_t    mp; +    uint16_t    reserved; +    uint32_t    enlat; +    uint32_t    exlat; +    uint8_t     rrt; +    uint8_t     rrl; +    uint8_t     rwt; +    uint8_t     rwl; +    uint8_t     resv[16]; +} NvmePSD; + +typedef struct NvmeIdCtrl { +    uint16_t    vid; +    uint16_t    ssvid; +    uint8_t     sn[20]; +    uint8_t     mn[40]; +    uint8_t     fr[8]; +    uint8_t     rab; +    uint8_t     ieee[3]; +    uint8_t     cmic; +    uint8_t     mdts; +    uint8_t     rsvd255[178]; +    uint16_t    oacs; +    uint8_t     acl; +    uint8_t     aerl; +    uint8_t     frmw; +    uint8_t     lpa; +    uint8_t     elpe; +    uint8_t     npss; +    uint8_t     rsvd511[248]; +    uint8_t     sqes; +    uint8_t     cqes; +    uint16_t    rsvd515; +    uint32_t    nn; +    uint16_t    oncs; +    uint16_t    fuses; +    uint8_t     fna; +    uint8_t     vwc; +    uint16_t    awun; +    uint16_t    awupf; +    uint8_t     rsvd703[174]; +    uint8_t     rsvd2047[1344]; +    NvmePSD     psd[32]; +    uint8_t     vs[1024]; +} NvmeIdCtrl; + +enum NvmeIdCtrlOacs { +    NVME_OACS_SECURITY  = 1 << 0, +    NVME_OACS_FORMAT    = 1 << 1, +    NVME_OACS_FW        = 1 << 2, +}; + +enum NvmeIdCtrlOncs { +    NVME_ONCS_COMPARE       = 1 << 0, +    NVME_ONCS_WRITE_UNCORR  = 1 << 1, +    NVME_ONCS_DSM           = 1 << 2, +    NVME_ONCS_WRITE_ZEROS   = 1 << 3, +    NVME_ONCS_FEATURES      = 1 << 4, +    NVME_ONCS_RESRVATIONS   = 1 << 5, +}; + +#define NVME_CTRL_SQES_MIN(sqes) ((sqes) & 0xf) +#define NVME_CTRL_SQES_MAX(sqes) (((sqes) >> 4) & 0xf) +#define NVME_CTRL_CQES_MIN(cqes) ((cqes) & 0xf) +#define NVME_CTRL_CQES_MAX(cqes) (((cqes) >> 4) & 0xf) + +typedef struct NvmeFeatureVal { +    uint32_t    arbitration; +    uint32_t    power_mgmt; +    uint32_t    temp_thresh; +    uint32_t    err_rec; +    uint32_t    volatile_wc; +    uint32_t    num_queues; +    uint32_t    int_coalescing; +    uint32_t    *int_vector_config; +    uint32_t    write_atomicity; +    uint32_t    async_config; +    uint32_t    sw_prog_marker; +} NvmeFeatureVal; + +#define NVME_ARB_AB(arb)    (arb & 0x7) +#define NVME_ARB_LPW(arb)   ((arb >> 8) & 
0xff) +#define NVME_ARB_MPW(arb)   ((arb >> 16) & 0xff) +#define NVME_ARB_HPW(arb)   ((arb >> 24) & 0xff) + +#define NVME_INTC_THR(intc)     (intc & 0xff) +#define NVME_INTC_TIME(intc)    ((intc >> 8) & 0xff) + +enum NvmeFeatureIds { +    NVME_ARBITRATION                = 0x1, +    NVME_POWER_MANAGEMENT           = 0x2, +    NVME_LBA_RANGE_TYPE             = 0x3, +    NVME_TEMPERATURE_THRESHOLD      = 0x4, +    NVME_ERROR_RECOVERY             = 0x5, +    NVME_VOLATILE_WRITE_CACHE       = 0x6, +    NVME_NUMBER_OF_QUEUES           = 0x7, +    NVME_INTERRUPT_COALESCING       = 0x8, +    NVME_INTERRUPT_VECTOR_CONF      = 0x9, +    NVME_WRITE_ATOMICITY            = 0xa, +    NVME_ASYNCHRONOUS_EVENT_CONF    = 0xb, +    NVME_SOFTWARE_PROGRESS_MARKER   = 0x80 +}; + +typedef struct NvmeRangeType { +    uint8_t     type; +    uint8_t     attributes; +    uint8_t     rsvd2[14]; +    uint64_t    slba; +    uint64_t    nlb; +    uint8_t     guid[16]; +    uint8_t     rsvd48[16]; +} NvmeRangeType; + +typedef struct NvmeLBAF { +    uint16_t    ms; +    uint8_t     ds; +    uint8_t     rp; +} NvmeLBAF; + +typedef struct NvmeIdNs { +    uint64_t    nsze; +    uint64_t    ncap; +    uint64_t    nuse; +    uint8_t     nsfeat; +    uint8_t     nlbaf; +    uint8_t     flbas; +    uint8_t     mc; +    uint8_t     dpc; +    uint8_t     dps; +    uint8_t     res30[98]; +    NvmeLBAF    lbaf[16]; +    uint8_t     res192[192]; +    uint8_t     vs[3712]; +} NvmeIdNs; + +#define NVME_ID_NS_NSFEAT_THIN(nsfeat)      ((nsfeat & 0x1)) +#define NVME_ID_NS_FLBAS_EXTENDED(flbas)    ((flbas >> 4) & 0x1) +#define NVME_ID_NS_FLBAS_INDEX(flbas)       ((flbas & 0xf)) +#define NVME_ID_NS_MC_SEPARATE(mc)          ((mc >> 1) & 0x1) +#define NVME_ID_NS_MC_EXTENDED(mc)          ((mc & 0x1)) +#define NVME_ID_NS_DPC_LAST_EIGHT(dpc)      ((dpc >> 4) & 0x1) +#define NVME_ID_NS_DPC_FIRST_EIGHT(dpc)     ((dpc >> 3) & 0x1) +#define NVME_ID_NS_DPC_TYPE_3(dpc)          ((dpc >> 2) & 0x1) +#define NVME_ID_NS_DPC_TYPE_2(dpc)          ((dpc >> 1) & 0x1) +#define NVME_ID_NS_DPC_TYPE_1(dpc)          ((dpc & 0x1)) +#define NVME_ID_NS_DPC_TYPE_MASK            0x7 + +enum NvmeIdNsDps { +    DPS_TYPE_NONE   = 0, +    DPS_TYPE_1      = 1, +    DPS_TYPE_2      = 2, +    DPS_TYPE_3      = 3, +    DPS_TYPE_MASK   = 0x7, +    DPS_FIRST_EIGHT = 8, +}; + +static inline void _nvme_check_size(void) +{ +    QEMU_BUILD_BUG_ON(sizeof(NvmeAerResult) != 4); +    QEMU_BUILD_BUG_ON(sizeof(NvmeCqe) != 16); +    QEMU_BUILD_BUG_ON(sizeof(NvmeDsmRange) != 16); +    QEMU_BUILD_BUG_ON(sizeof(NvmeCmd) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeDeleteQ) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeCreateCq) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeCreateSq) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeIdentify) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeRwCmd) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeDsmCmd) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeRangeType) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeErrorLog) != 64); +    QEMU_BUILD_BUG_ON(sizeof(NvmeFwSlotInfoLog) != 512); +    QEMU_BUILD_BUG_ON(sizeof(NvmeSmartLog) != 512); +    QEMU_BUILD_BUG_ON(sizeof(NvmeIdCtrl) != 4096); +    QEMU_BUILD_BUG_ON(sizeof(NvmeIdNs) != 4096); +} + +typedef struct NvmeAsyncEvent { +    QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry; +    NvmeAerResult result; +} NvmeAsyncEvent; + +typedef struct NvmeRequest { +    struct NvmeSQueue       *sq; +    BlockAIOCB              *aiocb; +    uint16_t                status; +    bool                    has_sg; +    NvmeCqe                 cqe; +    BlockAcctCookie  
       acct; +    QEMUSGList              qsg; +    QTAILQ_ENTRY(NvmeRequest)entry; +} NvmeRequest; + +typedef struct NvmeSQueue { +    struct NvmeCtrl *ctrl; +    uint16_t    sqid; +    uint16_t    cqid; +    uint32_t    head; +    uint32_t    tail; +    uint32_t    size; +    uint64_t    dma_addr; +    QEMUTimer   *timer; +    NvmeRequest *io_req; +    QTAILQ_HEAD(sq_req_list, NvmeRequest) req_list; +    QTAILQ_HEAD(out_req_list, NvmeRequest) out_req_list; +    QTAILQ_ENTRY(NvmeSQueue) entry; +} NvmeSQueue; + +typedef struct NvmeCQueue { +    struct NvmeCtrl *ctrl; +    uint8_t     phase; +    uint16_t    cqid; +    uint16_t    irq_enabled; +    uint32_t    head; +    uint32_t    tail; +    uint32_t    vector; +    uint32_t    size; +    uint64_t    dma_addr; +    QEMUTimer   *timer; +    QTAILQ_HEAD(sq_list, NvmeSQueue) sq_list; +    QTAILQ_HEAD(cq_req_list, NvmeRequest) req_list; +} NvmeCQueue; + +typedef struct NvmeNamespace { +    NvmeIdNs        id_ns; +} NvmeNamespace; + +#define TYPE_NVME "nvme" +#define NVME(obj) \ +        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) + +typedef struct NvmeCtrl { +    PCIDevice    parent_obj; +    MemoryRegion iomem; +    NvmeBar      bar; +    BlockConf    conf; + +    uint32_t    page_size; +    uint16_t    page_bits; +    uint16_t    max_prp_ents; +    uint16_t    cqe_size; +    uint16_t    sqe_size; +    uint32_t    reg_size; +    uint32_t    num_namespaces; +    uint32_t    num_queues; +    uint32_t    max_q_ents; +    uint64_t    ns_size; + +    char            *serial; +    NvmeNamespace   *namespaces; +    NvmeSQueue      **sq; +    NvmeCQueue      **cq; +    NvmeSQueue      admin_sq; +    NvmeCQueue      admin_cq; +    NvmeIdCtrl      id_ctrl; +} NvmeCtrl; + +#endif /* HW_NVME_H */ diff --git a/hw/block/onenand.c b/hw/block/onenand.c new file mode 100644 index 00000000..1b2c8937 --- /dev/null +++ b/hw/block/onenand.c @@ -0,0 +1,848 @@ +/* + * OneNAND flash memories emulation. + * + * Copyright (C) 2008 Nokia Corporation + * Written by Andrzej Zaborowski <andrew@openedhand.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu-common.h" +#include "hw/hw.h" +#include "hw/block/flash.h" +#include "hw/irq.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "hw/sysbus.h" +#include "qemu/error-report.h" + +/* 11 for 2kB-page OneNAND ("2nd generation") and 10 for 1kB-page chips */ +#define PAGE_SHIFT	11 + +/* Fixed */ +#define BLOCK_SHIFT	(PAGE_SHIFT + 6) + +#define TYPE_ONE_NAND "onenand" +#define ONE_NAND(obj) OBJECT_CHECK(OneNANDState, (obj), TYPE_ONE_NAND) + +typedef struct OneNANDState { +    SysBusDevice parent_obj; + +    struct { +        uint16_t man; +        uint16_t dev; +        uint16_t ver; +    } id; +    int shift; +    hwaddr base; +    qemu_irq intr; +    qemu_irq rdy; +    BlockBackend *blk; +    BlockBackend *blk_cur; +    uint8_t *image; +    uint8_t *otp; +    uint8_t *current; +    MemoryRegion ram; +    MemoryRegion mapped_ram; +    uint8_t current_direction; +    uint8_t *boot[2]; +    uint8_t *data[2][2]; +    MemoryRegion iomem; +    MemoryRegion container; +    int cycle; +    int otpmode; + +    uint16_t addr[8]; +    uint16_t unladdr[8]; +    int bufaddr; +    int count; +    uint16_t command; +    uint16_t config[2]; +    uint16_t status; +    uint16_t intstatus; +    uint16_t wpstatus; + +    ECCState ecc; + +    int density_mask; +    int secs; +    int secs_cur; +    int blocks; +    uint8_t *blockwp; +} OneNANDState; + +enum { +    ONEN_BUF_BLOCK = 0, +    ONEN_BUF_BLOCK2 = 1, +    ONEN_BUF_DEST_BLOCK = 2, +    ONEN_BUF_DEST_PAGE = 3, +    ONEN_BUF_PAGE = 7, +}; + +enum { +    ONEN_ERR_CMD = 1 << 10, +    ONEN_ERR_ERASE = 1 << 11, +    ONEN_ERR_PROG = 1 << 12, +    ONEN_ERR_LOAD = 1 << 13, +}; + +enum { +    ONEN_INT_RESET = 1 << 4, +    ONEN_INT_ERASE = 1 << 5, +    ONEN_INT_PROG = 1 << 6, +    ONEN_INT_LOAD = 1 << 7, +    ONEN_INT = 1 << 15, +}; + +enum { +    ONEN_LOCK_LOCKTIGHTEN = 1 << 0, +    ONEN_LOCK_LOCKED = 1 << 1, +    ONEN_LOCK_UNLOCKED = 1 << 2, +}; + +static void onenand_mem_setup(OneNANDState *s) +{ +    /* XXX: We should use IO_MEM_ROMD but we broke it earlier... +     * Both 0x0000 ... 0x01ff and 0x8000 ... 0x800f can be used to +     * write boot commands.  Also take note of the BWPS bit.  
*/ +    memory_region_init(&s->container, OBJECT(s), "onenand", +                       0x10000 << s->shift); +    memory_region_add_subregion(&s->container, 0, &s->iomem); +    memory_region_init_alias(&s->mapped_ram, OBJECT(s), "onenand-mapped-ram", +                             &s->ram, 0x0200 << s->shift, +                             0xbe00 << s->shift); +    memory_region_add_subregion_overlap(&s->container, +                                        0x0200 << s->shift, +                                        &s->mapped_ram, +                                        1); +} + +static void onenand_intr_update(OneNANDState *s) +{ +    qemu_set_irq(s->intr, ((s->intstatus >> 15) ^ (~s->config[0] >> 6)) & 1); +} + +static void onenand_pre_save(void *opaque) +{ +    OneNANDState *s = opaque; +    if (s->current == s->otp) { +        s->current_direction = 1; +    } else if (s->current == s->image) { +        s->current_direction = 2; +    } else { +        s->current_direction = 0; +    } +} + +static int onenand_post_load(void *opaque, int version_id) +{ +    OneNANDState *s = opaque; +    switch (s->current_direction) { +    case 0: +        break; +    case 1: +        s->current = s->otp; +        break; +    case 2: +        s->current = s->image; +        break; +    default: +        return -1; +    } +    onenand_intr_update(s); +    return 0; +} + +static const VMStateDescription vmstate_onenand = { +    .name = "onenand", +    .version_id = 1, +    .minimum_version_id = 1, +    .pre_save = onenand_pre_save, +    .post_load = onenand_post_load, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(current_direction, OneNANDState), +        VMSTATE_INT32(cycle, OneNANDState), +        VMSTATE_INT32(otpmode, OneNANDState), +        VMSTATE_UINT16_ARRAY(addr, OneNANDState, 8), +        VMSTATE_UINT16_ARRAY(unladdr, OneNANDState, 8), +        VMSTATE_INT32(bufaddr, OneNANDState), +        VMSTATE_INT32(count, OneNANDState), +        VMSTATE_UINT16(command, OneNANDState), +        VMSTATE_UINT16_ARRAY(config, OneNANDState, 2), +        VMSTATE_UINT16(status, OneNANDState), +        VMSTATE_UINT16(intstatus, OneNANDState), +        VMSTATE_UINT16(wpstatus, OneNANDState), +        VMSTATE_INT32(secs_cur, OneNANDState), +        VMSTATE_PARTIAL_VBUFFER(blockwp, OneNANDState, blocks), +        VMSTATE_UINT8(ecc.cp, OneNANDState), +        VMSTATE_UINT16_ARRAY(ecc.lp, OneNANDState, 2), +        VMSTATE_UINT16(ecc.count, OneNANDState), +        VMSTATE_BUFFER_POINTER_UNSAFE(otp, OneNANDState, 0, +            ((64 + 2) << PAGE_SHIFT)), +        VMSTATE_END_OF_LIST() +    } +}; + +/* Hot reset (Reset OneNAND command) or warm reset (RP pin low) */ +static void onenand_reset(OneNANDState *s, int cold) +{ +    memset(&s->addr, 0, sizeof(s->addr)); +    s->command = 0; +    s->count = 1; +    s->bufaddr = 0; +    s->config[0] = 0x40c0; +    s->config[1] = 0x0000; +    onenand_intr_update(s); +    qemu_irq_raise(s->rdy); +    s->status = 0x0000; +    s->intstatus = cold ? 
0x8080 : 0x8010; +    s->unladdr[0] = 0; +    s->unladdr[1] = 0; +    s->wpstatus = 0x0002; +    s->cycle = 0; +    s->otpmode = 0; +    s->blk_cur = s->blk; +    s->current = s->image; +    s->secs_cur = s->secs; + +    if (cold) { +        /* Lock the whole flash */ +        memset(s->blockwp, ONEN_LOCK_LOCKED, s->blocks); + +        if (s->blk_cur && blk_read(s->blk_cur, 0, s->boot[0], 8) < 0) { +            hw_error("%s: Loading the BootRAM failed.\n", __func__); +        } +    } +} + +static void onenand_system_reset(DeviceState *dev) +{ +    OneNANDState *s = ONE_NAND(dev); + +    onenand_reset(s, 1); +} + +static inline int onenand_load_main(OneNANDState *s, int sec, int secn, +                void *dest) +{ +    if (s->blk_cur) { +        return blk_read(s->blk_cur, sec, dest, secn) < 0; +    } else if (sec + secn > s->secs_cur) { +        return 1; +    } + +    memcpy(dest, s->current + (sec << 9), secn << 9); + +    return 0; +} + +static inline int onenand_prog_main(OneNANDState *s, int sec, int secn, +                void *src) +{ +    int result = 0; + +    if (secn > 0) { +        uint32_t size = (uint32_t)secn * 512; +        const uint8_t *sp = (const uint8_t *)src; +        uint8_t *dp = 0; +        if (s->blk_cur) { +            dp = g_malloc(size); +            if (!dp || blk_read(s->blk_cur, sec, dp, secn) < 0) { +                result = 1; +            } +        } else { +            if (sec + secn > s->secs_cur) { +                result = 1; +            } else { +                dp = (uint8_t *)s->current + (sec << 9); +            } +        } +        if (!result) { +            uint32_t i; +            for (i = 0; i < size; i++) { +                dp[i] &= sp[i]; +            } +            if (s->blk_cur) { +                result = blk_write(s->blk_cur, sec, dp, secn) < 0; +            } +        } +        if (dp && s->blk_cur) { +            g_free(dp); +        } +    } + +    return result; +} + +static inline int onenand_load_spare(OneNANDState *s, int sec, int secn, +                void *dest) +{ +    uint8_t buf[512]; + +    if (s->blk_cur) { +        if (blk_read(s->blk_cur, s->secs_cur + (sec >> 5), buf, 1) < 0) { +            return 1; +        } +        memcpy(dest, buf + ((sec & 31) << 4), secn << 4); +    } else if (sec + secn > s->secs_cur) { +        return 1; +    } else { +        memcpy(dest, s->current + (s->secs_cur << 9) + (sec << 4), secn << 4); +    } +  +    return 0; +} + +static inline int onenand_prog_spare(OneNANDState *s, int sec, int secn, +                void *src) +{ +    int result = 0; +    if (secn > 0) { +        const uint8_t *sp = (const uint8_t *)src; +        uint8_t *dp = 0, *dpp = 0; +        if (s->blk_cur) { +            dp = g_malloc(512); +            if (!dp +                || blk_read(s->blk_cur, s->secs_cur + (sec >> 5), dp, 1) < 0) { +                result = 1; +            } else { +                dpp = dp + ((sec & 31) << 4); +            } +        } else { +            if (sec + secn > s->secs_cur) { +                result = 1; +            } else { +                dpp = s->current + (s->secs_cur << 9) + (sec << 4); +            } +        } +        if (!result) { +            uint32_t i; +            for (i = 0; i < (secn << 4); i++) { +                dpp[i] &= sp[i]; +            } +            if (s->blk_cur) { +                result = blk_write(s->blk_cur, s->secs_cur + (sec >> 5), +                                   dp, 1) < 0; +            } +        } +        g_free(dp); +    } +    
return result;
+}
+
+static inline int onenand_erase(OneNANDState *s, int sec, int num)
+{
+    uint8_t *blankbuf, *tmpbuf;
+
+    blankbuf = g_malloc(512);
+    tmpbuf = g_malloc(512);
+    memset(blankbuf, 0xff, 512);
+    for (; num > 0; num--, sec++) {
+        if (s->blk_cur) {
+            int erasesec = s->secs_cur + (sec >> 5);
+            if (blk_write(s->blk_cur, sec, blankbuf, 1) < 0) {
+                goto fail;
+            }
+            if (blk_read(s->blk_cur, erasesec, tmpbuf, 1) < 0) {
+                goto fail;
+            }
+            memcpy(tmpbuf + ((sec & 31) << 4), blankbuf, 1 << 4);
+            if (blk_write(s->blk_cur, erasesec, tmpbuf, 1) < 0) {
+                goto fail;
+            }
+        } else {
+            if (sec + 1 > s->secs_cur) {
+                goto fail;
+            }
+            memcpy(s->current + (sec << 9), blankbuf, 512);
+            memcpy(s->current + (s->secs_cur << 9) + (sec << 4),
+                   blankbuf, 1 << 4);
+        }
+    }
+
+    g_free(tmpbuf);
+    g_free(blankbuf);
+    return 0;
+
+fail:
+    g_free(tmpbuf);
+    g_free(blankbuf);
+    return 1;
+}
+
+static void onenand_command(OneNANDState *s)
+{
+    int b;
+    int sec;
+    void *buf;
+#define SETADDR(block, page)			\
+    sec = (s->addr[page] & 3) +			\
+            ((((s->addr[page] >> 2) & 0x3f) +	\
+              (((s->addr[block] & 0xfff) |	\
+                (s->addr[block] >> 15 ?		\
+                 s->density_mask : 0)) << 6)) << (PAGE_SHIFT - 9));
+#define SETBUF_M()				\
+    buf = (s->bufaddr & 8) ?			\
+            s->data[(s->bufaddr >> 2) & 1][0] : s->boot[0];	\
+    buf += (s->bufaddr & 3) << 9;
+#define SETBUF_S()				\
+    buf = (s->bufaddr & 8) ?			\
+            s->data[(s->bufaddr >> 2) & 1][1] : s->boot[1];	\
+    buf += (s->bufaddr & 3) << 4;
+
+    switch (s->command) {
+    case 0x00:	/* Load single/multiple sector data unit into buffer */
+        SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE)
+
+        SETBUF_M()
+        if (onenand_load_main(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_LOAD;
+
+#if 0
+        SETBUF_S()
+        if (onenand_load_spare(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_LOAD;
+#endif
+
+        /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages)
+         * or    if (s->bufaddr & 1) + s->count was > 2 (1k-pages)
+         * then we need to split the read/write into two chunks.
+         */
+        s->intstatus |= ONEN_INT | ONEN_INT_LOAD;
+        break;
+    case 0x13:	/* Load single/multiple spare sector into buffer */
+        SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE)
+
+        SETBUF_S()
+        if (onenand_load_spare(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_LOAD;
+
+        /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages)
+         * or    if (s->bufaddr & 1) + s->count was > 2 (1k-pages)
+         * then we need to split the read/write into two chunks.
+         */
+        s->intstatus |= ONEN_INT | ONEN_INT_LOAD;
+        break;
+    case 0x80:	/* Program single/multiple sector data unit from buffer */
+        SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE)
+
+        SETBUF_M()
+        if (onenand_prog_main(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG;
+
+#if 0
+        SETBUF_S()
+        if (onenand_prog_spare(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG;
+#endif
+
+        /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages)
+         * or    if (s->bufaddr & 1) + s->count was > 2 (1k-pages)
+         * then we need to split the read/write into two chunks.
+         */
+        s->intstatus |= ONEN_INT | ONEN_INT_PROG;
+        break;
+    case 0x1a:	/* Program single/multiple spare area sector from buffer */
+        SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE)
+
+        SETBUF_S()
+        if (onenand_prog_spare(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG;
+
+        /* TODO: if (s->bufaddr & 3) + s->count was > 4 (2k-pages)
+         * or    if (s->bufaddr & 1) + s->count was > 2 (1k-pages)
+         * then we need to split the read/write into two chunks.
+         */
+        s->intstatus |= ONEN_INT | ONEN_INT_PROG;
+        break;
+    case 0x1b:	/* Copy-back program */
+        SETBUF_S()
+
+        SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE)
+        if (onenand_load_main(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG;
+
+        SETADDR(ONEN_BUF_DEST_BLOCK, ONEN_BUF_DEST_PAGE)
+        if (onenand_prog_main(s, sec, s->count, buf))
+            s->status |= ONEN_ERR_CMD | ONEN_ERR_PROG;
+
+        /* TODO: spare areas */
+
+        s->intstatus |= ONEN_INT | ONEN_INT_PROG;
+        break;
+
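+    /*
+     * The lock/unlock commands below walk the inclusive block range
+     * held in s->unladdr[], which the guest sets through the Unlock
+     * Start/End Block Address registers (0xf24c/0xf24d).  Sketch of a
+     * typical guest sequence unlocking blocks 4..7, where writew()
+     * stands in for the guest's 16-bit MMIO store and base for the
+     * mapped flash base (register offsets are scaled by s->shift):
+     *
+     *     writew(base + (0xf24c << shift), 4);     start block
+     *     writew(base + (0xf24d << shift), 7);     end block
+     *     writew(base + (0xf220 << shift), 0x23);  unlock command
+     */
+    case 0x23:	/* Unlock NAND array block(s) */
+        s->intstatus |= ONEN_INT;
+
+        /* XXX the previous (?) 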
area should be locked automatically */ +        for (b = s->unladdr[0]; b <= s->unladdr[1]; b ++) { +            if (b >= s->blocks) { +                s->status |= ONEN_ERR_CMD; +                break; +            } +            if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) +                break; + +            s->wpstatus = s->blockwp[b] = ONEN_LOCK_UNLOCKED; +        } +        break; +    case 0x27:	/* Unlock All NAND array blocks */ +        s->intstatus |= ONEN_INT; + +        for (b = 0; b < s->blocks; b ++) { +            if (b >= s->blocks) { +                s->status |= ONEN_ERR_CMD; +                break; +            } +            if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) +                break; + +            s->wpstatus = s->blockwp[b] = ONEN_LOCK_UNLOCKED; +        } +        break; + +    case 0x2a:	/* Lock NAND array block(s) */ +        s->intstatus |= ONEN_INT; + +        for (b = s->unladdr[0]; b <= s->unladdr[1]; b ++) { +            if (b >= s->blocks) { +                s->status |= ONEN_ERR_CMD; +                break; +            } +            if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN) +                break; + +            s->wpstatus = s->blockwp[b] = ONEN_LOCK_LOCKED; +        } +        break; +    case 0x2c:	/* Lock-tight NAND array block(s) */ +        s->intstatus |= ONEN_INT; + +        for (b = s->unladdr[0]; b <= s->unladdr[1]; b ++) { +            if (b >= s->blocks) { +                s->status |= ONEN_ERR_CMD; +                break; +            } +            if (s->blockwp[b] == ONEN_LOCK_UNLOCKED) +                continue; + +            s->wpstatus = s->blockwp[b] = ONEN_LOCK_LOCKTIGHTEN; +        } +        break; + +    case 0x71:	/* Erase-Verify-Read */ +        s->intstatus |= ONEN_INT; +        break; +    case 0x95:	/* Multi-block erase */ +        qemu_irq_pulse(s->intr); +        /* Fall through.  */ +    case 0x94:	/* Block erase */ +        sec = ((s->addr[ONEN_BUF_BLOCK] & 0xfff) | +                        (s->addr[ONEN_BUF_BLOCK] >> 15 ? s->density_mask : 0)) +                << (BLOCK_SHIFT - 9); +        if (onenand_erase(s, sec, 1 << (BLOCK_SHIFT - 9))) +            s->status |= ONEN_ERR_CMD | ONEN_ERR_ERASE; + +        s->intstatus |= ONEN_INT | ONEN_INT_ERASE; +        break; +    case 0xb0:	/* Erase suspend */ +        break; +    case 0x30:	/* Erase resume */ +        s->intstatus |= ONEN_INT | ONEN_INT_ERASE; +        break; + +    case 0xf0:	/* Reset NAND Flash core */ +        onenand_reset(s, 0); +        break; +    case 0xf3:	/* Reset OneNAND */ +        onenand_reset(s, 0); +        break; + +    case 0x65:	/* OTP Access */ +        s->intstatus |= ONEN_INT; +        s->blk_cur = NULL; +        s->current = s->otp; +        s->secs_cur = 1 << (BLOCK_SHIFT - 9); +        s->addr[ONEN_BUF_BLOCK] = 0; +        s->otpmode = 1; +        break; + +    default: +        s->status |= ONEN_ERR_CMD; +        s->intstatus |= ONEN_INT; +        fprintf(stderr, "%s: unknown OneNAND command %x\n", +                        __func__, s->command); +    } + +    onenand_intr_update(s); +} + +static uint64_t onenand_read(void *opaque, hwaddr addr, +                             unsigned size) +{ +    OneNANDState *s = (OneNANDState *) opaque; +    int offset = addr >> s->shift; + +    switch (offset) { +    case 0x0000 ... 
0xc000:
+        return lduw_le_p(s->boot[0] + addr);
+
+    case 0xf000:	/* Manufacturer ID */
+        return s->id.man;
+    case 0xf001:	/* Device ID */
+        return s->id.dev;
+    case 0xf002:	/* Version ID */
+        return s->id.ver;
+    /* TODO: get the following values from a real chip!  */
+    case 0xf003:	/* Data Buffer size */
+        return 1 << PAGE_SHIFT;
+    case 0xf004:	/* Boot Buffer size */
+        return 0x200;
+    case 0xf005:	/* Amount of buffers */
+        return 1 | (2 << 8);
+    case 0xf006:	/* Technology */
+        return 0;
+
+    case 0xf100 ... 0xf107:	/* Start addresses */
+        return s->addr[offset - 0xf100];
+
+    case 0xf200:	/* Start buffer */
+        return (s->bufaddr << 8) | ((s->count - 1) & (1 << (PAGE_SHIFT - 10)));
+
+    case 0xf220:	/* Command */
+        return s->command;
+    case 0xf221:	/* System Configuration 1 */
+        return s->config[0] & 0xffe0;
+    case 0xf222:	/* System Configuration 2 */
+        return s->config[1];
+
+    case 0xf240:	/* Controller Status */
+        return s->status;
+    case 0xf241:	/* Interrupt */
+        return s->intstatus;
+    case 0xf24c:	/* Unlock Start Block Address */
+        return s->unladdr[0];
+    case 0xf24d:	/* Unlock End Block Address */
+        return s->unladdr[1];
+    case 0xf24e:	/* Write Protection Status */
+        return s->wpstatus;
+
+    case 0xff00:	/* ECC Status */
+        return 0x00;
+    case 0xff01:	/* ECC Result of main area data */
+    case 0xff02:	/* ECC Result of spare area data */
+    case 0xff03:	/* ECC Result of main area data */
+    case 0xff04:	/* ECC Result of spare area data */
+        hw_error("%s: implement ECC\n", __func__);
+        return 0x0000;
+    }
+
+    fprintf(stderr, "%s: unknown OneNAND register %x\n",
+                    __func__, offset);
+    return 0;
+}
+
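Both register handlers agree on the Start buffer (0xf200) layout: the BufferRAM slot (BSA, in OneNAND datasheet terms) occupies bits 15:8, with bit 3 of BSA selecting DataRAM over BootRAM, and the sector count (BSC) sits in the low bits, 0 meaning a whole page. A hedged sketch of the decode for a 2kB-page chip; value is a hypothetical guest write and the variable names are illustrative:

    uint16_t value   = 0x0800;              /* DataRAM 0, full page */
    int      bufaddr = (value >> 8) & 0xf;
    int      dataram = (bufaddr >> 2) & 1;  /* which of the two DataRAMs */
    int      sector  = bufaddr & 3;         /* first 512-byte sector in page */
    int      count   = (value & 3) ?: 4;    /* 0 encodes all 4 sectors */

+static void onenand_write(void *opaque, hwaddr addr,
+                          uint64_t value, unsigned size)
+{
+    OneNANDState *s = (OneNANDState *) opaque;
+    int offset = addr >> s->shift;
+    int sec;
+
+    switch (offset) {
+    case 0x0000 ... 0x01ff:
+    case 0x8000 ... 0x800f:
+        if (s->cycle) {
+            s->cycle = 0;
+
+            if (value == 0x0000) {
+                SETADDR(ONEN_BUF_BLOCK, ONEN_BUF_PAGE)
+                onenand_load_main(s, sec,
+                                1 << (PAGE_SHIFT - 9), s->data[0][0]);
+                s->addr[ONEN_BUF_PAGE] += 4;
+                s->addr[ONEN_BUF_PAGE] &= 0xff;
+            }
+            break;
+        }
+
+        switch (value) {
+        case 0x00f0:	/* Reset OneNAND */
+            onenand_reset(s, 0);
+            break;
+
+        case 0x00e0:	/* Load Data into Buffer */
+            s->cycle = 1;
+            break;
+
+        case 0x0090:	/* Read Identification Data */
+            memset(s->boot[0], 0, 3 << s->shift);
+            s->boot[0][0 << s->shift] = s->id.man & 0xff;
+            s->boot[0][1 << s->shift] = s->id.dev & 0xff;
+            s->boot[0][2 << s->shift] = s->wpstatus & 0xff;
+            break;
+
+        default:
+            fprintf(stderr, "%s: unknown OneNAND boot command %"PRIx64"\n",
+                            __func__, value);
+        }
+        break;
+
+    case 0xf100 ... 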
0xf107:	/* Start addresses */
+        s->addr[offset - 0xf100] = value;
+        break;
+
+    case 0xf200:	/* Start buffer */
+        s->bufaddr = (value >> 8) & 0xf;
+        if (PAGE_SHIFT == 11)
+            s->count = (value & 3) ?: 4;
+        else if (PAGE_SHIFT == 10)
+            s->count = (value & 1) ?: 2;
+        break;
+
+    case 0xf220:	/* Command */
+        if (s->intstatus & (1 << 15))
+            break;
+        s->command = value;
+        onenand_command(s);
+        break;
+    case 0xf221:	/* System Configuration 1 */
+        s->config[0] = value;
+        onenand_intr_update(s);
+        qemu_set_irq(s->rdy, (s->config[0] >> 7) & 1);
+        break;
+    case 0xf222:	/* System Configuration 2 */
+        s->config[1] = value;
+        break;
+
+    case 0xf241:	/* Interrupt */
+        s->intstatus &= value;
+        if ((1 << 15) & ~s->intstatus)
+            s->status &= ~(ONEN_ERR_CMD | ONEN_ERR_ERASE |
+                            ONEN_ERR_PROG | ONEN_ERR_LOAD);
+        onenand_intr_update(s);
+        break;
+    case 0xf24c:	/* Unlock Start Block Address */
+        s->unladdr[0] = value & (s->blocks - 1);
+        /* Default the end address to the start address: some guest
+         * software never writes the end-address register.  */
+        s->unladdr[1] = value & (s->blocks - 1);
+        break;
+    case 0xf24d:	/* Unlock End Block Address */
+        s->unladdr[1] = value & (s->blocks - 1);
+        break;
+
+    default:
+        fprintf(stderr, "%s: unknown OneNAND register %x\n",
+                        __func__, offset);
+    }
+}
+
+static const MemoryRegionOps onenand_ops = {
+    .read = onenand_read,
+    .write = onenand_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
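onenand_initfn() below derives the whole geometry from the 16-bit device ID: bits 6:4 give the density as 16 MB << n, and bit 3 marks a dual-die part, which is why density_mask folds an extra bit into block addresses. A worked sketch with a hypothetical ID for a 128 MB single-die chip:

    uint16_t device_id = 0x30;                            /* bits 6:4 = 3 */
    uint32_t size   = 1 << (24 + ((device_id >> 4) & 7)); /* 128 MB */
    int      blocks = size >> BLOCK_SHIFT;                /* 1024 x 128 kB */
    int      ddp    = device_id & 0x08;                   /* 0: single die */

+static int onenand_initfn(SysBusDevice *sbd)
+{
+    DeviceState *dev = DEVICE(sbd);
+    OneNANDState *s = ONE_NAND(dev);
+    uint32_t size = 1 << (24 + ((s->id.dev >> 4) & 7));
+    void *ram;
+
+    s->base = (hwaddr)-1;
+    s->rdy = NULL;
+    s->blocks = size >> BLOCK_SHIFT;
+    s->secs = size >> 9;
+    s->blockwp = g_malloc(s->blocks);
+    s->density_mask = (s->id.dev & 0x08)
+        ? 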
(1 << (6 + ((s->id.dev >> 4) & 7))) : 0; +    memory_region_init_io(&s->iomem, OBJECT(s), &onenand_ops, s, "onenand", +                          0x10000 << s->shift); +    if (!s->blk) { +        s->image = memset(g_malloc(size + (size >> 5)), +                          0xff, size + (size >> 5)); +    } else { +        if (blk_is_read_only(s->blk)) { +            error_report("Can't use a read-only drive"); +            return -1; +        } +        s->blk_cur = s->blk; +    } +    s->otp = memset(g_malloc((64 + 2) << PAGE_SHIFT), +                    0xff, (64 + 2) << PAGE_SHIFT); +    memory_region_init_ram(&s->ram, OBJECT(s), "onenand.ram", +                           0xc000 << s->shift, &error_abort); +    vmstate_register_ram_global(&s->ram); +    ram = memory_region_get_ram_ptr(&s->ram); +    s->boot[0] = ram + (0x0000 << s->shift); +    s->boot[1] = ram + (0x8000 << s->shift); +    s->data[0][0] = ram + ((0x0200 + (0 << (PAGE_SHIFT - 1))) << s->shift); +    s->data[0][1] = ram + ((0x8010 + (0 << (PAGE_SHIFT - 6))) << s->shift); +    s->data[1][0] = ram + ((0x0200 + (1 << (PAGE_SHIFT - 1))) << s->shift); +    s->data[1][1] = ram + ((0x8010 + (1 << (PAGE_SHIFT - 6))) << s->shift); +    onenand_mem_setup(s); +    sysbus_init_irq(sbd, &s->intr); +    sysbus_init_mmio(sbd, &s->container); +    vmstate_register(dev, +                     ((s->shift & 0x7f) << 24) +                     | ((s->id.man & 0xff) << 16) +                     | ((s->id.dev & 0xff) << 8) +                     | (s->id.ver & 0xff), +                     &vmstate_onenand, s); +    return 0; +} + +static Property onenand_properties[] = { +    DEFINE_PROP_UINT16("manufacturer_id", OneNANDState, id.man, 0), +    DEFINE_PROP_UINT16("device_id", OneNANDState, id.dev, 0), +    DEFINE_PROP_UINT16("version_id", OneNANDState, id.ver, 0), +    DEFINE_PROP_INT32("shift", OneNANDState, shift, 0), +    DEFINE_PROP_DRIVE("drive", OneNANDState, blk), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void onenand_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); +    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + +    k->init = onenand_initfn; +    dc->reset = onenand_system_reset; +    dc->props = onenand_properties; +} + +static const TypeInfo onenand_info = { +    .name          = TYPE_ONE_NAND, +    .parent        = TYPE_SYS_BUS_DEVICE, +    .instance_size = sizeof(OneNANDState), +    .class_init    = onenand_class_init, +}; + +static void onenand_register_types(void) +{ +    type_register_static(&onenand_info); +} + +void *onenand_raw_otp(DeviceState *onenand_device) +{ +    OneNANDState *s = ONE_NAND(onenand_device); + +    return s->otp; +} + +type_init(onenand_register_types) diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c new file mode 100644 index 00000000..2ba6c772 --- /dev/null +++ b/hw/block/pflash_cfi01.c @@ -0,0 +1,954 @@ +/* + *  CFI parallel flash with Intel command set emulation + * + *  Copyright (c) 2006 Thorsten Zitterell + *  Copyright (c) 2005 Jocelyn Mayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * For now, this code can emulate flashes of 1, 2 or 4 bytes width. + * Supported commands/modes are: + * - flash read + * - flash write + * - flash ID read + * - sector erase + * - CFI queries + * + * It does not support timings + * It does not support flash interleaving + * It does not implement software data protection as found in many real chips + * It does not implement erase suspend/resume commands + * It does not implement multiple sectors erase + * + * It does not implement much more ... + */ + +#include "hw/hw.h" +#include "hw/block/flash.h" +#include "sysemu/block-backend.h" +#include "qemu/timer.h" +#include "qemu/bitops.h" +#include "exec/address-spaces.h" +#include "qemu/host-utils.h" +#include "hw/sysbus.h" + +#define PFLASH_BUG(fmt, ...) \ +do { \ +    fprintf(stderr, "PFLASH: Possible BUG - " fmt, ## __VA_ARGS__); \ +    exit(1); \ +} while(0) + +/* #define PFLASH_DEBUG */ +#ifdef PFLASH_DEBUG +#define DPRINTF(fmt, ...)                                   \ +do {                                                        \ +    fprintf(stderr, "PFLASH: " fmt , ## __VA_ARGS__);       \ +} while (0) +#else +#define DPRINTF(fmt, ...) do { } while (0) +#endif + +#define TYPE_CFI_PFLASH01 "cfi.pflash01" +#define CFI_PFLASH01(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH01) + +#define PFLASH_BE          0 +#define PFLASH_SECURE      1 + +struct pflash_t { +    /*< private >*/ +    SysBusDevice parent_obj; +    /*< public >*/ + +    BlockBackend *blk; +    uint32_t nb_blocs; +    uint64_t sector_len; +    uint8_t bank_width; +    uint8_t device_width; /* If 0, device width not specified. */ +    uint8_t max_device_width;  /* max device width in bytes */ +    uint32_t features; +    uint8_t wcycle; /* if 0, the flash is read normally */ +    int ro; +    uint8_t cmd; +    uint8_t status; +    uint16_t ident0; +    uint16_t ident1; +    uint16_t ident2; +    uint16_t ident3; +    uint8_t cfi_len; +    uint8_t cfi_table[0x52]; +    uint64_t counter; +    unsigned int writeblock_size; +    QEMUTimer *timer; +    MemoryRegion mem; +    char *name; +    void *storage; +}; + +static int pflash_post_load(void *opaque, int version_id); + +static const VMStateDescription vmstate_pflash = { +    .name = "pflash_cfi01", +    .version_id = 1, +    .minimum_version_id = 1, +    .post_load = pflash_post_load, +    .fields = (VMStateField[]) { +        VMSTATE_UINT8(wcycle, pflash_t), +        VMSTATE_UINT8(cmd, pflash_t), +        VMSTATE_UINT8(status, pflash_t), +        VMSTATE_UINT64(counter, pflash_t), +        VMSTATE_END_OF_LIST() +    } +}; + +static void pflash_timer (void *opaque) +{ +    pflash_t *pfl = opaque; + +    DPRINTF("%s: command %02x done\n", __func__, pfl->cmd); +    /* Reset flash */ +    pfl->status ^= 0x80; +    memory_region_rom_device_set_romd(&pfl->mem, true); +    pfl->wcycle = 0; +    pfl->cmd = 0; +} + +/* Perform a CFI query based on the bank width of the flash. + * If this code is called we know we have a device_width set for + * this flash. + */ +static uint32_t pflash_cfi_query(pflash_t *pfl, hwaddr offset) +{ +    int i; +    uint32_t resp = 0; +    hwaddr boff; + +    /* Adjust incoming offset to match expected device-width +     * addressing. 
CFI query addresses are always specified in terms of
+     * the maximum supported width of the device.  This means that x8
+     * devices and x8/x16 devices in x8 mode behave differently.  For
+     * devices that are not used at their max width, we will be
+     * provided with addresses that use higher address bits than
+     * expected (based on the max width), so we will shift them lower
+     * so that they will match the addresses used when
+     * device_width==max_device_width.
+     */
+    boff = offset >> (ctz32(pfl->bank_width) +
+                      ctz32(pfl->max_device_width) - ctz32(pfl->device_width));
+
+    if (boff > pfl->cfi_len) {
+        return 0;
+    }
+    /* Now we will construct the CFI response generated by a single
+     * device, then replicate that for all devices that make up the
+     * bus.  For wide parts used in x8 mode, CFI query responses
+     * are different from those of native byte-wide parts.
+     */
+    resp = pfl->cfi_table[boff];
+    if (pfl->device_width != pfl->max_device_width) {
+        /* The only case currently supported is x8 mode for a
+         * wider part.
+         */
+        if (pfl->device_width != 1 || pfl->bank_width > 4) {
+            DPRINTF("%s: Unsupported device configuration: "
+                    "device_width=%d, max_device_width=%d\n",
+                    __func__, pfl->device_width,
+                    pfl->max_device_width);
+            return 0;
+        }
+        /* CFI query data is repeated, rather than zero padded for
+         * wide devices used in x8 mode.
+         */
+        for (i = 1; i < pfl->max_device_width; i++) {
+            resp = deposit32(resp, 8 * i, 8, pfl->cfi_table[boff]);
+        }
+    }
+    /* Replicate responses for each device in bank. */
+    if (pfl->device_width < pfl->bank_width) {
+        for (i = pfl->device_width;
+             i < pfl->bank_width; i += pfl->device_width) {
+            resp = deposit32(resp, 8 * i, 8 * pfl->device_width, resp);
+        }
+    }
+
+    return resp;
+}
+
+
+
+/* Perform a device id query based on the bank width of the flash. */
+static uint32_t pflash_devid_query(pflash_t *pfl, hwaddr offset)
+{
+    int i;
+    uint32_t resp;
+    hwaddr boff;
+
+    /* Adjust incoming offset to match expected device-width
+     * addressing. Device ID read addresses are always specified in
+     * terms of the maximum supported width of the device.  This means
+     * that x8 devices and x8/x16 devices in x8 mode behave
+     * differently. For devices that are not used at their max width,
+     * we will be provided with addresses that use higher address bits
+     * than expected (based on the max width), so we will shift them
+     * lower so that they will match the addresses used when
+     * device_width==max_device_width.
+     */
+    boff = offset >> (ctz32(pfl->bank_width) +
+                      ctz32(pfl->max_device_width) - ctz32(pfl->device_width));
+
+    /* Mask off upper bits which may be used to query block
+     * or sector lock status at other addresses.
+     * Offsets 2/3 are block lock status, which is not emulated.
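+     * (Worked example of the shift above, with assumed geometry: for
+     * bank_width=2, device_width=1 and max_device_width=2 the shift is
+     * 1 + 1 - 0 = 2, so a guest read at offset 0x4 queries boff 1, the
+     * device ID.)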
+     */
+    switch (boff & 0xFF) {
+    case 0:
+        resp = pfl->ident0;
+        DPRINTF("%s: Manufacturer Code %04x\n", __func__, resp);
+        break;
+    case 1:
+        resp = pfl->ident1;
+        DPRINTF("%s: Device ID Code %04x\n", __func__, resp);
+        break;
+    default:
+        DPRINTF("%s: Read Device Information offset=%x\n", __func__,
+                (unsigned)offset);
+        return 0;
+        break;
+    }
+    /* Replicate responses for each device in bank. */
+    if (pfl->device_width < pfl->bank_width) {
+        for (i = pfl->device_width;
+              i < pfl->bank_width; i += pfl->device_width) {
+            resp = deposit32(resp, 8 * i, 8 * pfl->device_width, resp);
+        }
+    }
+
+    return resp;
+}
+
+static uint32_t pflash_data_read(pflash_t *pfl, hwaddr offset,
+                                 int width, int be)
+{
+    uint8_t *p;
+    uint32_t ret;
+
+    p = pfl->storage;
+    switch (width) {
+    case 1:
+        ret = p[offset];
+        DPRINTF("%s: data offset " TARGET_FMT_plx " %02x\n",
+                __func__, offset, ret);
+        break;
+    case 2:
+        if (be) {
+            ret = p[offset] << 8;
+            ret |= p[offset + 1];
+        } else {
+            ret = p[offset];
+            ret |= p[offset + 1] << 8;
+        }
+        DPRINTF("%s: data offset " TARGET_FMT_plx " %04x\n",
+                __func__, offset, ret);
+        break;
+    case 4:
+        if (be) {
+            ret = p[offset] << 24;
+            ret |= p[offset + 1] << 16;
+            ret |= p[offset + 2] << 8;
+            ret |= p[offset + 3];
+        } else {
+            ret = p[offset];
+            ret |= p[offset + 1] << 8;
+            ret |= p[offset + 2] << 16;
+            ret |= p[offset + 3] << 24;
+        }
+        DPRINTF("%s: data offset " TARGET_FMT_plx " %08x\n",
+                __func__, offset, ret);
+        break;
+    default:
+        DPRINTF("BUG in %s\n", __func__);
+        abort();
+    }
+    return ret;
+}
+
+static uint32_t pflash_read (pflash_t *pfl, hwaddr offset,
+                             int width, int be)
+{
+    hwaddr boff;
+    uint32_t ret;
+
+    ret = -1;
+
+#if 0
+    DPRINTF("%s: reading offset " TARGET_FMT_plx " under cmd %02x width %d\n",
+            __func__, offset, pfl->cmd, width);
+#endif
+    switch (pfl->cmd) {
+    default:
+        /* This should never happen: reset state & treat it as a read */
+        DPRINTF("%s: unknown command state: %x\n", __func__, pfl->cmd);
+        pfl->wcycle = 0;
+        pfl->cmd = 0;
+        /* fall through to read code */
+    case 0x00:
+        /* Flash area read */
+        ret = pflash_data_read(pfl, offset, width, be);
+        break;
+    case 0x10: /* Single byte program */
+    case 0x20: /* Block erase */
+    case 0x28: /* Block erase */
+    case 0x40: /* Single byte program */
+    case 0x50: /* Clear status register */
+    case 0x60: /* Block (un)lock */
+    case 0x70: /* Status Register */
+    case 0xe8: /* Write block */
+        /* Status register read.  Return status from each device in
+         * bank.
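+         * (Illustrative case: a 4-byte read of a bank built from
+         * 2-byte devices returns the 8-bit status value in the low
+         * byte of each 16-bit lane.)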
+         */ +        ret = pfl->status; +        if (pfl->device_width && width > pfl->device_width) { +            int shift = pfl->device_width * 8; +            while (shift + pfl->device_width * 8 <= width * 8) { +                ret |= pfl->status << shift; +                shift += pfl->device_width * 8; +            } +        } else if (!pfl->device_width && width > 2) { +            /* Handle 32 bit flash cases where device width is not +             * set. (Existing behavior before device width added.) +             */ +            ret |= pfl->status << 16; +        } +        DPRINTF("%s: status %x\n", __func__, ret); +        break; +    case 0x90: +        if (!pfl->device_width) { +            /* Preserve old behavior if device width not specified */ +            boff = offset & 0xFF; +            if (pfl->bank_width == 2) { +                boff = boff >> 1; +            } else if (pfl->bank_width == 4) { +                boff = boff >> 2; +            } + +            switch (boff) { +            case 0: +                ret = pfl->ident0 << 8 | pfl->ident1; +                DPRINTF("%s: Manufacturer Code %04x\n", __func__, ret); +                break; +            case 1: +                ret = pfl->ident2 << 8 | pfl->ident3; +                DPRINTF("%s: Device ID Code %04x\n", __func__, ret); +                break; +            default: +                DPRINTF("%s: Read Device Information boff=%x\n", __func__, +                        (unsigned)boff); +                ret = 0; +                break; +            } +        } else { +            /* If we have a read larger than the bank_width, combine multiple +             * manufacturer/device ID queries into a single response. +             */ +            int i; +            for (i = 0; i < width; i += pfl->bank_width) { +                ret = deposit32(ret, i * 8, pfl->bank_width * 8, +                                pflash_devid_query(pfl, +                                                 offset + i * pfl->bank_width)); +            } +        } +        break; +    case 0x98: /* Query mode */ +        if (!pfl->device_width) { +            /* Preserve old behavior if device width not specified */ +            boff = offset & 0xFF; +            if (pfl->bank_width == 2) { +                boff = boff >> 1; +            } else if (pfl->bank_width == 4) { +                boff = boff >> 2; +            } + +            if (boff > pfl->cfi_len) { +                ret = 0; +            } else { +                ret = pfl->cfi_table[boff]; +            } +        } else { +            /* If we have a read larger than the bank_width, combine multiple +             * CFI queries into a single response. 
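+             * (For example, an assumed 4-byte read on a 2-byte-wide
+             * bank issues two queries and packs them with deposit32.)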
+             */ +            int i; +            for (i = 0; i < width; i += pfl->bank_width) { +                ret = deposit32(ret, i * 8, pfl->bank_width * 8, +                                pflash_cfi_query(pfl, +                                                 offset + i * pfl->bank_width)); +            } +        } + +        break; +    } +    return ret; +} + +/* update flash content on disk */ +static void pflash_update(pflash_t *pfl, int offset, +                          int size) +{ +    int offset_end; +    if (pfl->blk) { +        offset_end = offset + size; +        /* round to sectors */ +        offset = offset >> 9; +        offset_end = (offset_end + 511) >> 9; +        blk_write(pfl->blk, offset, pfl->storage + (offset << 9), +                  offset_end - offset); +    } +} + +static inline void pflash_data_write(pflash_t *pfl, hwaddr offset, +                                     uint32_t value, int width, int be) +{ +    uint8_t *p = pfl->storage; + +    DPRINTF("%s: block write offset " TARGET_FMT_plx +            " value %x counter %016" PRIx64 "\n", +            __func__, offset, value, pfl->counter); +    switch (width) { +    case 1: +        p[offset] = value; +        break; +    case 2: +        if (be) { +            p[offset] = value >> 8; +            p[offset + 1] = value; +        } else { +            p[offset] = value; +            p[offset + 1] = value >> 8; +        } +        break; +    case 4: +        if (be) { +            p[offset] = value >> 24; +            p[offset + 1] = value >> 16; +            p[offset + 2] = value >> 8; +            p[offset + 3] = value; +        } else { +            p[offset] = value; +            p[offset + 1] = value >> 8; +            p[offset + 2] = value >> 16; +            p[offset + 3] = value >> 24; +        } +        break; +    } + +} + +static void pflash_write(pflash_t *pfl, hwaddr offset, +                         uint32_t value, int width, int be) +{ +    uint8_t *p; +    uint8_t cmd; + +    cmd = value; + +    DPRINTF("%s: writing offset " TARGET_FMT_plx " value %08x width %d wcycle 0x%x\n", +            __func__, offset, value, width, pfl->wcycle); + +    if (!pfl->wcycle) { +        /* Set the device in I/O access mode */ +        memory_region_rom_device_set_romd(&pfl->mem, false); +    } + +    switch (pfl->wcycle) { +    case 0: +        /* read mode */ +        switch (cmd) { +        case 0x00: /* ??? */ +            goto reset_flash; +        case 0x10: /* Single Byte Program */ +        case 0x40: /* Single Byte Program */ +            DPRINTF("%s: Single Byte Program\n", __func__); +            break; +        case 0x20: /* Block erase */ +            p = pfl->storage; +            offset &= ~(pfl->sector_len - 1); + +            DPRINTF("%s: block erase at " TARGET_FMT_plx " bytes %x\n", +                    __func__, offset, (unsigned)pfl->sector_len); + +            if (!pfl->ro) { +                memset(p + offset, 0xff, pfl->sector_len); +                pflash_update(pfl, offset, pfl->sector_len); +            } else { +                pfl->status |= 0x20; /* Block erase error */ +            } +            pfl->status |= 0x80; /* Ready! 
*/ +            break; +        case 0x50: /* Clear status bits */ +            DPRINTF("%s: Clear status bits\n", __func__); +            pfl->status = 0x0; +            goto reset_flash; +        case 0x60: /* Block (un)lock */ +            DPRINTF("%s: Block unlock\n", __func__); +            break; +        case 0x70: /* Status Register */ +            DPRINTF("%s: Read status register\n", __func__); +            pfl->cmd = cmd; +            return; +        case 0x90: /* Read Device ID */ +            DPRINTF("%s: Read Device information\n", __func__); +            pfl->cmd = cmd; +            return; +        case 0x98: /* CFI query */ +            DPRINTF("%s: CFI query\n", __func__); +            break; +        case 0xe8: /* Write to buffer */ +            DPRINTF("%s: Write to buffer\n", __func__); +            pfl->status |= 0x80; /* Ready! */ +            break; +        case 0xf0: /* Probe for AMD flash */ +            DPRINTF("%s: Probe for AMD flash\n", __func__); +            goto reset_flash; +        case 0xff: /* Read array mode */ +            DPRINTF("%s: Read array mode\n", __func__); +            goto reset_flash; +        default: +            goto error_flash; +        } +        pfl->wcycle++; +        pfl->cmd = cmd; +        break; +    case 1: +        switch (pfl->cmd) { +        case 0x10: /* Single Byte Program */ +        case 0x40: /* Single Byte Program */ +            DPRINTF("%s: Single Byte Program\n", __func__); +            if (!pfl->ro) { +                pflash_data_write(pfl, offset, value, width, be); +                pflash_update(pfl, offset, width); +            } else { +                pfl->status |= 0x10; /* Programming error */ +            } +            pfl->status |= 0x80; /* Ready! */ +            pfl->wcycle = 0; +        break; +        case 0x20: /* Block erase */ +        case 0x28: +            if (cmd == 0xd0) { /* confirm */ +                pfl->wcycle = 0; +                pfl->status |= 0x80; +            } else if (cmd == 0xff) { /* read array mode */ +                goto reset_flash; +            } else +                goto error_flash; + +            break; +        case 0xe8: +            /* Mask writeblock size based on device width, or bank width if +             * device width not specified. 
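+             * (For an assumed x16 part, device_width of 2, only the
+             * low 16 bits of the count written by the guest are kept.)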
+             */ +            if (pfl->device_width) { +                value = extract32(value, 0, pfl->device_width * 8); +            } else { +                value = extract32(value, 0, pfl->bank_width * 8); +            } +            DPRINTF("%s: block write of %x bytes\n", __func__, value); +            pfl->counter = value; +            pfl->wcycle++; +            break; +        case 0x60: +            if (cmd == 0xd0) { +                pfl->wcycle = 0; +                pfl->status |= 0x80; +            } else if (cmd == 0x01) { +                pfl->wcycle = 0; +                pfl->status |= 0x80; +            } else if (cmd == 0xff) { +                goto reset_flash; +            } else { +                DPRINTF("%s: Unknown (un)locking command\n", __func__); +                goto reset_flash; +            } +            break; +        case 0x98: +            if (cmd == 0xff) { +                goto reset_flash; +            } else { +                DPRINTF("%s: leaving query mode\n", __func__); +            } +            break; +        default: +            goto error_flash; +        } +        break; +    case 2: +        switch (pfl->cmd) { +        case 0xe8: /* Block write */ +            if (!pfl->ro) { +                pflash_data_write(pfl, offset, value, width, be); +            } else { +                pfl->status |= 0x10; /* Programming error */ +            } + +            pfl->status |= 0x80; + +            if (!pfl->counter) { +                hwaddr mask = pfl->writeblock_size - 1; +                mask = ~mask; + +                DPRINTF("%s: block write finished\n", __func__); +                pfl->wcycle++; +                if (!pfl->ro) { +                    /* Flush the entire write buffer onto backing storage.  
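The mask computed above aligns the start of the update down to a writeblock_size boundary, so a single update covers the whole buffer. 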
*/ +                    pflash_update(pfl, offset & mask, pfl->writeblock_size); +                } else { +                    pfl->status |= 0x10; /* Programming error */ +                } +            } + +            pfl->counter--; +            break; +        default: +            goto error_flash; +        } +        break; +    case 3: /* Confirm mode */ +        switch (pfl->cmd) { +        case 0xe8: /* Block write */ +            if (cmd == 0xd0) { +                pfl->wcycle = 0; +                pfl->status |= 0x80; +            } else { +                DPRINTF("%s: unknown command for \"write block\"\n", __func__); +                PFLASH_BUG("Write block confirm"); +                goto reset_flash; +            } +            break; +        default: +            goto error_flash; +        } +        break; +    default: +        /* Should never happen */ +        DPRINTF("%s: invalid write state\n",  __func__); +        goto reset_flash; +    } +    return; + + error_flash: +    qemu_log_mask(LOG_UNIMP, "%s: Unimplemented flash cmd sequence " +                  "(offset " TARGET_FMT_plx ", wcycle 0x%x cmd 0x%x value 0x%x)" +                  "\n", __func__, offset, pfl->wcycle, pfl->cmd, value); + + reset_flash: +    memory_region_rom_device_set_romd(&pfl->mem, true); + +    pfl->wcycle = 0; +    pfl->cmd = 0; +} + + +static MemTxResult pflash_mem_read_with_attrs(void *opaque, hwaddr addr, uint64_t *value, +                                              unsigned len, MemTxAttrs attrs) +{ +    pflash_t *pfl = opaque; +    bool be = !!(pfl->features & (1 << PFLASH_BE)); + +    if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) { +        *value = pflash_data_read(opaque, addr, len, be); +    } else { +        *value = pflash_read(opaque, addr, len, be); +    } +    return MEMTX_OK; +} + +static MemTxResult pflash_mem_write_with_attrs(void *opaque, hwaddr addr, uint64_t value, +                                               unsigned len, MemTxAttrs attrs) +{ +    pflash_t *pfl = opaque; +    bool be = !!(pfl->features & (1 << PFLASH_BE)); + +    if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) { +        return MEMTX_ERROR; +    } else { +        pflash_write(opaque, addr, value, len, be); +        return MEMTX_OK; +    } +} + +static const MemoryRegionOps pflash_cfi01_ops = { +    .read_with_attrs = pflash_mem_read_with_attrs, +    .write_with_attrs = pflash_mem_write_with_attrs, +    .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void pflash_cfi01_realize(DeviceState *dev, Error **errp) +{ +    pflash_t *pfl = CFI_PFLASH01(dev); +    uint64_t total_len; +    int ret; +    uint64_t blocks_per_device, device_len; +    int num_devices; +    Error *local_err = NULL; + +    total_len = pfl->sector_len * pfl->nb_blocs; + +    /* These are only used to expose the parameters of each device +     * in the cfi_table[]. +     */ +    num_devices = pfl->device_width ? 
(pfl->bank_width / pfl->device_width) : 1; +    blocks_per_device = pfl->nb_blocs / num_devices; +    device_len = pfl->sector_len * blocks_per_device; + +    /* XXX: to be fixed */ +#if 0 +    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) && +        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024)) +        return NULL; +#endif + +    memory_region_init_rom_device( +        &pfl->mem, OBJECT(dev), +        &pflash_cfi01_ops, +        pfl, +        pfl->name, total_len, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        return; +    } + +    vmstate_register_ram(&pfl->mem, DEVICE(pfl)); +    pfl->storage = memory_region_get_ram_ptr(&pfl->mem); +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + +    if (pfl->blk) { +        /* read the initial flash content */ +        ret = blk_read(pfl->blk, 0, pfl->storage, total_len >> 9); + +        if (ret < 0) { +            vmstate_unregister_ram(&pfl->mem, DEVICE(pfl)); +            error_setg(errp, "failed to read the initial flash content"); +            return; +        } +    } + +    if (pfl->blk) { +        pfl->ro = blk_is_read_only(pfl->blk); +    } else { +        pfl->ro = 0; +    } + +    /* Default to devices being used at their maximum device width. This was +     * assumed before the device_width support was added. +     */ +    if (!pfl->max_device_width) { +        pfl->max_device_width = pfl->device_width; +    } + +    pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); +    pfl->wcycle = 0; +    pfl->cmd = 0; +    pfl->status = 0; +    /* Hardcoded CFI table */ +    pfl->cfi_len = 0x52; +    /* Standard "QRY" string */ +    pfl->cfi_table[0x10] = 'Q'; +    pfl->cfi_table[0x11] = 'R'; +    pfl->cfi_table[0x12] = 'Y'; +    /* Command set (Intel) */ +    pfl->cfi_table[0x13] = 0x01; +    pfl->cfi_table[0x14] = 0x00; +    /* Primary extended table address (none) */ +    pfl->cfi_table[0x15] = 0x31; +    pfl->cfi_table[0x16] = 0x00; +    /* Alternate command set (none) */ +    pfl->cfi_table[0x17] = 0x00; +    pfl->cfi_table[0x18] = 0x00; +    /* Alternate extended table (none) */ +    pfl->cfi_table[0x19] = 0x00; +    pfl->cfi_table[0x1A] = 0x00; +    /* Vcc min */ +    pfl->cfi_table[0x1B] = 0x45; +    /* Vcc max */ +    pfl->cfi_table[0x1C] = 0x55; +    /* Vpp min (no Vpp pin) */ +    pfl->cfi_table[0x1D] = 0x00; +    /* Vpp max (no Vpp pin) */ +    pfl->cfi_table[0x1E] = 0x00; +    /* Reserved */ +    pfl->cfi_table[0x1F] = 0x07; +    /* Timeout for min size buffer write */ +    pfl->cfi_table[0x20] = 0x07; +    /* Typical timeout for block erase */ +    pfl->cfi_table[0x21] = 0x0a; +    /* Typical timeout for full chip erase (4096 ms) */ +    pfl->cfi_table[0x22] = 0x00; +    /* Reserved */ +    pfl->cfi_table[0x23] = 0x04; +    /* Max timeout for buffer write */ +    pfl->cfi_table[0x24] = 0x04; +    /* Max timeout for block erase */ +    pfl->cfi_table[0x25] = 0x04; +    /* Max timeout for chip erase */ +    pfl->cfi_table[0x26] = 0x00; +    /* Device size */ +    pfl->cfi_table[0x27] = ctz32(device_len); /* + 1; */ +    /* Flash device interface (8 & 16 bits) */ +    pfl->cfi_table[0x28] = 0x02; +    pfl->cfi_table[0x29] = 0x00; +    /* Max number of bytes in multi-bytes write */ +    if (pfl->bank_width == 1) { +        pfl->cfi_table[0x2A] = 0x08; +    } else { +        pfl->cfi_table[0x2A] = 0x0B; +    } +    pfl->writeblock_size = 1 << pfl->cfi_table[0x2A]; + +    pfl->cfi_table[0x2B] = 0x00; +    /* Number of erase block 
regions (uniform) */
+    pfl->cfi_table[0x2C] = 0x01;
+    /* Erase block region 1 */
+    pfl->cfi_table[0x2D] = blocks_per_device - 1;
+    pfl->cfi_table[0x2E] = (blocks_per_device - 1) >> 8;
+    pfl->cfi_table[0x2F] = pfl->sector_len >> 8;
+    pfl->cfi_table[0x30] = pfl->sector_len >> 16;
+
+    /* Extended */
+    pfl->cfi_table[0x31] = 'P';
+    pfl->cfi_table[0x32] = 'R';
+    pfl->cfi_table[0x33] = 'I';
+
+    pfl->cfi_table[0x34] = '1';
+    pfl->cfi_table[0x35] = '0';
+
+    pfl->cfi_table[0x36] = 0x00;
+    pfl->cfi_table[0x37] = 0x00;
+    pfl->cfi_table[0x38] = 0x00;
+    pfl->cfi_table[0x39] = 0x00;
+
+    pfl->cfi_table[0x3a] = 0x00;
+
+    pfl->cfi_table[0x3b] = 0x00;
+    pfl->cfi_table[0x3c] = 0x00;
+
+    pfl->cfi_table[0x3f] = 0x01; /* Number of protection fields */
+}
+
+static Property pflash_cfi01_properties[] = {
+    DEFINE_PROP_DRIVE("drive", struct pflash_t, blk),
+    /* num-blocks is the number of blocks actually visible to the guest,
+     * i.e. the total size of the device divided by the sector length.
+     * If we're emulating flash devices wired in parallel, the actual
+     * number of blocks per individual device will differ.
+     */
+    DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0),
+    DEFINE_PROP_UINT64("sector-length", struct pflash_t, sector_len, 0),
+    /* width here is the overall width of this QEMU device in bytes.
+     * The QEMU device may be emulating a number of flash devices
+     * wired up in parallel; the width of each individual flash
+     * device should be specified via device-width. If the individual
+     * devices have a maximum width which is greater than the width
+     * they are being used for, this maximum width should be set via
+     * max-device-width (which otherwise defaults to device-width).
+     * So for instance a 32-bit wide QEMU flash device made from four
+     * 16-bit flash devices used in 8-bit wide mode would be configured
+     * with width = 4, device-width = 1, max-device-width = 2.
+     *
+     * If device-width is not specified we default to backwards
+     * compatible behaviour which is a bad emulation of two
+     * 16 bit devices making up a 32 bit wide QEMU device. This
+     * is deprecated for new uses of this device.
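+     * (Board code would set these through the usual qdev property
+     * calls, e.g. qdev_prop_set_uint8(dev, "device-width", 1); the
+     * value here is only an assumed example.)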
+     */ +    DEFINE_PROP_UINT8("width", struct pflash_t, bank_width, 0), +    DEFINE_PROP_UINT8("device-width", struct pflash_t, device_width, 0), +    DEFINE_PROP_UINT8("max-device-width", struct pflash_t, max_device_width, 0), +    DEFINE_PROP_BIT("big-endian", struct pflash_t, features, PFLASH_BE, 0), +    DEFINE_PROP_BIT("secure", struct pflash_t, features, PFLASH_SECURE, 0), +    DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), +    DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), +    DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), +    DEFINE_PROP_UINT16("id3", struct pflash_t, ident3, 0), +    DEFINE_PROP_STRING("name", struct pflash_t, name), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void pflash_cfi01_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->realize = pflash_cfi01_realize; +    dc->props = pflash_cfi01_properties; +    dc->vmsd = &vmstate_pflash; +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + + +static const TypeInfo pflash_cfi01_info = { +    .name           = TYPE_CFI_PFLASH01, +    .parent         = TYPE_SYS_BUS_DEVICE, +    .instance_size  = sizeof(struct pflash_t), +    .class_init     = pflash_cfi01_class_init, +}; + +static void pflash_cfi01_register_types(void) +{ +    type_register_static(&pflash_cfi01_info); +} + +type_init(pflash_cfi01_register_types) + +pflash_t *pflash_cfi01_register(hwaddr base, +                                DeviceState *qdev, const char *name, +                                hwaddr size, +                                BlockBackend *blk, +                                uint32_t sector_len, int nb_blocs, +                                int bank_width, uint16_t id0, uint16_t id1, +                                uint16_t id2, uint16_t id3, int be) +{ +    DeviceState *dev = qdev_create(NULL, TYPE_CFI_PFLASH01); + +    if (blk) { +        qdev_prop_set_drive(dev, "drive", blk, &error_abort); +    } +    qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); +    qdev_prop_set_uint64(dev, "sector-length", sector_len); +    qdev_prop_set_uint8(dev, "width", bank_width); +    qdev_prop_set_bit(dev, "big-endian", !!be); +    qdev_prop_set_uint16(dev, "id0", id0); +    qdev_prop_set_uint16(dev, "id1", id1); +    qdev_prop_set_uint16(dev, "id2", id2); +    qdev_prop_set_uint16(dev, "id3", id3); +    qdev_prop_set_string(dev, "name", name); +    qdev_init_nofail(dev); + +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); +    return CFI_PFLASH01(dev); +} + +MemoryRegion *pflash_cfi01_get_memory(pflash_t *fl) +{ +    return &fl->mem; +} + +static int pflash_post_load(void *opaque, int version_id) +{ +    pflash_t *pfl = opaque; + +    if (!pfl->ro) { +        DPRINTF("%s: updating bdrv for %s\n", __func__, pfl->name); +        pflash_update(pfl, 0, pfl->sector_len * pfl->nb_blocs); +    } +    return 0; +} diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c new file mode 100644 index 00000000..074a005f --- /dev/null +++ b/hw/block/pflash_cfi02.c @@ -0,0 +1,795 @@ +/* + *  CFI parallel flash with AMD command set emulation + * + *  Copyright (c) 2005 Jocelyn Mayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * For now, this code can emulate flashes of 1, 2 or 4 bytes width.
+ * Supported commands/modes are:
+ * - flash read
+ * - flash write
+ * - flash ID read
+ * - sector erase
+ * - chip erase
+ * - unlock bypass command
+ * - CFI queries
+ *
+ * It does not support flash interleaving.
+ * It does not implement boot blocks with reduced size
+ * It does not implement software data protection as found in many real chips
+ * It does not implement erase suspend/resume commands
+ * It does not implement multiple sectors erase
+ */
+
+#include "hw/hw.h"
+#include "hw/block/flash.h"
+#include "qemu/timer.h"
+#include "sysemu/block-backend.h"
+#include "exec/address-spaces.h"
+#include "qemu/host-utils.h"
+#include "hw/sysbus.h"
+
+//#define PFLASH_DEBUG
+#ifdef PFLASH_DEBUG
+#define DPRINTF(fmt, ...)                                  \
+do {                                                       \
+    fprintf(stderr, "PFLASH: " fmt , ## __VA_ARGS__);       \
+} while (0)
+#else
+#define DPRINTF(fmt, ...) do { } while (0)
+#endif
+
+#define PFLASH_LAZY_ROMD_THRESHOLD 42
+
+#define TYPE_CFI_PFLASH02 "cfi.pflash02"
+#define CFI_PFLASH02(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH02)
+
+struct pflash_t {
+    /*< private >*/
+    SysBusDevice parent_obj;
+    /*< public >*/
+
+    BlockBackend *blk;
+    uint32_t sector_len;
+    uint32_t nb_blocs;
+    uint32_t chip_len;
+    uint8_t mappings;
+    uint8_t width;
+    uint8_t be;
+    int wcycle; /* if 0, the flash is read normally */
+    int bypass;
+    int ro;
+    uint8_t cmd;
+    uint8_t status;
+    /* FIXME: implement array device properties */
+    uint16_t ident0;
+    uint16_t ident1;
+    uint16_t ident2;
+    uint16_t ident3;
+    uint16_t unlock_addr0;
+    uint16_t unlock_addr1;
+    uint8_t cfi_len;
+    uint8_t cfi_table[0x52];
+    QEMUTimer *timer;
+    /* The device replicates the flash memory across its memory space.  Emulate
+     * that by having a container (.mem) filled with an array of aliases
+     * (.mem_mappings) pointing to the flash memory (.orig_mem).
+     */
+    MemoryRegion mem;
+    MemoryRegion *mem_mappings;    /* array; one per mapping */
+    MemoryRegion orig_mem;
+    int rom_mode;
+    int read_counter; /* used for lazy switch-back to rom mode */
+    char *name;
+    void *storage;
+};
+
+/*
+ * Set up replicated mappings of the same region.
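+ * Each alias makes the one backing region (.orig_mem) visible again at
+ * the next multiple of its size, mirroring a chip that decodes only the
+ * low address bits.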
+ */ +static void pflash_setup_mappings(pflash_t *pfl) +{ +    unsigned i; +    hwaddr size = memory_region_size(&pfl->orig_mem); + +    memory_region_init(&pfl->mem, OBJECT(pfl), "pflash", pfl->mappings * size); +    pfl->mem_mappings = g_new(MemoryRegion, pfl->mappings); +    for (i = 0; i < pfl->mappings; ++i) { +        memory_region_init_alias(&pfl->mem_mappings[i], OBJECT(pfl), +                                 "pflash-alias", &pfl->orig_mem, 0, size); +        memory_region_add_subregion(&pfl->mem, i * size, &pfl->mem_mappings[i]); +    } +} + +static void pflash_register_memory(pflash_t *pfl, int rom_mode) +{ +    memory_region_rom_device_set_romd(&pfl->orig_mem, rom_mode); +    pfl->rom_mode = rom_mode; +} + +static void pflash_timer (void *opaque) +{ +    pflash_t *pfl = opaque; + +    DPRINTF("%s: command %02x done\n", __func__, pfl->cmd); +    /* Reset flash */ +    pfl->status ^= 0x80; +    if (pfl->bypass) { +        pfl->wcycle = 2; +    } else { +        pflash_register_memory(pfl, 1); +        pfl->wcycle = 0; +    } +    pfl->cmd = 0; +} + +static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, +                             int width, int be) +{ +    hwaddr boff; +    uint32_t ret; +    uint8_t *p; + +    DPRINTF("%s: offset " TARGET_FMT_plx "\n", __func__, offset); +    ret = -1; +    /* Lazy reset to ROMD mode after a certain amount of read accesses */ +    if (!pfl->rom_mode && pfl->wcycle == 0 && +        ++pfl->read_counter > PFLASH_LAZY_ROMD_THRESHOLD) { +        pflash_register_memory(pfl, 1); +    } +    offset &= pfl->chip_len - 1; +    boff = offset & 0xFF; +    if (pfl->width == 2) +        boff = boff >> 1; +    else if (pfl->width == 4) +        boff = boff >> 2; +    switch (pfl->cmd) { +    default: +        /* This should never happen : reset state & treat it as a read*/ +        DPRINTF("%s: unknown command state: %x\n", __func__, pfl->cmd); +        pfl->wcycle = 0; +        pfl->cmd = 0; +        /* fall through to the read code */ +    case 0x80: +        /* We accept reads during second unlock sequence... */ +    case 0x00: +    flash_read: +        /* Flash area read */ +        p = pfl->storage; +        switch (width) { +        case 1: +            ret = p[offset]; +//            DPRINTF("%s: data offset %08x %02x\n", __func__, offset, ret); +            break; +        case 2: +            if (be) { +                ret = p[offset] << 8; +                ret |= p[offset + 1]; +            } else { +                ret = p[offset]; +                ret |= p[offset + 1] << 8; +            } +//            DPRINTF("%s: data offset %08x %04x\n", __func__, offset, ret); +            break; +        case 4: +            if (be) { +                ret = p[offset] << 24; +                ret |= p[offset + 1] << 16; +                ret |= p[offset + 2] << 8; +                ret |= p[offset + 3]; +            } else { +                ret = p[offset]; +                ret |= p[offset + 1] << 8; +                ret |= p[offset + 2] << 16; +                ret |= p[offset + 3] << 24; +            } +//            DPRINTF("%s: data offset %08x %08x\n", __func__, offset, ret); +            break; +        } +        break; +    case 0x90: +        /* flash ID read */ +        switch (boff) { +        case 0x00: +        case 0x01: +            ret = boff & 0x01 ? 
pfl->ident1 : pfl->ident0; +            break; +        case 0x02: +            ret = 0x00; /* Pretend all sectors are unprotected */ +            break; +        case 0x0E: +        case 0x0F: +            ret = boff & 0x01 ? pfl->ident3 : pfl->ident2; +            if (ret == (uint8_t)-1) { +                goto flash_read; +            } +            break; +        default: +            goto flash_read; +        } +        DPRINTF("%s: ID " TARGET_FMT_plx " %x\n", __func__, boff, ret); +        break; +    case 0xA0: +    case 0x10: +    case 0x30: +        /* Status register read */ +        ret = pfl->status; +        DPRINTF("%s: status %x\n", __func__, ret); +        /* Toggle bit 6 */ +        pfl->status ^= 0x40; +        break; +    case 0x98: +        /* CFI query mode */ +        if (boff > pfl->cfi_len) +            ret = 0; +        else +            ret = pfl->cfi_table[boff]; +        break; +    } + +    return ret; +} + +/* update flash content on disk */ +static void pflash_update(pflash_t *pfl, int offset, +                          int size) +{ +    int offset_end; +    if (pfl->blk) { +        offset_end = offset + size; +        /* round to sectors */ +        offset = offset >> 9; +        offset_end = (offset_end + 511) >> 9; +        blk_write(pfl->blk, offset, pfl->storage + (offset << 9), +                  offset_end - offset); +    } +} + +static void pflash_write (pflash_t *pfl, hwaddr offset, +                          uint32_t value, int width, int be) +{ +    hwaddr boff; +    uint8_t *p; +    uint8_t cmd; + +    cmd = value; +    if (pfl->cmd != 0xA0 && cmd == 0xF0) { +#if 0 +        DPRINTF("%s: flash reset asked (%02x %02x)\n", +                __func__, pfl->cmd, cmd); +#endif +        goto reset_flash; +    } +    DPRINTF("%s: offset " TARGET_FMT_plx " %08x %d %d\n", __func__, +            offset, value, width, pfl->wcycle); +    offset &= pfl->chip_len - 1; + +    DPRINTF("%s: offset " TARGET_FMT_plx " %08x %d\n", __func__, +            offset, value, width); +    boff = offset & (pfl->sector_len - 1); +    if (pfl->width == 2) +        boff = boff >> 1; +    else if (pfl->width == 4) +        boff = boff >> 2; +    switch (pfl->wcycle) { +    case 0: +        /* Set the device in I/O access mode if required */ +        if (pfl->rom_mode) +            pflash_register_memory(pfl, 0); +        pfl->read_counter = 0; +        /* We're in read mode */ +    check_unlock0: +        if (boff == 0x55 && cmd == 0x98) { +        enter_CFI_mode: +            /* Enter CFI query mode */ +            pfl->wcycle = 7; +            pfl->cmd = 0x98; +            return; +        } +        if (boff != pfl->unlock_addr0 || cmd != 0xAA) { +            DPRINTF("%s: unlock0 failed " TARGET_FMT_plx " %02x %04x\n", +                    __func__, boff, cmd, pfl->unlock_addr0); +            goto reset_flash; +        } +        DPRINTF("%s: unlock sequence started\n", __func__); +        break; +    case 1: +        /* We started an unlock sequence */ +    check_unlock1: +        if (boff != pfl->unlock_addr1 || cmd != 0x55) { +            DPRINTF("%s: unlock1 failed " TARGET_FMT_plx " %02x\n", __func__, +                    boff, cmd); +            goto reset_flash; +        } +        DPRINTF("%s: unlock sequence done\n", __func__); +        break; +    case 2: +        /* We finished an unlock sequence */ +        if (!pfl->bypass && boff != pfl->unlock_addr0) { +            DPRINTF("%s: command failed " TARGET_FMT_plx " %02x\n", __func__, +                    boff, cmd); 
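+            /* a write at the wrong address here aborts the unlock sequence */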
+            goto reset_flash; +        } +        switch (cmd) { +        case 0x20: +            pfl->bypass = 1; +            goto do_bypass; +        case 0x80: +        case 0x90: +        case 0xA0: +            pfl->cmd = cmd; +            DPRINTF("%s: starting command %02x\n", __func__, cmd); +            break; +        default: +            DPRINTF("%s: unknown command %02x\n", __func__, cmd); +            goto reset_flash; +        } +        break; +    case 3: +        switch (pfl->cmd) { +        case 0x80: +            /* We need another unlock sequence */ +            goto check_unlock0; +        case 0xA0: +            DPRINTF("%s: write data offset " TARGET_FMT_plx " %08x %d\n", +                    __func__, offset, value, width); +            p = pfl->storage; +            if (!pfl->ro) { +                switch (width) { +                case 1: +                    p[offset] &= value; +                    pflash_update(pfl, offset, 1); +                    break; +                case 2: +                    if (be) { +                        p[offset] &= value >> 8; +                        p[offset + 1] &= value; +                    } else { +                        p[offset] &= value; +                        p[offset + 1] &= value >> 8; +                    } +                    pflash_update(pfl, offset, 2); +                    break; +                case 4: +                    if (be) { +                        p[offset] &= value >> 24; +                        p[offset + 1] &= value >> 16; +                        p[offset + 2] &= value >> 8; +                        p[offset + 3] &= value; +                    } else { +                        p[offset] &= value; +                        p[offset + 1] &= value >> 8; +                        p[offset + 2] &= value >> 16; +                        p[offset + 3] &= value >> 24; +                    } +                    pflash_update(pfl, offset, 4); +                    break; +                } +            } +            pfl->status = 0x00 | ~(value & 0x80); +            /* Let's pretend write is immediate */ +            if (pfl->bypass) +                goto do_bypass; +            goto reset_flash; +        case 0x90: +            if (pfl->bypass && cmd == 0x00) { +                /* Unlock bypass reset */ +                goto reset_flash; +            } +            /* We can enter CFI query mode from autoselect mode */ +            if (boff == 0x55 && cmd == 0x98) +                goto enter_CFI_mode; +            /* No break here */ +        default: +            DPRINTF("%s: invalid write for command %02x\n", +                    __func__, pfl->cmd); +            goto reset_flash; +        } +    case 4: +        switch (pfl->cmd) { +        case 0xA0: +            /* Ignore writes while flash data write is occurring */ +            /* As we suppose write is immediate, this should never happen */ +            return; +        case 0x80: +            goto check_unlock1; +        default: +            /* Should never happen */ +            DPRINTF("%s: invalid command state %02x (wc 4)\n", +                    __func__, pfl->cmd); +            goto reset_flash; +        } +        break; +    case 5: +        switch (cmd) { +        case 0x10: +            if (boff != pfl->unlock_addr0) { +                DPRINTF("%s: chip erase: invalid address " TARGET_FMT_plx "\n", +                        __func__, offset); +                goto reset_flash; +            } +            /* Chip erase */ +         
   DPRINTF("%s: start chip erase\n", __func__); +            if (!pfl->ro) { +                memset(pfl->storage, 0xFF, pfl->chip_len); +                pflash_update(pfl, 0, pfl->chip_len); +            } +            pfl->status = 0x00; +            /* Let's wait 5 seconds before chip erase is done */ +            timer_mod(pfl->timer, +                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (get_ticks_per_sec() * 5)); +            break; +        case 0x30: +            /* Sector erase */ +            p = pfl->storage; +            offset &= ~(pfl->sector_len - 1); +            DPRINTF("%s: start sector erase at " TARGET_FMT_plx "\n", __func__, +                    offset); +            if (!pfl->ro) { +                memset(p + offset, 0xFF, pfl->sector_len); +                pflash_update(pfl, offset, pfl->sector_len); +            } +            pfl->status = 0x00; +            /* Let's wait 1/2 second before sector erase is done */ +            timer_mod(pfl->timer, +                           qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + (get_ticks_per_sec() / 2)); +            break; +        default: +            DPRINTF("%s: invalid command %02x (wc 5)\n", __func__, cmd); +            goto reset_flash; +        } +        pfl->cmd = cmd; +        break; +    case 6: +        switch (pfl->cmd) { +        case 0x10: +            /* Ignore writes during chip erase */ +            return; +        case 0x30: +            /* Ignore writes during sector erase */ +            return; +        default: +            /* Should never happen */ +            DPRINTF("%s: invalid command state %02x (wc 6)\n", +                    __func__, pfl->cmd); +            goto reset_flash; +        } +        break; +    case 7: /* Special value for CFI queries */ +        DPRINTF("%s: invalid write in CFI query mode\n", __func__); +        goto reset_flash; +    default: +        /* Should never happen */ +        DPRINTF("%s: invalid write state (wc 7)\n",  __func__); +        goto reset_flash; +    } +    pfl->wcycle++; + +    return; + +    /* Reset flash */ + reset_flash: +    pfl->bypass = 0; +    pfl->wcycle = 0; +    pfl->cmd = 0; +    return; + + do_bypass: +    pfl->wcycle = 2; +    pfl->cmd = 0; +} + + +static uint32_t pflash_readb_be(void *opaque, hwaddr addr) +{ +    return pflash_read(opaque, addr, 1, 1); +} + +static uint32_t pflash_readb_le(void *opaque, hwaddr addr) +{ +    return pflash_read(opaque, addr, 1, 0); +} + +static uint32_t pflash_readw_be(void *opaque, hwaddr addr) +{ +    pflash_t *pfl = opaque; + +    return pflash_read(pfl, addr, 2, 1); +} + +static uint32_t pflash_readw_le(void *opaque, hwaddr addr) +{ +    pflash_t *pfl = opaque; + +    return pflash_read(pfl, addr, 2, 0); +} + +static uint32_t pflash_readl_be(void *opaque, hwaddr addr) +{ +    pflash_t *pfl = opaque; + +    return pflash_read(pfl, addr, 4, 1); +} + +static uint32_t pflash_readl_le(void *opaque, hwaddr addr) +{ +    pflash_t *pfl = opaque; + +    return pflash_read(pfl, addr, 4, 0); +} + +static void pflash_writeb_be(void *opaque, hwaddr addr, +                             uint32_t value) +{ +    pflash_write(opaque, addr, value, 1, 1); +} + +static void pflash_writeb_le(void *opaque, hwaddr addr, +                             uint32_t value) +{ +    pflash_write(opaque, addr, value, 1, 0); +} + +static void pflash_writew_be(void *opaque, hwaddr addr, +                             uint32_t value) +{ +    pflash_t *pfl = opaque; + +    pflash_write(pfl, addr, value, 2, 1); +} + +static void 
pflash_writew_le(void *opaque, hwaddr addr, +                             uint32_t value) +{ +    pflash_t *pfl = opaque; + +    pflash_write(pfl, addr, value, 2, 0); +} + +static void pflash_writel_be(void *opaque, hwaddr addr, +                             uint32_t value) +{ +    pflash_t *pfl = opaque; + +    pflash_write(pfl, addr, value, 4, 1); +} + +static void pflash_writel_le(void *opaque, hwaddr addr, +                             uint32_t value) +{ +    pflash_t *pfl = opaque; + +    pflash_write(pfl, addr, value, 4, 0); +} + +static const MemoryRegionOps pflash_cfi02_ops_be = { +    .old_mmio = { +        .read = { pflash_readb_be, pflash_readw_be, pflash_readl_be, }, +        .write = { pflash_writeb_be, pflash_writew_be, pflash_writel_be, }, +    }, +    .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const MemoryRegionOps pflash_cfi02_ops_le = { +    .old_mmio = { +        .read = { pflash_readb_le, pflash_readw_le, pflash_readl_le, }, +        .write = { pflash_writeb_le, pflash_writew_le, pflash_writel_le, }, +    }, +    .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void pflash_cfi02_realize(DeviceState *dev, Error **errp) +{ +    pflash_t *pfl = CFI_PFLASH02(dev); +    uint32_t chip_len; +    int ret; +    Error *local_err = NULL; + +    chip_len = pfl->sector_len * pfl->nb_blocs; +    /* XXX: to be fixed */ +#if 0 +    if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) && +        total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024)) +        return NULL; +#endif + +    memory_region_init_rom_device(&pfl->orig_mem, OBJECT(pfl), pfl->be ? +                                  &pflash_cfi02_ops_be : &pflash_cfi02_ops_le, +                                  pfl, pfl->name, chip_len, &local_err); +    if (local_err) { +        error_propagate(errp, local_err); +        return; +    } + +    vmstate_register_ram(&pfl->orig_mem, DEVICE(pfl)); +    pfl->storage = memory_region_get_ram_ptr(&pfl->orig_mem); +    pfl->chip_len = chip_len; +    if (pfl->blk) { +        /* read the initial flash content */ +        ret = blk_read(pfl->blk, 0, pfl->storage, chip_len >> 9); +        if (ret < 0) { +            vmstate_unregister_ram(&pfl->orig_mem, DEVICE(pfl)); +            error_setg(errp, "failed to read the initial flash content"); +            return; +        } +    } + +    pflash_setup_mappings(pfl); +    pfl->rom_mode = 1; +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &pfl->mem); + +    if (pfl->blk) { +        pfl->ro = blk_is_read_only(pfl->blk); +    } else { +        pfl->ro = 0; +    } + +    pfl->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, pflash_timer, pfl); +    pfl->wcycle = 0; +    pfl->cmd = 0; +    pfl->status = 0; +    /* Hardcoded CFI table (mostly from SG29 Spansion flash) */ +    pfl->cfi_len = 0x52; +    /* Standard "QRY" string */ +    pfl->cfi_table[0x10] = 'Q'; +    pfl->cfi_table[0x11] = 'R'; +    pfl->cfi_table[0x12] = 'Y'; +    /* Command set (AMD/Fujitsu) */ +    pfl->cfi_table[0x13] = 0x02; +    pfl->cfi_table[0x14] = 0x00; +    /* Primary extended table address */ +    pfl->cfi_table[0x15] = 0x31; +    pfl->cfi_table[0x16] = 0x00; +    /* Alternate command set (none) */ +    pfl->cfi_table[0x17] = 0x00; +    pfl->cfi_table[0x18] = 0x00; +    /* Alternate extended table (none) */ +    pfl->cfi_table[0x19] = 0x00; +    pfl->cfi_table[0x1A] = 0x00; +    /* Vcc min */ +    pfl->cfi_table[0x1B] = 0x27; +    /* Vcc max */ +    pfl->cfi_table[0x1C] = 0x36; +    /* Vpp min (no Vpp pin) */ +    pfl->cfi_table[0x1D] = 0x00; +    /* 
Vpp max (no Vpp pin) */ +    pfl->cfi_table[0x1E] = 0x00; +    /* Reserved */ +    pfl->cfi_table[0x1F] = 0x07; +    /* Timeout for min size buffer write (NA) */ +    pfl->cfi_table[0x20] = 0x00; +    /* Typical timeout for block erase (512 ms) */ +    pfl->cfi_table[0x21] = 0x09; +    /* Typical timeout for full chip erase (4096 ms) */ +    pfl->cfi_table[0x22] = 0x0C; +    /* Reserved */ +    pfl->cfi_table[0x23] = 0x01; +    /* Max timeout for buffer write (NA) */ +    pfl->cfi_table[0x24] = 0x00; +    /* Max timeout for block erase */ +    pfl->cfi_table[0x25] = 0x0A; +    /* Max timeout for chip erase */ +    pfl->cfi_table[0x26] = 0x0D; +    /* Device size */ +    pfl->cfi_table[0x27] = ctz32(chip_len); +    /* Flash device interface (8 & 16 bits) */ +    pfl->cfi_table[0x28] = 0x02; +    pfl->cfi_table[0x29] = 0x00; +    /* Max number of bytes in multi-bytes write */ +    /* XXX: disable buffered write as it's not supported */ +    //    pfl->cfi_table[0x2A] = 0x05; +    pfl->cfi_table[0x2A] = 0x00; +    pfl->cfi_table[0x2B] = 0x00; +    /* Number of erase block regions (uniform) */ +    pfl->cfi_table[0x2C] = 0x01; +    /* Erase block region 1 */ +    pfl->cfi_table[0x2D] = pfl->nb_blocs - 1; +    pfl->cfi_table[0x2E] = (pfl->nb_blocs - 1) >> 8; +    pfl->cfi_table[0x2F] = pfl->sector_len >> 8; +    pfl->cfi_table[0x30] = pfl->sector_len >> 16; + +    /* Extended */ +    pfl->cfi_table[0x31] = 'P'; +    pfl->cfi_table[0x32] = 'R'; +    pfl->cfi_table[0x33] = 'I'; + +    pfl->cfi_table[0x34] = '1'; +    pfl->cfi_table[0x35] = '0'; + +    pfl->cfi_table[0x36] = 0x00; +    pfl->cfi_table[0x37] = 0x00; +    pfl->cfi_table[0x38] = 0x00; +    pfl->cfi_table[0x39] = 0x00; + +    pfl->cfi_table[0x3a] = 0x00; + +    pfl->cfi_table[0x3b] = 0x00; +    pfl->cfi_table[0x3c] = 0x00; +} + +static Property pflash_cfi02_properties[] = { +    DEFINE_PROP_DRIVE("drive", struct pflash_t, blk), +    DEFINE_PROP_UINT32("num-blocks", struct pflash_t, nb_blocs, 0), +    DEFINE_PROP_UINT32("sector-length", struct pflash_t, sector_len, 0), +    DEFINE_PROP_UINT8("width", struct pflash_t, width, 0), +    DEFINE_PROP_UINT8("mappings", struct pflash_t, mappings, 0), +    DEFINE_PROP_UINT8("big-endian", struct pflash_t, be, 0), +    DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), +    DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), +    DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), +    DEFINE_PROP_UINT16("id3", struct pflash_t, ident3, 0), +    DEFINE_PROP_UINT16("unlock-addr0", struct pflash_t, unlock_addr0, 0), +    DEFINE_PROP_UINT16("unlock-addr1", struct pflash_t, unlock_addr1, 0), +    DEFINE_PROP_STRING("name", struct pflash_t, name), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void pflash_cfi02_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); + +    dc->realize = pflash_cfi02_realize; +    dc->props = pflash_cfi02_properties; +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo pflash_cfi02_info = { +    .name           = TYPE_CFI_PFLASH02, +    .parent         = TYPE_SYS_BUS_DEVICE, +    .instance_size  = sizeof(struct pflash_t), +    .class_init     = pflash_cfi02_class_init, +}; + +static void pflash_cfi02_register_types(void) +{ +    type_register_static(&pflash_cfi02_info); +} + +type_init(pflash_cfi02_register_types) + +pflash_t *pflash_cfi02_register(hwaddr base, +                                DeviceState *qdev, const char *name, +                                hwaddr size, +                        
        BlockBackend *blk, uint32_t sector_len, +                                int nb_blocs, int nb_mappings, int width, +                                uint16_t id0, uint16_t id1, +                                uint16_t id2, uint16_t id3, +                                uint16_t unlock_addr0, uint16_t unlock_addr1, +                                int be) +{ +    DeviceState *dev = qdev_create(NULL, TYPE_CFI_PFLASH02); + +    if (blk) { +        qdev_prop_set_drive(dev, "drive", blk, &error_abort); +    } +    qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); +    qdev_prop_set_uint32(dev, "sector-length", sector_len); +    qdev_prop_set_uint8(dev, "width", width); +    qdev_prop_set_uint8(dev, "mappings", nb_mappings); +    qdev_prop_set_uint8(dev, "big-endian", !!be); +    qdev_prop_set_uint16(dev, "id0", id0); +    qdev_prop_set_uint16(dev, "id1", id1); +    qdev_prop_set_uint16(dev, "id2", id2); +    qdev_prop_set_uint16(dev, "id3", id3); +    qdev_prop_set_uint16(dev, "unlock-addr0", unlock_addr0); +    qdev_prop_set_uint16(dev, "unlock-addr1", unlock_addr1); +    qdev_prop_set_string(dev, "name", name); +    qdev_init_nofail(dev); + +    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); +    return CFI_PFLASH02(dev); +} diff --git a/hw/block/tc58128.c b/hw/block/tc58128.c new file mode 100644 index 00000000..728f1c3b --- /dev/null +++ b/hw/block/tc58128.c @@ -0,0 +1,180 @@ +#include "hw/hw.h" +#include "hw/sh4/sh.h" +#include "hw/loader.h" +#include "sysemu/qtest.h" +#include "qemu/error-report.h" + +#define CE1  0x0100 +#define CE2  0x0200 +#define RE   0x0400 +#define WE   0x0800 +#define ALE  0x1000 +#define CLE  0x2000 +#define RDY1 0x4000 +#define RDY2 0x8000 +#define RDY(n) ((n) == 0 ? RDY1 : RDY2) + +typedef enum { WAIT, READ1, READ2, READ3 } state_t; + +typedef struct { +    uint8_t *flash_contents; +    state_t state; +    uint32_t address; +    uint8_t address_cycle; +} tc58128_dev; + +static tc58128_dev tc58128_devs[2]; + +#define FLASH_SIZE (16*1024*1024) + +static void init_dev(tc58128_dev * dev, const char *filename) +{ +    int ret, blocks; + +    dev->state = WAIT; +    dev->flash_contents = g_malloc(FLASH_SIZE); +    memset(dev->flash_contents, 0xff, FLASH_SIZE); +    if (filename) { +	/* Load flash image skipping the first block */ +	ret = load_image(filename, dev->flash_contents + 528 * 32); +	if (ret < 0) { +            if (!qtest_enabled()) { +                error_report("Could not load flash image %s", filename); +                exit(1); +            } +	} else { +	    /* Build first block with number of blocks */ +	    blocks = (ret + 528 * 32 - 1) / (528 * 32); +	    dev->flash_contents[0] = blocks & 0xff; +	    dev->flash_contents[1] = (blocks >> 8) & 0xff; +	    dev->flash_contents[2] = (blocks >> 16) & 0xff; +	    dev->flash_contents[3] = (blocks >> 24) & 0xff; +	    fprintf(stderr, "loaded %d bytes for %s into flash\n", ret, +		    filename); +	} +    } +} + +static void handle_command(tc58128_dev * dev, uint8_t command) +{ +    switch (command) { +    case 0xff: +	fprintf(stderr, "reset flash device\n"); +	dev->state = WAIT; +	break; +    case 0x00: +	fprintf(stderr, "read mode 1\n"); +	dev->state = READ1; +	dev->address_cycle = 0; +	break; +    case 0x01: +	fprintf(stderr, "read mode 2\n"); +	dev->state = READ2; +	dev->address_cycle = 0; +	break; +    case 0x50: +	fprintf(stderr, "read mode 3\n"); +	dev->state = READ3; +	dev->address_cycle = 0; +	break; +    default: +	fprintf(stderr, "unknown flash command 0x%02x\n", command); +        abort(); + 
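+        /*
+         * Illustration (an assumption based on typical SmartMedia-era
+         * NAND command sets, not something this file states): 0x00, 0x01
+         * and 0x50 select reads of the A, B and C areas of a 528-byte
+         * page (two 256-byte halves plus the 16-byte spare), which is
+         * why handle_address() offsets the column address by 0x100 for
+         * READ2 and 0x200 for READ3.
+         */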
   } +} + +static void handle_address(tc58128_dev * dev, uint8_t data) +{ +    switch (dev->state) { +    case READ1: +    case READ2: +    case READ3: +	switch (dev->address_cycle) { +	case 0: +	    dev->address = data; +	    if (dev->state == READ2) +		dev->address |= 0x100; +	    else if (dev->state == READ3) +		dev->address |= 0x200; +	    break; +	case 1: +	    dev->address += data * 528 * 0x100; +	    break; +	case 2: +	    dev->address += data * 528; +	    fprintf(stderr, "address pointer in flash: 0x%08x\n", +		    dev->address); +	    break; +	default: +	    /* Invalid data */ +            abort(); +	} +	dev->address_cycle++; +	break; +    default: +        abort(); +    } +} + +static uint8_t handle_read(tc58128_dev * dev) +{ +#if 0 +    if (dev->address % 0x100000 == 0) +	fprintf(stderr, "reading flash at address 0x%08x\n", dev->address); +#endif +    return dev->flash_contents[dev->address++]; +} + +/* We never mark the device as busy, so interrupts cannot be triggered +   XXXXX */ + +static int tc58128_cb(uint16_t porta, uint16_t portb, +                      uint16_t * periph_pdtra, uint16_t * periph_portadir, +                      uint16_t * periph_pdtrb, uint16_t * periph_portbdir) +{ +    int dev; + +    if ((porta & CE1) == 0) +	dev = 0; +    else if ((porta & CE2) == 0) +	dev = 1; +    else +	return 0;		/* No device selected */ + +    if ((porta & RE) && (porta & WE)) { +	/* Nothing to do, assert ready and return to input state */ +	*periph_portadir &= 0xff00; +	*periph_portadir |= RDY(dev); +	*periph_pdtra |= RDY(dev); +	return 1; +    } + +    if (porta & CLE) { +	/* Command */ +	assert((porta & WE) == 0); +	handle_command(&tc58128_devs[dev], porta & 0x00ff); +    } else if (porta & ALE) { +	assert((porta & WE) == 0); +	handle_address(&tc58128_devs[dev], porta & 0x00ff); +    } else if ((porta & RE) == 0) { +	*periph_portadir |= 0x00ff; +	*periph_pdtra &= 0xff00; +	*periph_pdtra |= handle_read(&tc58128_devs[dev]); +    } else { +        abort(); +    } +    return 1; +} + +static sh7750_io_device tc58128 = { +    RE | WE,			/* Port A triggers */ +    0,				/* Port B triggers */ +    tc58128_cb			/* Callback */ +}; + +int tc58128_init(struct SH7750State *s, const char *zone1, const char *zone2) +{ +    init_dev(&tc58128_devs[0], zone1); +    init_dev(&tc58128_devs[1], zone2); +    return sh7750_register_io_device(s, &tc58128); +} diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c new file mode 100644 index 00000000..5625a9fa --- /dev/null +++ b/hw/block/virtio-blk.c @@ -0,0 +1,1018 @@ +/* + * Virtio Block Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + *  Anthony Liguori   <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2.  See + * the COPYING file in the top-level directory. 
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "hw/block/block.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/blockdev.h"
+#include "hw/virtio/virtio-blk.h"
+#include "dataplane/virtio-blk.h"
+#include "migration/migration.h"
+#include "block/scsi.h"
+#ifdef __linux__
+# include <scsi/sg.h>
+#endif
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
+
+VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
+{
+    VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
+    req->dev = s;
+    req->qiov.size = 0;
+    req->in_len = 0;
+    req->next = NULL;
+    req->mr_next = NULL;
+    return req;
+}
+
+void virtio_blk_free_request(VirtIOBlockReq *req)
+{
+    if (req) {
+        g_slice_free(VirtIOBlockReq, req);
+    }
+}
+
+static void virtio_blk_complete_request(VirtIOBlockReq *req,
+                                        unsigned char status)
+{
+    VirtIOBlock *s = req->dev;
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
+
+    trace_virtio_blk_req_complete(req, status);
+
+    stb_p(&req->in->status, status);
+    virtqueue_push(s->vq, &req->elem, req->in_len);
+    virtio_notify(vdev, s->vq);
+}
+
+static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
+{
+    req->dev->complete_request(req, status);
+}
+
+static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
+    bool is_read)
+{
+    BlockErrorAction action = blk_get_error_action(req->dev->blk,
+                                                   is_read, error);
+    VirtIOBlock *s = req->dev;
+
+    if (action == BLOCK_ERROR_ACTION_STOP) {
+        req->next = s->rq;
+        s->rq = req;
+    } else if (action == BLOCK_ERROR_ACTION_REPORT) {
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        block_acct_done(blk_get_stats(s->blk), &req->acct);
+        virtio_blk_free_request(req);
+    }
+
+    blk_error_action(s->blk, action, is_read, error);
+    return action != BLOCK_ERROR_ACTION_IGNORE;
+}
+
+static void virtio_blk_rw_complete(void *opaque, int ret)
+{
+    VirtIOBlockReq *next = opaque;
+
+    while (next) {
+        VirtIOBlockReq *req = next;
+        next = req->mr_next;
+        trace_virtio_blk_rw_complete(req, ret);
+
+        if (req->qiov.nalloc != -1) {
+            /* If nalloc is != -1 req->qiov is a local copy of the original
+             * external iovec. It was allocated in submit_requests
+             * to be able to merge requests. */
+            qemu_iovec_destroy(&req->qiov);
+        }
+
+        if (ret) {
+            int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
+            bool is_read = !(p & VIRTIO_BLK_T_OUT);
+            /* Note that memory may be dirtied on read failure.  If the
+             * virtio request is not completed here, as is the case for
+             * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+             * correctly during live migration.  While this is ugly,
+             * it is acceptable because the device is free to write to
+             * the memory until the request is completed (which will
+             * happen on the other side of the migration).
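+             *
+             * (Illustration, inferred from this file: requests parked by
+             * BLOCK_ERROR_ACTION_STOP are chained on s->rq and replayed
+             * by virtio_blk_dma_restart_bh() when the VM resumes, at
+             * which point the guest memory is written again.)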
+             */ +            if (virtio_blk_handle_rw_error(req, -ret, is_read)) { +                continue; +            } +        } + +        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); +        block_acct_done(blk_get_stats(req->dev->blk), &req->acct); +        virtio_blk_free_request(req); +    } +} + +static void virtio_blk_flush_complete(void *opaque, int ret) +{ +    VirtIOBlockReq *req = opaque; + +    if (ret) { +        if (virtio_blk_handle_rw_error(req, -ret, 0)) { +            return; +        } +    } + +    virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); +    block_acct_done(blk_get_stats(req->dev->blk), &req->acct); +    virtio_blk_free_request(req); +} + +#ifdef __linux__ + +typedef struct { +    VirtIOBlockReq *req; +    struct sg_io_hdr hdr; +} VirtIOBlockIoctlReq; + +static void virtio_blk_ioctl_complete(void *opaque, int status) +{ +    VirtIOBlockIoctlReq *ioctl_req = opaque; +    VirtIOBlockReq *req = ioctl_req->req; +    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); +    struct virtio_scsi_inhdr *scsi; +    struct sg_io_hdr *hdr; + +    scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base; + +    if (status) { +        status = VIRTIO_BLK_S_UNSUPP; +        virtio_stl_p(vdev, &scsi->errors, 255); +        goto out; +    } + +    hdr = &ioctl_req->hdr; +    /* +     * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi) +     * clear the masked_status field [hence status gets cleared too, see +     * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED +     * status has occurred.  However they do set DRIVER_SENSE in driver_status +     * field. Also a (sb_len_wr > 0) indicates there is a sense buffer. +     */ +    if (hdr->status == 0 && hdr->sb_len_wr > 0) { +        hdr->status = CHECK_CONDITION; +    } + +    virtio_stl_p(vdev, &scsi->errors, +                 hdr->status | (hdr->msg_status << 8) | +                 (hdr->host_status << 16) | (hdr->driver_status << 24)); +    virtio_stl_p(vdev, &scsi->residual, hdr->resid); +    virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr); +    virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); + +out: +    virtio_blk_req_complete(req, status); +    virtio_blk_free_request(req); +    g_free(ioctl_req); +} + +#endif + +static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) +{ +    VirtIOBlockReq *req = virtio_blk_alloc_request(s); + +    if (!virtqueue_pop(s->vq, &req->elem)) { +        virtio_blk_free_request(req); +        return NULL; +    } + +    return req; +} + +static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) +{ +    int status = VIRTIO_BLK_S_OK; +    struct virtio_scsi_inhdr *scsi = NULL; +    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); +    VirtQueueElement *elem = &req->elem; +    VirtIOBlock *blk = req->dev; + +#ifdef __linux__ +    int i; +    VirtIOBlockIoctlReq *ioctl_req; +    BlockAIOCB *acb; +#endif + +    /* +     * We require at least one output segment each for the virtio_blk_outhdr +     * and the SCSI command block. +     * +     * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr +     * and the sense buffer pointer in the input segments. +     */ +    if (elem->out_num < 2 || elem->in_num < 3) { +        status = VIRTIO_BLK_S_IOERR; +        goto fail; +    } + +    /* +     * The scsi inhdr is placed in the second-to-last input segment, just +     * before the regular inhdr. 
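+     *
+     * Illustrative layout, derived from the parsing code below rather
+     * than from any specification text:
+     *
+     *   out_sg[0]                virtio_blk_outhdr
+     *   out_sg[1]                SCSI command block (CDB)
+     *   out_sg[2 .. out_num-1]   optional write payload
+     *   in_sg[0 .. in_num-4]     optional read payload
+     *   in_sg[in_num-3]          sense buffer
+     *   in_sg[in_num-2]          virtio_scsi_inhdr
+     *   in_sg[in_num-1]          virtio_blk_inhdr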
+     */
+    scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base;
+
+    if (!blk->conf.scsi) {
+        status = VIRTIO_BLK_S_UNSUPP;
+        goto fail;
+    }
+
+    /*
+     * No support for bidirectional commands yet.
+     */
+    if (elem->out_num > 2 && elem->in_num > 3) {
+        status = VIRTIO_BLK_S_UNSUPP;
+        goto fail;
+    }
+
+#ifdef __linux__
+    ioctl_req = g_new0(VirtIOBlockIoctlReq, 1);
+    ioctl_req->req = req;
+    ioctl_req->hdr.interface_id = 'S';
+    ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len;
+    ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base;
+    ioctl_req->hdr.dxfer_len = 0;
+
+    if (elem->out_num > 2) {
+        /*
+         * If there are more than the minimally required 2 output segments
+         * there is write payload starting from the third iovec.
+         */
+        ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV;
+        ioctl_req->hdr.iovec_count = elem->out_num - 2;
+
+        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
+            ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len;
+        }
+
+        ioctl_req->hdr.dxferp = elem->out_sg + 2;
+
+    } else if (elem->in_num > 3) {
+        /*
+         * If we have more than 3 input segments the guest wants to actually
+         * read data.
+         */
+        ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV;
+        ioctl_req->hdr.iovec_count = elem->in_num - 3;
+        for (i = 0; i < ioctl_req->hdr.iovec_count; i++) {
+            ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len;
+        }
+
+        ioctl_req->hdr.dxferp = elem->in_sg;
+    } else {
+        /*
+         * Some SCSI commands don't actually transfer any data.
+         */
+        ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE;
+    }
+
+    ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
+    ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
+
+    acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
+                        virtio_blk_ioctl_complete, ioctl_req);
+    if (!acb) {
+        g_free(ioctl_req);
+        status = VIRTIO_BLK_S_UNSUPP;
+        goto fail;
+    }
+    return -EINPROGRESS;
+#else
+    abort();
+#endif
+
+fail:
+    /* Just put anything nonzero so that the ioctl fails in the guest.  */
+    if (scsi) {
+        virtio_stl_p(vdev, &scsi->errors, 255);
+    }
+    return status;
+}
+
+static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
+{
+    int status;
+
+    status = virtio_blk_handle_scsi_req(req);
+    if (status != -EINPROGRESS) {
+        virtio_blk_req_complete(req, status);
+        virtio_blk_free_request(req);
+    }
+}
+
+static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
+                                   int start, int num_reqs, int niov)
+{
+    QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
+    int64_t sector_num = mrb->reqs[start]->sector_num;
+    int nb_sectors = mrb->reqs[start]->qiov.size / BDRV_SECTOR_SIZE;
+    bool is_write = mrb->is_write;
+
+    if (num_reqs > 1) {
+        int i;
+        struct iovec *tmp_iov = qiov->iov;
+        int tmp_niov = qiov->niov;
+
+        /* mrb->reqs[start]->qiov was initialized from external so we can't
+         * modify it here. We need to initialize it locally and then add the
+         * external iovecs.
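+         *
+         * Hypothetical example: two sequential 4 KiB writes at sectors
+         * 0 and 8 are concatenated into one locally allocated 8 KiB
+         * iovec, chained via mr_next, and submitted below as a single
+         * blk_aio_writev() call; block_acct_merge_done() then accounts
+         * for num_reqs - 1 merges.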
*/ +        qemu_iovec_init(qiov, niov); + +        for (i = 0; i < tmp_niov; i++) { +            qemu_iovec_add(qiov, tmp_iov[i].iov_base, tmp_iov[i].iov_len); +        } + +        for (i = start + 1; i < start + num_reqs; i++) { +            qemu_iovec_concat(qiov, &mrb->reqs[i]->qiov, 0, +                              mrb->reqs[i]->qiov.size); +            mrb->reqs[i - 1]->mr_next = mrb->reqs[i]; +            nb_sectors += mrb->reqs[i]->qiov.size / BDRV_SECTOR_SIZE; +        } +        assert(nb_sectors == qiov->size / BDRV_SECTOR_SIZE); + +        trace_virtio_blk_submit_multireq(mrb, start, num_reqs, sector_num, +                                         nb_sectors, is_write); +        block_acct_merge_done(blk_get_stats(blk), +                              is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ, +                              num_reqs - 1); +    } + +    if (is_write) { +        blk_aio_writev(blk, sector_num, qiov, nb_sectors, +                       virtio_blk_rw_complete, mrb->reqs[start]); +    } else { +        blk_aio_readv(blk, sector_num, qiov, nb_sectors, +                      virtio_blk_rw_complete, mrb->reqs[start]); +    } +} + +static int multireq_compare(const void *a, const void *b) +{ +    const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a, +                         *req2 = *(VirtIOBlockReq **)b; + +    /* +     * Note that we can't simply subtract sector_num1 from sector_num2 +     * here as that could overflow the return value. +     */ +    if (req1->sector_num > req2->sector_num) { +        return 1; +    } else if (req1->sector_num < req2->sector_num) { +        return -1; +    } else { +        return 0; +    } +} + +void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) +{ +    int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; +    int max_xfer_len = 0; +    int64_t sector_num = 0; + +    if (mrb->num_reqs == 1) { +        submit_requests(blk, mrb, 0, 1, -1); +        mrb->num_reqs = 0; +        return; +    } + +    max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk); +    max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS); + +    qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs), +          &multireq_compare); + +    for (i = 0; i < mrb->num_reqs; i++) { +        VirtIOBlockReq *req = mrb->reqs[i]; +        if (num_reqs > 0) { +            bool merge = true; + +            /* merge would exceed maximum number of IOVs */ +            if (niov + req->qiov.niov > IOV_MAX) { +                merge = false; +            } + +            /* merge would exceed maximum transfer length of backend device */ +            if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) { +                merge = false; +            } + +            /* requests are not sequential */ +            if (sector_num + nb_sectors != req->sector_num) { +                merge = false; +            } + +            if (!merge) { +                submit_requests(blk, mrb, start, num_reqs, niov); +                num_reqs = 0; +            } +        } + +        if (num_reqs == 0) { +            sector_num = req->sector_num; +            nb_sectors = niov = 0; +            start = i; +        } + +        nb_sectors += req->qiov.size / BDRV_SECTOR_SIZE; +        niov += req->qiov.niov; +        num_reqs++; +    } + +    submit_requests(blk, mrb, start, num_reqs, niov); +    mrb->num_reqs = 0; +} + +static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb) +{ +    
block_acct_start(blk_get_stats(req->dev->blk), &req->acct, 0, +                     BLOCK_ACCT_FLUSH); + +    /* +     * Make sure all outstanding writes are posted to the backing device. +     */ +    if (mrb->is_write && mrb->num_reqs > 0) { +        virtio_blk_submit_multireq(req->dev->blk, mrb); +    } +    blk_aio_flush(req->dev->blk, virtio_blk_flush_complete, req); +} + +static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, +                                     uint64_t sector, size_t size) +{ +    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS; +    uint64_t total_sectors; + +    if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) { +        return false; +    } +    if (sector & dev->sector_mask) { +        return false; +    } +    if (size % dev->conf.conf.logical_block_size) { +        return false; +    } +    blk_get_geometry(dev->blk, &total_sectors); +    if (sector > total_sectors || nb_sectors > total_sectors - sector) { +        return false; +    } +    return true; +} + +void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) +{ +    uint32_t type; +    struct iovec *in_iov = req->elem.in_sg; +    struct iovec *iov = req->elem.out_sg; +    unsigned in_num = req->elem.in_num; +    unsigned out_num = req->elem.out_num; + +    if (req->elem.out_num < 1 || req->elem.in_num < 1) { +        error_report("virtio-blk missing headers"); +        exit(1); +    } + +    if (unlikely(iov_to_buf(iov, out_num, 0, &req->out, +                            sizeof(req->out)) != sizeof(req->out))) { +        error_report("virtio-blk request outhdr too short"); +        exit(1); +    } + +    iov_discard_front(&iov, &out_num, sizeof(req->out)); + +    if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { +        error_report("virtio-blk request inhdr too short"); +        exit(1); +    } + +    /* We always touch the last byte, so just see how big in_iov is.  */ +    req->in_len = iov_size(in_iov, in_num); +    req->in = (void *)in_iov[in_num - 1].iov_base +              + in_iov[in_num - 1].iov_len +              - sizeof(struct virtio_blk_inhdr); +    iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr)); + +    type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type); + +    /* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER +     * is an optional flag. Although a guest should not send this flag if +     * not negotiated we ignored it in the past. So keep ignoring it. 
*/ +    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { +    case VIRTIO_BLK_T_IN: +    { +        bool is_write = type & VIRTIO_BLK_T_OUT; +        req->sector_num = virtio_ldq_p(VIRTIO_DEVICE(req->dev), +                                       &req->out.sector); + +        if (is_write) { +            qemu_iovec_init_external(&req->qiov, iov, out_num); +            trace_virtio_blk_handle_write(req, req->sector_num, +                                          req->qiov.size / BDRV_SECTOR_SIZE); +        } else { +            qemu_iovec_init_external(&req->qiov, in_iov, in_num); +            trace_virtio_blk_handle_read(req, req->sector_num, +                                         req->qiov.size / BDRV_SECTOR_SIZE); +        } + +        if (!virtio_blk_sect_range_ok(req->dev, req->sector_num, +                                      req->qiov.size)) { +            virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); +            virtio_blk_free_request(req); +            return; +        } + +        block_acct_start(blk_get_stats(req->dev->blk), +                         &req->acct, req->qiov.size, +                         is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); + +        /* merge would exceed maximum number of requests or IO direction +         * changes */ +        if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS || +                                  is_write != mrb->is_write || +                                  !req->dev->conf.request_merging)) { +            virtio_blk_submit_multireq(req->dev->blk, mrb); +        } + +        assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS); +        mrb->reqs[mrb->num_reqs++] = req; +        mrb->is_write = is_write; +        break; +    } +    case VIRTIO_BLK_T_FLUSH: +        virtio_blk_handle_flush(req, mrb); +        break; +    case VIRTIO_BLK_T_SCSI_CMD: +        virtio_blk_handle_scsi(req); +        break; +    case VIRTIO_BLK_T_GET_ID: +    { +        VirtIOBlock *s = req->dev; + +        /* +         * NB: per existing s/n string convention the string is +         * terminated by '\0' only when shorter than buffer. +         */ +        const char *serial = s->conf.serial ? s->conf.serial : ""; +        size_t size = MIN(strlen(serial) + 1, +                          MIN(iov_size(in_iov, in_num), +                              VIRTIO_BLK_ID_BYTES)); +        iov_from_buf(in_iov, in_num, 0, serial, size); +        virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); +        virtio_blk_free_request(req); +        break; +    } +    default: +        virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); +        virtio_blk_free_request(req); +    } +} + +static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ +    VirtIOBlock *s = VIRTIO_BLK(vdev); +    VirtIOBlockReq *req; +    MultiReqBuffer mrb = {}; + +    /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start +     * dataplane here instead of waiting for .set_status(). 
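+     *
+     * (Inferred from the dataplane code, not stated here: once started,
+     * the dataplane processes the virtqueue in its own AioContext, so
+     * this handler deliberately returns without draining the queue
+     * itself.)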
+     */
+    if (s->dataplane) {
+        virtio_blk_data_plane_start(s->dataplane);
+        return;
+    }
+
+    while ((req = virtio_blk_get_request(s))) {
+        virtio_blk_handle_request(req, &mrb);
+    }
+
+    if (mrb.num_reqs) {
+        virtio_blk_submit_multireq(s->blk, &mrb);
+    }
+}
+
+static void virtio_blk_dma_restart_bh(void *opaque)
+{
+    VirtIOBlock *s = opaque;
+    VirtIOBlockReq *req = s->rq;
+    MultiReqBuffer mrb = {};
+
+    qemu_bh_delete(s->bh);
+    s->bh = NULL;
+
+    s->rq = NULL;
+
+    while (req) {
+        VirtIOBlockReq *next = req->next;
+        virtio_blk_handle_request(req, &mrb);
+        req = next;
+    }
+
+    if (mrb.num_reqs) {
+        virtio_blk_submit_multireq(s->blk, &mrb);
+    }
+}
+
+static void virtio_blk_dma_restart_cb(void *opaque, int running,
+                                      RunState state)
+{
+    VirtIOBlock *s = opaque;
+
+    if (!running) {
+        return;
+    }
+
+    if (!s->bh) {
+        s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk),
+                           virtio_blk_dma_restart_bh, s);
+        qemu_bh_schedule(s->bh);
+    }
+}
+
+static void virtio_blk_reset(VirtIODevice *vdev)
+{
+    VirtIOBlock *s = VIRTIO_BLK(vdev);
+    AioContext *ctx;
+
+    /*
+     * This should cancel pending requests, but can't do nicely until there
+     * are per-device request lists.
+     */
+    ctx = blk_get_aio_context(s->blk);
+    aio_context_acquire(ctx);
+    blk_drain(s->blk);
+
+    if (s->dataplane) {
+        virtio_blk_data_plane_stop(s->dataplane);
+    }
+    aio_context_release(ctx);
+
+    blk_set_enable_write_cache(s->blk, s->original_wce);
+}
+
+/* coalesce internal state, copy to pci i/o region 0
+ */
+static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VirtIOBlock *s = VIRTIO_BLK(vdev);
+    BlockConf *conf = &s->conf.conf;
+    struct virtio_blk_config blkcfg;
+    uint64_t capacity;
+    int blk_size = conf->logical_block_size;
+
+    blk_get_geometry(s->blk, &capacity);
+    memset(&blkcfg, 0, sizeof(blkcfg));
+    virtio_stq_p(vdev, &blkcfg.capacity, capacity);
+    virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
+    virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
+    virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
+    virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
+    virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
+    blkcfg.geometry.heads = conf->heads;
+    /*
+     * We must ensure that the block device capacity is a multiple of
+     * the logical block size. If that is not the case, let's use
+     * sector_mask to adapt the geometry to have a correct picture.
+     * For those devices where the capacity is ok for the given geometry
+     * we don't touch the sector value of the geometry, since some devices
+     * (like s390 dasd) need a specific value. Here the capacity is already
+     * cyls*heads*secs*blk_size and the sector value is not block size
+     * divided by 512 - instead it is the amount of blk_size blocks
+     * per track (cylinder).
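+     *
+     * Hypothetical example: with a 4096-byte logical block size,
+     * sector_mask is 4096 / 512 - 1 = 7, so a configured secs = 63 is
+     * reported as 63 & ~7 = 56 sectors per track when the capacity
+     * check below fails.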
+     */ +    if (blk_getlength(s->blk) /  conf->heads / conf->secs % blk_size) { +        blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; +    } else { +        blkcfg.geometry.sectors = conf->secs; +    } +    blkcfg.size_max = 0; +    blkcfg.physical_block_exp = get_physical_block_exp(conf); +    blkcfg.alignment_offset = 0; +    blkcfg.wce = blk_enable_write_cache(s->blk); +    memcpy(config, &blkcfg, sizeof(struct virtio_blk_config)); +} + +static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) +{ +    VirtIOBlock *s = VIRTIO_BLK(vdev); +    struct virtio_blk_config blkcfg; + +    memcpy(&blkcfg, config, sizeof(blkcfg)); + +    aio_context_acquire(blk_get_aio_context(s->blk)); +    blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); +    aio_context_release(blk_get_aio_context(s->blk)); +} + +static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, +                                        Error **errp) +{ +    VirtIOBlock *s = VIRTIO_BLK(vdev); + +    virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); +    virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); +    virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); +    virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); +    if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) { +        if (s->conf.scsi) { +            error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); +            return 0; +        } +    } else { +        virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT); +        virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); +    } + +    if (s->conf.config_wce) { +        virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); +    } +    if (blk_enable_write_cache(s->blk)) { +        virtio_add_feature(&features, VIRTIO_BLK_F_WCE); +    } +    if (blk_is_read_only(s->blk)) { +        virtio_add_feature(&features, VIRTIO_BLK_F_RO); +    } + +    return features; +} + +static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) +{ +    VirtIOBlock *s = VIRTIO_BLK(vdev); + +    if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | +                                    VIRTIO_CONFIG_S_DRIVER_OK))) { +        virtio_blk_data_plane_stop(s->dataplane); +    } + +    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { +        return; +    } + +    /* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send +     * cache flushes.  Thus, the "auto writethrough" behavior is never +     * necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature. +     * Leaving it enabled would break the following sequence: +     * +     *     Guest started with "-drive cache=writethrough" +     *     Guest sets status to 0 +     *     Guest sets DRIVER bit in status field +     *     Guest reads host features (WCE=0, CONFIG_WCE=1) +     *     Guest writes guest features (WCE=0, CONFIG_WCE=1) +     *     Guest writes 1 to the WCE configuration field (writeback mode) +     *     Guest sets DRIVER_OK bit in status field +     * +     * s->blk would erroneously be placed in writethrough mode. 
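+     *
+     * (Consequently the branch below only forces the cache mode for
+     * guests that did not negotiate VIRTIO_BLK_F_CONFIG_WCE, keyed on
+     * the legacy VIRTIO_BLK_F_WCE feature bit.)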
+     */ +    if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { +        aio_context_acquire(blk_get_aio_context(s->blk)); +        blk_set_enable_write_cache(s->blk, +                                   virtio_vdev_has_feature(vdev, +                                                           VIRTIO_BLK_F_WCE)); +        aio_context_release(blk_get_aio_context(s->blk)); +    } +} + +static void virtio_blk_save(QEMUFile *f, void *opaque) +{ +    VirtIODevice *vdev = VIRTIO_DEVICE(opaque); +    VirtIOBlock *s = VIRTIO_BLK(vdev); + +    if (s->dataplane) { +        virtio_blk_data_plane_stop(s->dataplane); +    } + +    virtio_save(vdev, f); +} +     +static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) +{ +    VirtIOBlock *s = VIRTIO_BLK(vdev); +    VirtIOBlockReq *req = s->rq; + +    while (req) { +        qemu_put_sbyte(f, 1); +        qemu_put_buffer(f, (unsigned char *)&req->elem, +                        sizeof(VirtQueueElement)); +        req = req->next; +    } +    qemu_put_sbyte(f, 0); +} + +static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) +{ +    VirtIOBlock *s = opaque; +    VirtIODevice *vdev = VIRTIO_DEVICE(s); + +    if (version_id != 2) +        return -EINVAL; + +    return virtio_load(vdev, f, version_id); +} + +static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, +                                  int version_id) +{ +    VirtIOBlock *s = VIRTIO_BLK(vdev); + +    while (qemu_get_sbyte(f)) { +        VirtIOBlockReq *req = virtio_blk_alloc_request(s); +        qemu_get_buffer(f, (unsigned char *)&req->elem, +                        sizeof(VirtQueueElement)); +        req->next = s->rq; +        s->rq = req; + +        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr, +            req->elem.in_num, 1); +        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr, +            req->elem.out_num, 0); +    } + +    return 0; +} + +static void virtio_blk_resize(void *opaque) +{ +    VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + +    virtio_notify_config(vdev); +} + +static const BlockDevOps virtio_block_ops = { +    .resize_cb = virtio_blk_resize, +}; + +/* Disable dataplane thread during live migration since it does not + * update the dirty memory bitmap yet. 
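+ *
+ * (Inferred from virtio_blk_migration_state_changed() below: the
+ * dataplane is destroyed when migration enters setup and re-created
+ * once migration has finished or failed.)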
+ */ +static void virtio_blk_migration_state_changed(Notifier *notifier, void *data) +{ +    VirtIOBlock *s = container_of(notifier, VirtIOBlock, +                                  migration_state_notifier); +    MigrationState *mig = data; +    Error *err = NULL; + +    if (migration_in_setup(mig)) { +        if (!s->dataplane) { +            return; +        } +        virtio_blk_data_plane_destroy(s->dataplane); +        s->dataplane = NULL; +    } else if (migration_has_finished(mig) || +               migration_has_failed(mig)) { +        if (s->dataplane) { +            return; +        } +        blk_drain_all(); /* complete in-flight non-dataplane requests */ +        virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->conf, +                                     &s->dataplane, &err); +        if (err != NULL) { +            error_report_err(err); +        } +    } +} + +static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +{ +    VirtIODevice *vdev = VIRTIO_DEVICE(dev); +    VirtIOBlock *s = VIRTIO_BLK(dev); +    VirtIOBlkConf *conf = &s->conf; +    Error *err = NULL; +    static int virtio_blk_id; + +    if (!conf->conf.blk) { +        error_setg(errp, "drive property not set"); +        return; +    } +    if (!blk_is_inserted(conf->conf.blk)) { +        error_setg(errp, "Device needs media, but drive is empty"); +        return; +    } + +    blkconf_serial(&conf->conf, &conf->serial); +    s->original_wce = blk_enable_write_cache(conf->conf.blk); +    blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err); +    if (err) { +        error_propagate(errp, err); +        return; +    } +    blkconf_blocksizes(&conf->conf); + +    virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, +                sizeof(struct virtio_blk_config)); + +    s->blk = conf->conf.blk; +    s->rq = NULL; +    s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; + +    s->vq = virtio_add_queue(vdev, 128, virtio_blk_handle_output); +    s->complete_request = virtio_blk_complete_request; +    virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); +    if (err != NULL) { +        error_propagate(errp, err); +        virtio_cleanup(vdev); +        return; +    } +    s->migration_state_notifier.notify = virtio_blk_migration_state_changed; +    add_migration_state_change_notifier(&s->migration_state_notifier); + +    s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s); +    register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, +                    virtio_blk_save, virtio_blk_load, s); +    blk_set_dev_ops(s->blk, &virtio_block_ops, s); +    blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size); + +    blk_iostatus_enable(s->blk); +} + +static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp) +{ +    VirtIODevice *vdev = VIRTIO_DEVICE(dev); +    VirtIOBlock *s = VIRTIO_BLK(dev); + +    remove_migration_state_change_notifier(&s->migration_state_notifier); +    virtio_blk_data_plane_destroy(s->dataplane); +    s->dataplane = NULL; +    qemu_del_vm_change_state_handler(s->change); +    unregister_savevm(dev, "virtio-blk", s); +    blockdev_mark_auto_del(s->blk); +    virtio_cleanup(vdev); +} + +static void virtio_blk_instance_init(Object *obj) +{ +    VirtIOBlock *s = VIRTIO_BLK(obj); + +    object_property_add_link(obj, "iothread", TYPE_IOTHREAD, +                             (Object **)&s->conf.iothread, +                             qdev_prop_allow_set_link_before_realize, +                             
OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); +    device_add_bootindex_property(obj, &s->conf.conf.bootindex, +                                  "bootindex", "/disk@0,0", +                                  DEVICE(obj), NULL); +} + +static Property virtio_blk_properties[] = { +    DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), +    DEFINE_BLOCK_CHS_PROPERTIES(VirtIOBlock, conf.conf), +    DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), +    DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true), +#ifdef __linux__ +    DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true), +#endif +    DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, +                    true), +    DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false), +    DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_blk_class_init(ObjectClass *klass, void *data) +{ +    DeviceClass *dc = DEVICE_CLASS(klass); +    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + +    dc->props = virtio_blk_properties; +    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +    vdc->realize = virtio_blk_device_realize; +    vdc->unrealize = virtio_blk_device_unrealize; +    vdc->get_config = virtio_blk_update_config; +    vdc->set_config = virtio_blk_set_config; +    vdc->get_features = virtio_blk_get_features; +    vdc->set_status = virtio_blk_set_status; +    vdc->reset = virtio_blk_reset; +    vdc->save = virtio_blk_save_device; +    vdc->load = virtio_blk_load_device; +} + +static const TypeInfo virtio_device_info = { +    .name = TYPE_VIRTIO_BLK, +    .parent = TYPE_VIRTIO_DEVICE, +    .instance_size = sizeof(VirtIOBlock), +    .instance_init = virtio_blk_instance_init, +    .class_init = virtio_blk_class_init, +}; + +static void virtio_register_types(void) +{ +    type_register_static(&virtio_device_info); +} + +type_init(virtio_register_types) diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h new file mode 100644 index 00000000..711b6927 --- /dev/null +++ b/hw/block/xen_blkif.h @@ -0,0 +1,115 @@ +#ifndef __XEN_BLKIF_H__ +#define __XEN_BLKIF_H__ + +#include <xen/io/ring.h> +#include <xen/io/blkif.h> +#include <xen/io/protocols.h> + +/* Not a real protocol.  Used to generate ring structs which contain + * the elements common to all protocols only.  This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places.  */ +struct blkif_common_request { +	char dummy; +}; +struct blkif_common_response { +	char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { +	uint8_t        operation;    /* BLKIF_OP_???                         */ +	uint8_t        nr_segments;  /* number of segments                   */ +	blkif_vdev_t   handle;       /* only for read/write requests         */ +	uint64_t       id;           /* private guest value, echoed in resp  */ +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */ +	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { +	uint64_t        id;              /* copied from request */ +	uint8_t         operation;       /* copied from request */ +	int16_t         status;          /* BLKIF_RSP_???       */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { +	uint8_t        operation;    /* BLKIF_OP_???  
                       */ +	uint8_t        nr_segments;  /* number of segments                   */ +	blkif_vdev_t   handle;       /* only for read/write requests         */ +	uint64_t       __attribute__((__aligned__(8))) id; +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */ +	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { +	uint64_t       __attribute__((__aligned__(8))) id; +	uint8_t         operation;       /* copied from request */ +	int16_t         status;          /* BLKIF_RSP_???       */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); + +union blkif_back_rings { +	blkif_back_ring_t        native; +	blkif_common_back_ring_t common; +        blkif_x86_32_back_ring_t x86_32_part; +        blkif_x86_64_back_ring_t x86_64_part; +}; +typedef union blkif_back_rings blkif_back_rings_t; + +enum blkif_protocol { +	BLKIF_PROTOCOL_NATIVE = 1, +	BLKIF_PROTOCOL_X86_32 = 2, +	BLKIF_PROTOCOL_X86_64 = 3, +}; + +static inline void blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +{ +	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + +	dst->operation = src->operation; +	dst->nr_segments = src->nr_segments; +	dst->handle = src->handle; +	dst->id = src->id; +	dst->sector_number = src->sector_number; +	if (src->operation == BLKIF_OP_DISCARD) { +		struct blkif_request_discard *s = (void *)src; +		struct blkif_request_discard *d = (void *)dst; +		d->nr_sectors = s->nr_sectors; +		return; +	} +	if (n > src->nr_segments) +		n = src->nr_segments; +	for (i = 0; i < n; i++) +		dst->seg[i] = src->seg[i]; +} + +static inline void blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +{ +	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + +	dst->operation = src->operation; +	dst->nr_segments = src->nr_segments; +	dst->handle = src->handle; +	dst->id = src->id; +	dst->sector_number = src->sector_number; +	if (src->operation == BLKIF_OP_DISCARD) { +		struct blkif_request_discard *s = (void *)src; +		struct blkif_request_discard *d = (void *)dst; +		d->nr_sectors = s->nr_sectors; +		return; +	} +	if (n > src->nr_segments) +		n = src->nr_segments; +	for (i = 0; i < n; i++) +		dst->seg[i] = src->seg[i]; +} + +#endif /* __XEN_BLKIF_H__ */ diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c new file mode 100644 index 00000000..267d8a8c --- /dev/null +++ b/hw/block/xen_disk.c @@ -0,0 +1,1106 @@ +/* + *  xen paravirt block device backend + * + *  (c) Gerd Hoffmann <kraxel@redhat.com> + * + *  This program is free software; you can redistribute it and/or modify + *  it under the terms of the GNU General Public License as published by + *  the Free Software Foundation; under version 2 of the License. + * + *  This program is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + *  GNU General Public License for more details. + * + *  You should have received a copy of the GNU General Public License along + *  with this program; if not, see <http://www.gnu.org/licenses/>. 
+ * + *  Contributions after 2012-01-13 are licensed under the terms of the + *  GNU GPL, version 2 or (at your option) any later version. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <inttypes.h> +#include <time.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/uio.h> + +#include "hw/hw.h" +#include "hw/xen/xen_backend.h" +#include "xen_blkif.h" +#include "sysemu/blockdev.h" +#include "sysemu/block-backend.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" + +/* ------------------------------------------------------------- */ + +static int batch_maps   = 0; + +static int max_requests = 32; + +/* ------------------------------------------------------------- */ + +#define BLOCK_SIZE  512 +#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2) + +struct PersistentGrant { +    void *page; +    struct XenBlkDev *blkdev; +}; + +typedef struct PersistentGrant PersistentGrant; + +struct PersistentRegion { +    void *addr; +    int num; +}; + +typedef struct PersistentRegion PersistentRegion; + +struct ioreq { +    blkif_request_t     req; +    int16_t             status; + +    /* parsed request */ +    off_t               start; +    QEMUIOVector        v; +    int                 presync; +    int                 postsync; +    uint8_t             mapped; + +    /* grant mapping */ +    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +    int                 prot; +    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +    void                *pages; +    int                 num_unmap; + +    /* aio status */ +    int                 aio_inflight; +    int                 aio_errors; + +    struct XenBlkDev    *blkdev; +    QLIST_ENTRY(ioreq)   list; +    BlockAcctCookie     acct; +}; + +struct XenBlkDev { +    struct XenDevice    xendev;  /* must be first */ +    char                *params; +    char                *mode; +    char                *type; +    char                *dev; +    char                *devtype; +    bool                directiosafe; +    const char          *fileproto; +    const char          *filename; +    int                 ring_ref; +    void                *sring; +    int64_t             file_blk; +    int64_t             file_size; +    int                 protocol; +    blkif_back_rings_t  rings; +    int                 more_work; +    int                 cnt_map; + +    /* request lists */ +    QLIST_HEAD(inflight_head, ioreq) inflight; +    QLIST_HEAD(finished_head, ioreq) finished; +    QLIST_HEAD(freelist_head, ioreq) freelist; +    int                 requests_total; +    int                 requests_inflight; +    int                 requests_finished; + +    /* Persistent grants extension */ +    gboolean            feature_discard; +    gboolean            feature_persistent; +    GTree               *persistent_gnts; +    GSList              *persistent_regions; +    unsigned int        persistent_gnt_count; +    unsigned int        max_grants; + +    /* qemu block driver */ +    DriveInfo           *dinfo; +    BlockBackend        *blk; +    QEMUBH              *bh; +}; + +/* ------------------------------------------------------------- */ + +static void ioreq_reset(struct ioreq *ioreq) +{ +    memset(&ioreq->req, 0, sizeof(ioreq->req)); +    ioreq->status 
= 0; +    ioreq->start = 0; +    ioreq->presync = 0; +    ioreq->postsync = 0; +    ioreq->mapped = 0; + +    memset(ioreq->domids, 0, sizeof(ioreq->domids)); +    memset(ioreq->refs, 0, sizeof(ioreq->refs)); +    ioreq->prot = 0; +    memset(ioreq->page, 0, sizeof(ioreq->page)); +    ioreq->pages = NULL; + +    ioreq->aio_inflight = 0; +    ioreq->aio_errors = 0; + +    ioreq->blkdev = NULL; +    memset(&ioreq->list, 0, sizeof(ioreq->list)); +    memset(&ioreq->acct, 0, sizeof(ioreq->acct)); + +    qemu_iovec_reset(&ioreq->v); +} + +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ +    uint ua = GPOINTER_TO_UINT(a); +    uint ub = GPOINTER_TO_UINT(b); +    return (ua > ub) - (ua < ub); +} + +static void destroy_grant(gpointer pgnt) +{ +    PersistentGrant *grant = pgnt; +    XenGnttab gnt = grant->blkdev->xendev.gnttabdev; + +    if (xc_gnttab_munmap(gnt, grant->page, 1) != 0) { +        xen_be_printf(&grant->blkdev->xendev, 0, +                      "xc_gnttab_munmap failed: %s\n", +                      strerror(errno)); +    } +    grant->blkdev->persistent_gnt_count--; +    xen_be_printf(&grant->blkdev->xendev, 3, +                  "unmapped grant %p\n", grant->page); +    g_free(grant); +} + +static void remove_persistent_region(gpointer data, gpointer dev) +{ +    PersistentRegion *region = data; +    struct XenBlkDev *blkdev = dev; +    XenGnttab gnt = blkdev->xendev.gnttabdev; + +    if (xc_gnttab_munmap(gnt, region->addr, region->num) != 0) { +        xen_be_printf(&blkdev->xendev, 0, +                      "xc_gnttab_munmap region %p failed: %s\n", +                      region->addr, strerror(errno)); +    } +    xen_be_printf(&blkdev->xendev, 3, +                  "unmapped grant region %p with %d pages\n", +                  region->addr, region->num); +    g_free(region); +} + +static struct ioreq *ioreq_start(struct XenBlkDev *blkdev) +{ +    struct ioreq *ioreq = NULL; + +    if (QLIST_EMPTY(&blkdev->freelist)) { +        if (blkdev->requests_total >= max_requests) { +            goto out; +        } +        /* allocate new struct */ +        ioreq = g_malloc0(sizeof(*ioreq)); +        ioreq->blkdev = blkdev; +        blkdev->requests_total++; +        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST); +    } else { +        /* get one from freelist */ +        ioreq = QLIST_FIRST(&blkdev->freelist); +        QLIST_REMOVE(ioreq, list); +    } +    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list); +    blkdev->requests_inflight++; + +out: +    return ioreq; +} + +static void ioreq_finish(struct ioreq *ioreq) +{ +    struct XenBlkDev *blkdev = ioreq->blkdev; + +    QLIST_REMOVE(ioreq, list); +    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list); +    blkdev->requests_inflight--; +    blkdev->requests_finished++; +} + +static void ioreq_release(struct ioreq *ioreq, bool finish) +{ +    struct XenBlkDev *blkdev = ioreq->blkdev; + +    QLIST_REMOVE(ioreq, list); +    ioreq_reset(ioreq); +    ioreq->blkdev = blkdev; +    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list); +    if (finish) { +        blkdev->requests_finished--; +    } else { +        blkdev->requests_inflight--; +    } +} + +/* + * translate request into iovec + start offset + * do sanity checks along the way + */ +static int ioreq_parse(struct ioreq *ioreq) +{ +    struct XenBlkDev *blkdev = ioreq->blkdev; +    uintptr_t mem; +    size_t len; +    int i; + +    xen_be_printf(&blkdev->xendev, 3, +                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 
"\n", +                  ioreq->req.operation, ioreq->req.nr_segments, +                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); +    switch (ioreq->req.operation) { +    case BLKIF_OP_READ: +        ioreq->prot = PROT_WRITE; /* to memory */ +        break; +    case BLKIF_OP_FLUSH_DISKCACHE: +        ioreq->presync = 1; +        if (!ioreq->req.nr_segments) { +            return 0; +        } +        /* fall through */ +    case BLKIF_OP_WRITE: +        ioreq->prot = PROT_READ; /* from memory */ +        break; +    case BLKIF_OP_DISCARD: +        return 0; +    default: +        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", +                      ioreq->req.operation); +        goto err; +    }; + +    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') { +        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n"); +        goto err; +    } + +    ioreq->start = ioreq->req.sector_number * blkdev->file_blk; +    for (i = 0; i < ioreq->req.nr_segments; i++) { +        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { +            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); +            goto err; +        } +        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { +            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n"); +            goto err; +        } +        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) { +            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n"); +            goto err; +        } + +        ioreq->domids[i] = blkdev->xendev.dom; +        ioreq->refs[i]   = ioreq->req.seg[i].gref; + +        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk; +        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk; +        qemu_iovec_add(&ioreq->v, (void*)mem, len); +    } +    if (ioreq->start + ioreq->v.size > blkdev->file_size) { +        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); +        goto err; +    } +    return 0; + +err: +    ioreq->status = BLKIF_RSP_ERROR; +    return -1; +} + +static void ioreq_unmap(struct ioreq *ioreq) +{ +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; +    int i; + +    if (ioreq->num_unmap == 0 || ioreq->mapped == 0) { +        return; +    } +    if (batch_maps) { +        if (!ioreq->pages) { +            return; +        } +        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) { +            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", +                          strerror(errno)); +        } +        ioreq->blkdev->cnt_map -= ioreq->num_unmap; +        ioreq->pages = NULL; +    } else { +        for (i = 0; i < ioreq->num_unmap; i++) { +            if (!ioreq->page[i]) { +                continue; +            } +            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0) { +                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n", +                              strerror(errno)); +            } +            ioreq->blkdev->cnt_map--; +            ioreq->page[i] = NULL; +        } +    } +    ioreq->mapped = 0; +} + +static int ioreq_map(struct ioreq *ioreq) +{ +    XenGnttab gnt = ioreq->blkdev->xendev.gnttabdev; +    uint32_t domids[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +    uint32_t refs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +    void *page[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +    int i, j, new_maps = 0; +    PersistentGrant *grant; +  
  PersistentRegion *region; +    /* domids and refs variables will contain the information necessary +     * to map the grants that are needed to fulfill this request. +     * +     * After mapping the needed grants, the page array will contain the +     * memory address of each granted page in the order specified in ioreq +     * (disregarding if it's a persistent grant or not). +     */ + +    if (ioreq->v.niov == 0 || ioreq->mapped == 1) { +        return 0; +    } +    if (ioreq->blkdev->feature_persistent) { +        for (i = 0; i < ioreq->v.niov; i++) { +            grant = g_tree_lookup(ioreq->blkdev->persistent_gnts, +                                    GUINT_TO_POINTER(ioreq->refs[i])); + +            if (grant != NULL) { +                page[i] = grant->page; +                xen_be_printf(&ioreq->blkdev->xendev, 3, +                              "using persistent-grant %" PRIu32 "\n", +                              ioreq->refs[i]); +            } else { +                    /* Add the grant to the list of grants that +                     * should be mapped +                     */ +                    domids[new_maps] = ioreq->domids[i]; +                    refs[new_maps] = ioreq->refs[i]; +                    page[i] = NULL; +                    new_maps++; +            } +        } +        /* Set the protection to RW, since grants may be reused later +         * with a different protection than the one needed for this request +         */ +        ioreq->prot = PROT_WRITE | PROT_READ; +    } else { +        /* All grants in the request should be mapped */ +        memcpy(refs, ioreq->refs, sizeof(refs)); +        memcpy(domids, ioreq->domids, sizeof(domids)); +        memset(page, 0, sizeof(page)); +        new_maps = ioreq->v.niov; +    } + +    if (batch_maps && new_maps) { +        ioreq->pages = xc_gnttab_map_grant_refs +            (gnt, new_maps, domids, refs, ioreq->prot); +        if (ioreq->pages == NULL) { +            xen_be_printf(&ioreq->blkdev->xendev, 0, +                          "can't map %d grant refs (%s, %d maps)\n", +                          new_maps, strerror(errno), ioreq->blkdev->cnt_map); +            return -1; +        } +        for (i = 0, j = 0; i < ioreq->v.niov; i++) { +            if (page[i] == NULL) { +                page[i] = ioreq->pages + (j++) * XC_PAGE_SIZE; +            } +        } +        ioreq->blkdev->cnt_map += new_maps; +    } else if (new_maps)  { +        for (i = 0; i < new_maps; i++) { +            ioreq->page[i] = xc_gnttab_map_grant_ref +                (gnt, domids[i], refs[i], ioreq->prot); +            if (ioreq->page[i] == NULL) { +                xen_be_printf(&ioreq->blkdev->xendev, 0, +                              "can't map grant ref %d (%s, %d maps)\n", +                              refs[i], strerror(errno), ioreq->blkdev->cnt_map); +                ioreq->mapped = 1; +                ioreq_unmap(ioreq); +                return -1; +            } +            ioreq->blkdev->cnt_map++; +        } +        for (i = 0, j = 0; i < ioreq->v.niov; i++) { +            if (page[i] == NULL) { +                page[i] = ioreq->page[j++]; +            } +        } +    } +    if (ioreq->blkdev->feature_persistent && new_maps != 0 && +        (!batch_maps || (ioreq->blkdev->persistent_gnt_count + new_maps <= +        ioreq->blkdev->max_grants))) { +        /* +         * If we are using persistent grants and batch mappings only +         * add the new maps to the list of persistent grants if the whole +         * area 
+static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
+
+static void qemu_aio_complete(void *opaque, int ret)
+{
+    struct ioreq *ioreq = opaque;
+
+    if (ret != 0) {
+        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
+                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
+        ioreq->aio_errors++;
+    }
+
+    ioreq->aio_inflight--;
+    if (ioreq->presync) {
+        ioreq->presync = 0;
+        ioreq_runio_qemu_aio(ioreq);
+        return;
+    }
+    if (ioreq->aio_inflight > 0) {
+        return;
+    }
+    if (ioreq->postsync) {
+        ioreq->postsync = 0;
+        ioreq->aio_inflight++;
+        blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
+        return;
+    }
+
+    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
+    ioreq_unmap(ioreq);
+    ioreq_finish(ioreq);
+    switch (ioreq->req.operation) {
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_FLUSH_DISKCACHE:
+        if (!ioreq->req.nr_segments) {
+            break;
+        }
+        /* fall through */
+    case BLKIF_OP_READ:
+        block_acct_done(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct);
+        break;
+    case BLKIF_OP_DISCARD:
+    default:
+        break;
+    }
+    qemu_bh_schedule(ioreq->blkdev->bh);
+}
+
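Note how qemu_aio_complete doubles as a completion counter: aio_inflight carries one reference held by the submission path plus one per AIO actually issued, so the request is only finalized when the last decrement reaches zero (presync and postsync each re-arm it for the extra flush). A hedged sketch of the idiom; Req, submit and complete_one are illustrative names:

    typedef struct {
        int inflight;   /* plays the role of aio_inflight */
        int errors;
    } Req;

    static void complete_one(Req *r, int ret)
    {
        if (ret != 0) {
            r->errors++;
        }
        if (--r->inflight > 0) {
            return;             /* other AIOs are still pending */
        }
        /* last reference gone: send the response, release the request */
    }

    static void submit(Req *r, int naio)
    {
        r->inflight = 1;        /* the submission path's own reference */
        for (int i = 0; i < naio; i++) {
            r->inflight++;      /* one per blk_aio_*() call issued */
            /* ... issue the AIO, completing via complete_one() ... */
        }
        complete_one(r, 0);     /* drop the submission reference */
    }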
+static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
+{
+    struct XenBlkDev *blkdev = ioreq->blkdev;
+
+    if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
+        goto err_no_map;
+    }
+
+    ioreq->aio_inflight++;
+    if (ioreq->presync) {
+        blk_aio_flush(ioreq->blkdev->blk, qemu_aio_complete, ioreq);
+        return 0;
+    }
+
+    switch (ioreq->req.operation) {
+    case BLKIF_OP_READ:
+        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
+                         ioreq->v.size, BLOCK_ACCT_READ);
+        ioreq->aio_inflight++;
+        blk_aio_readv(blkdev->blk, ioreq->start / BLOCK_SIZE,
+                      &ioreq->v, ioreq->v.size / BLOCK_SIZE,
+                      qemu_aio_complete, ioreq);
+        break;
+    case BLKIF_OP_WRITE:
+    case BLKIF_OP_FLUSH_DISKCACHE:
+        if (!ioreq->req.nr_segments) {
+            break;
+        }
+
+        block_acct_start(blk_get_stats(blkdev->blk), &ioreq->acct,
+                         ioreq->v.size, BLOCK_ACCT_WRITE);
+        ioreq->aio_inflight++;
+        blk_aio_writev(blkdev->blk, ioreq->start / BLOCK_SIZE,
+                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
+                       qemu_aio_complete, ioreq);
+        break;
+    case BLKIF_OP_DISCARD:
+    {
+        struct blkif_request_discard *discard_req = (void *)&ioreq->req;
+        ioreq->aio_inflight++;
+        blk_aio_discard(blkdev->blk,
+                        discard_req->sector_number, discard_req->nr_sectors,
+                        qemu_aio_complete, ioreq);
+        break;
+    }
+    default:
+        /* unknown operation (shouldn't happen -- parse catches this) */
+        goto err;
+    }
+
+    qemu_aio_complete(ioreq, 0);
+
+    return 0;
+
+err:
+    ioreq_unmap(ioreq);
+err_no_map:
+    ioreq_finish(ioreq);
+    ioreq->status = BLKIF_RSP_ERROR;
+    return -1;
+}
+
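All offsets and sizes handed to blk_aio_readv/blk_aio_writev above are byte counts divided down to sectors; BLOCK_SIZE is defined near the top of this file as 512. A worked example, with illustrative numbers:

    start  = 1 MiB  ->  sector_num = 1048576 / 512 = 2048
    v.size = 4 KiB  ->  nb_sectors =    4096 / 512 =    8

so a 4 KiB read at byte offset 1 MiB becomes an 8-sector request starting at sector 2048.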
+static int blk_send_response_one(struct ioreq *ioreq)
+{
+    struct XenBlkDev  *blkdev = ioreq->blkdev;
+    int               send_notify   = 0;
+    int               have_requests = 0;
+    blkif_response_t  resp;
+    void              *dst;
+
+    resp.id        = ioreq->req.id;
+    resp.operation = ioreq->req.operation;
+    resp.status    = ioreq->status;
+
+    /* Place on the response ring for the relevant domain. */
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+        dst = RING_GET_RESPONSE(&blkdev->rings.native,
+                                blkdev->rings.native.rsp_prod_pvt);
+        break;
+    case BLKIF_PROTOCOL_X86_32:
+        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
+                                blkdev->rings.x86_32_part.rsp_prod_pvt);
+        break;
+    case BLKIF_PROTOCOL_X86_64:
+        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
+                                blkdev->rings.x86_64_part.rsp_prod_pvt);
+        break;
+    default:
+        dst = NULL;
+        return 0;
+    }
+    memcpy(dst, &resp, sizeof(resp));
+    blkdev->rings.common.rsp_prod_pvt++;
+
+    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
+    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
+        /*
+         * Tail check for pending requests. Allows frontend to avoid
+         * notifications if requests are already in flight (lower
+         * overheads and promotes batching).
+         */
+        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
+    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
+        have_requests = 1;
+    }
+
+    if (have_requests) {
+        blkdev->more_work++;
+    }
+    return send_notify;
+}
+
+/* walk finished list, send outstanding responses, free requests */
+static void blk_send_response_all(struct XenBlkDev *blkdev)
+{
+    struct ioreq *ioreq;
+    int send_notify = 0;
+
+    while (!QLIST_EMPTY(&blkdev->finished)) {
+        ioreq = QLIST_FIRST(&blkdev->finished);
+        send_notify += blk_send_response_one(ioreq);
+        ioreq_release(ioreq, true);
+    }
+    if (send_notify) {
+        xen_be_send_notify(&blkdev->xendev);
+    }
+}
+
+static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq,
+                           RING_IDX rc)
+{
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
+               sizeof(ioreq->req));
+        break;
+    case BLKIF_PROTOCOL_X86_32:
+        blkif_get_x86_32_req(&ioreq->req,
+                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
+        break;
+    case BLKIF_PROTOCOL_X86_64:
+        blkif_get_x86_64_req(&ioreq->req,
+                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
+        break;
+    }
+    return 0;
+}
+
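blk_send_response_one relies on the ring macros' notification avoidance: after publishing a batch of responses, the frontend is only signalled if it had already consumed everything produced before the batch. With the memory barriers omitted, the test inside RING_PUSH_RESPONSES_AND_CHECK_NOTIFY reduces to roughly the following sketch (field names follow Xen's io/ring.h; the function name is illustrative):

    static int push_and_check_notify(unsigned *shared_rsp_prod,
                                     unsigned rsp_prod_pvt,
                                     unsigned rsp_event)
    {
        unsigned old = *shared_rsp_prod;

        *shared_rsp_prod = rsp_prod_pvt;   /* publish this batch */
        /* notify only if rsp_event lies inside the window just published */
        return (unsigned)(rsp_prod_pvt - rsp_event) <
               (unsigned)(rsp_prod_pvt - old);
    }

The unsigned subtractions keep the comparison correct across ring-index wraparound.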
+static void blk_handle_requests(struct XenBlkDev *blkdev)
+{
+    RING_IDX rc, rp;
+    struct ioreq *ioreq;
+
+    blkdev->more_work = 0;
+
+    rc = blkdev->rings.common.req_cons;
+    rp = blkdev->rings.common.sring->req_prod;
+    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+    blk_send_response_all(blkdev);
+    while (rc != rp) {
+        /* pull request from ring */
+        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
+            break;
+        }
+        ioreq = ioreq_start(blkdev);
+        if (ioreq == NULL) {
+            blkdev->more_work++;
+            break;
+        }
+        blk_get_request(blkdev, ioreq, rc);
+        blkdev->rings.common.req_cons = ++rc;
+
+        /* parse the request */
+        if (ioreq_parse(ioreq) != 0) {
+            if (blk_send_response_one(ioreq)) {
+                xen_be_send_notify(&blkdev->xendev);
+            }
+            ioreq_release(ioreq, false);
+            continue;
+        }
+
+        ioreq_runio_qemu_aio(ioreq);
+    }
+
+    if (blkdev->more_work && blkdev->requests_inflight < max_requests) {
+        qemu_bh_schedule(blkdev->bh);
+    }
+}
+
+/* ------------------------------------------------------------- */
+
+static void blk_bh(void *opaque)
+{
+    struct XenBlkDev *blkdev = opaque;
+    blk_handle_requests(blkdev);
+}
+
+/*
+ * We need to account for the grant allocations requiring contiguous
+ * chunks; the worst case number would be
+ *     max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
+ * but in order to keep things simple just use
+ *     2 * max_req * max_seg.
+ */
+#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))
+
+static void blk_alloc(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    QLIST_INIT(&blkdev->inflight);
+    QLIST_INIT(&blkdev->finished);
+    QLIST_INIT(&blkdev->freelist);
+    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
+    if (xen_mode != XEN_EMULATE) {
+        batch_maps = 1;
+    }
+    if (xc_gnttab_set_max_grants(xendev->gnttabdev,
+            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
+        xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n",
+                      strerror(errno));
+    }
+}
+
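Plugging in the defaults used by this file (max_requests is 32, and BLKIF_MAX_SEGMENTS_PER_REQUEST is 11 in the blkif headers), the two expressions in the MAX_GRANTS comment work out to:

    exact worst case: 32 * 11 + 31 * 10 + 1 = 663 grants
    bound used here:  2 * 32 * 11           = 704 grants

so the simplified bound over-reserves by 41 grants, roughly 6%.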
+static void blk_parse_discard(struct XenBlkDev *blkdev)
+{
+    int enable;
+
+    blkdev->feature_discard = true;
+
+    if (xenstore_read_be_int(&blkdev->xendev, "discard-enable", &enable) == 0) {
+        blkdev->feature_discard = !!enable;
+    }
+
+    if (blkdev->feature_discard) {
+        xenstore_write_be_int(&blkdev->xendev, "feature-discard", 1);
+    }
+}
+
+static int blk_init(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+    int info = 0;
+    char *directiosafe = NULL;
+
+    /* read xenstore entries */
+    if (blkdev->params == NULL) {
+        char *h = NULL;
+        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
+        if (blkdev->params != NULL) {
+            h = strchr(blkdev->params, ':');
+        }
+        if (h != NULL) {
+            blkdev->fileproto = blkdev->params;
+            blkdev->filename  = h+1;
+            *h = 0;
+        } else {
+            blkdev->fileproto = "<unset>";
+            blkdev->filename  = blkdev->params;
+        }
+    }
+    if (!strcmp("aio", blkdev->fileproto)) {
+        blkdev->fileproto = "raw";
+    }
+    if (blkdev->mode == NULL) {
+        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
+    }
+    if (blkdev->type == NULL) {
+        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
+    }
+    if (blkdev->dev == NULL) {
+        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
+    }
+    if (blkdev->devtype == NULL) {
+        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");
+    }
+    directiosafe = xenstore_read_be_str(&blkdev->xendev, "direct-io-safe");
+    blkdev->directiosafe = (directiosafe && atoi(directiosafe));
+
+    /* do we have all we need? */
+    if (blkdev->params == NULL ||
+        blkdev->mode == NULL   ||
+        blkdev->type == NULL   ||
+        blkdev->dev == NULL) {
+        goto out_error;
+    }
+
+    /* read-only ? */
+    if (strcmp(blkdev->mode, "w")) {
+        info  |= VDISK_READONLY;
+    }
+
+    /* cdrom ? */
+    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom")) {
+        info  |= VDISK_CDROM;
+    }
+
+    blkdev->file_blk  = BLOCK_SIZE;
+
+    /* fill info
+     * blk_connect supplies sector-size and sectors
+     */
+    xenstore_write_be_int(&blkdev->xendev, "feature-flush-cache", 1);
+    xenstore_write_be_int(&blkdev->xendev, "feature-persistent", 1);
+    xenstore_write_be_int(&blkdev->xendev, "info", info);
+
+    blk_parse_discard(blkdev);
+
+    g_free(directiosafe);
+    return 0;
+
+out_error:
+    g_free(blkdev->params);
+    blkdev->params = NULL;
+    g_free(blkdev->mode);
+    blkdev->mode = NULL;
+    g_free(blkdev->type);
+    blkdev->type = NULL;
+    g_free(blkdev->dev);
+    blkdev->dev = NULL;
+    g_free(blkdev->devtype);
+    blkdev->devtype = NULL;
+    g_free(directiosafe);
+    blkdev->directiosafe = false;
+    return -1;
+}
+
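Concretely, blk_init splits the xenstore params value at the first ':' into a protocol prefix and a file name; the paths below are illustrative:

    params = "qcow2:/var/lib/xen/disk.qcow2"
        ->  fileproto = "qcow2", filename = "/var/lib/xen/disk.qcow2"
    params = "/dev/vg0/guest"              (no ':')
        ->  fileproto = "<unset>", filename = "/dev/vg0/guest"
    params = "aio:/srv/img.raw"
        ->  the legacy "aio" prefix is rewritten to "raw"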
+static int blk_connect(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+    int pers, index, qflags;
+    bool readonly = true;
+
+    /* read-only ? */
+    if (blkdev->directiosafe) {
+        qflags = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO;
+    } else {
+        qflags = BDRV_O_CACHE_WB;
+    }
+    if (strcmp(blkdev->mode, "w") == 0) {
+        qflags |= BDRV_O_RDWR;
+        readonly = false;
+    }
+    if (blkdev->feature_discard) {
+        qflags |= BDRV_O_UNMAP;
+    }
+
+    /* init qemu block driver */
+    index = (blkdev->xendev.dev - 202 * 256) / 16;
+    blkdev->dinfo = drive_get(IF_XEN, 0, index);
+    if (!blkdev->dinfo) {
+        Error *local_err = NULL;
+        QDict *options = NULL;
+
+        if (strcmp(blkdev->fileproto, "<unset>")) {
+            options = qdict_new();
+            qdict_put(options, "driver", qstring_from_str(blkdev->fileproto));
+        }
+
+        /* setup via xenbus -> create new block driver instance */
+        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
+        blkdev->blk = blk_new_open(blkdev->dev, blkdev->filename, NULL, options,
+                                   qflags, &local_err);
+        if (!blkdev->blk) {
+            xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
+                          error_get_pretty(local_err));
+            error_free(local_err);
+            return -1;
+        }
+    } else {
+        /* setup via qemu cmdline -> already setup for us */
+        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
+        blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo);
+        if (blk_is_read_only(blkdev->blk) && !readonly) {
+            xen_be_printf(&blkdev->xendev, 0, "Unexpected read-only drive\n");
+            blkdev->blk = NULL;
+            return -1;
+        }
+        /* blkdev->blk is not created by us, so take a reference
+         * so we can blk_unref() unconditionally */
+        blk_ref(blkdev->blk);
+    }
+    blk_attach_dev_nofail(blkdev->blk, blkdev);
+    blkdev->file_size = blk_getlength(blkdev->blk);
+    if (blkdev->file_size < 0) {
+        xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
+                      (int)blkdev->file_size, strerror(-blkdev->file_size),
+                      bdrv_get_format_name(blk_bs(blkdev->blk)) ?: "-");
+        blkdev->file_size = 0;
+    }
+
+    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
+                  " size %" PRId64 " (%" PRId64 " MB)\n",
+                  blkdev->type, blkdev->fileproto, blkdev->filename,
+                  blkdev->file_size, blkdev->file_size >> 20);
+
+    /* Fill in the sector size and number of sectors */
+    xenstore_write_be_int(&blkdev->xendev, "sector-size", blkdev->file_blk);
+    xenstore_write_be_int64(&blkdev->xendev, "sectors",
+                            blkdev->file_size / blkdev->file_blk);
+
+    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref",
+                             &blkdev->ring_ref) == -1) {
+        return -1;
+    }
+    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
+                             &blkdev->xendev.remote_port) == -1) {
+        return -1;
+    }
+    if (xenstore_read_fe_int(&blkdev->xendev, "feature-persistent", &pers)) {
+        blkdev->feature_persistent = FALSE;
+    } else {
+        blkdev->feature_persistent = !!pers;
+    }
+
+    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
+    if (blkdev->xendev.protocol) {
+        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
+            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
+        }
+        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
+            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
+        }
+    }
+
+    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
+                                            blkdev->xendev.dom,
+                                            blkdev->ring_ref,
+                                            PROT_READ | PROT_WRITE);
+    if (!blkdev->sring) {
+        return -1;
+    }
+    blkdev->cnt_map++;
+
+    switch (blkdev->protocol) {
+    case BLKIF_PROTOCOL_NATIVE:
+    {
+        blkif_sring_t *sring_native = blkdev->sring;
+
+        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
+        break;
+    }
+    case BLKIF_PROTOCOL_X86_32:
+    {
+        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;
+
+        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
+        break;
+    }
+    case BLKIF_PROTOCOL_X86_64:
+    {
+        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;
+
+        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
+        break;
+    }
+    }
+
+    if (blkdev->feature_persistent) {
+        /* Init persistent grants */
+        blkdev->max_grants = max_requests * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+        blkdev->persistent_gnts = g_tree_new_full((GCompareDataFunc)int_cmp,
+                                             NULL, NULL,
+                                             batch_maps ?
+                                             (GDestroyNotify)g_free :
+                                             (GDestroyNotify)destroy_grant);
+        blkdev->persistent_regions = NULL;
+        blkdev->persistent_gnt_count = 0;
+    }
+
+    xen_be_bind_evtchn(&blkdev->xendev);
+
+    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
+                  "remote port %d, local port %d\n",
+                  blkdev->xendev.protocol, blkdev->ring_ref,
+                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
+    return 0;
+}
+
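The index computation at the top of blk_connect decodes Xen's virtual block device numbering: xvd disks use major 202 with 16 minors per disk, so xendev.dev is 202 * 256 + 16 * disk (+ partition). For example:

    xvda: dev = 202 * 256 + 0  = 51712  ->  index = (51712 - 51712) / 16 = 0
    xvdb: dev = 202 * 256 + 16 = 51728  ->  index = (51728 - 51712) / 16 = 1

drive_get(IF_XEN, 0, index) then finds the -drive entry configured for that disk, if any.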
+static void blk_disconnect(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    if (blkdev->blk) {
+        blk_detach_dev(blkdev->blk, blkdev);
+        blk_unref(blkdev->blk);
+        blkdev->blk = NULL;
+    }
+    xen_be_unbind_evtchn(&blkdev->xendev);
+
+    if (blkdev->sring) {
+        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
+        blkdev->cnt_map--;
+        blkdev->sring = NULL;
+    }
+
+    /*
+     * Unmap persistent grants before switching to the closed state
+     * so the frontend can free them.
+     *
+     * In the !batch_maps case g_tree_destroy will take care of unmapping
+     * the grant, but in the batch_maps case we need to iterate over every
+     * region in persistent_regions and unmap it.
+     */
+    if (blkdev->feature_persistent) {
+        g_tree_destroy(blkdev->persistent_gnts);
+        assert(batch_maps || blkdev->persistent_gnt_count == 0);
+        if (batch_maps) {
+            blkdev->persistent_gnt_count = 0;
+            g_slist_foreach(blkdev->persistent_regions,
+                            (GFunc)remove_persistent_region, blkdev);
+            g_slist_free(blkdev->persistent_regions);
+        }
+        blkdev->feature_persistent = false;
+    }
+}
+
+static int blk_free(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+    struct ioreq *ioreq;
+
+    if (blkdev->blk || blkdev->sring) {
+        blk_disconnect(xendev);
+    }
+
+    while (!QLIST_EMPTY(&blkdev->freelist)) {
+        ioreq = QLIST_FIRST(&blkdev->freelist);
+        QLIST_REMOVE(ioreq, list);
+        qemu_iovec_destroy(&ioreq->v);
+        g_free(ioreq);
+    }
+
+    g_free(blkdev->params);
+    g_free(blkdev->mode);
+    g_free(blkdev->type);
+    g_free(blkdev->dev);
+    g_free(blkdev->devtype);
+    qemu_bh_delete(blkdev->bh);
+    return 0;
+}
+
+static void blk_event(struct XenDevice *xendev)
+{
+    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
+
+    qemu_bh_schedule(blkdev->bh);
+}
+
+struct XenDevOps xen_blkdev_ops = {
+    .size       = sizeof(struct XenBlkDev),
+    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
+    .alloc      = blk_alloc,
+    .init       = blk_init,
+    .initialise = blk_connect,
+    .disconnect = blk_disconnect,
+    .event      = blk_event,
+    .free       = blk_free,
+};
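For orientation, the Xen backend core invokes these callbacks roughly in lifetime order: blk_alloc when the backend device is created, blk_init once the xenstore entries are readable, blk_connect (.initialise, retried until it succeeds) when the frontend comes up, blk_event on every event-channel kick (which just schedules blk_bh), and blk_disconnect/blk_free on teardown. The exact state machine lives in the generic xen_backend code, not in this file.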
