diff options
Diffstat (limited to 'net/9p/trans_virtio.c')
-rw-r--r-- | net/9p/trans_virtio.c | 629 |
1 files changed, 629 insertions, 0 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c new file mode 100644 index 00000000..e317583f --- /dev/null +++ b/net/9p/trans_virtio.c @@ -0,0 +1,629 @@ +/* + * The Virtio 9p transport driver + * + * This is a block based transport driver based on the lguest block driver + * code. + * + * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation + * + * Based on virtio console driver + * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <linux/slab.h> +#include <net/9p/9p.h> +#include <linux/parser.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <linux/scatterlist.h> +#include <linux/swap.h> +#include <linux/virtio.h> +#include <linux/virtio_9p.h> +#include "trans_common.h" + +#define VIRTQUEUE_NUM 128 + +/* a single mutex to manage channel initialization and attachment */ +static DEFINE_MUTEX(virtio_9p_lock); +static DECLARE_WAIT_QUEUE_HEAD(vp_wq); +static atomic_t vp_pinned = ATOMIC_INIT(0); + +/** + * struct virtio_chan - per-instance transport information + * @initialized: whether the channel is initialized + * @inuse: whether the channel is in use + * @lock: protects multiple elements within this structure + * @client: client instance + * @vdev: virtio dev associated with this channel + * @vq: virtio queue associated with this channel + * @sg: scatter gather list which is used to pack a request (protected?) + * + * We keep all per-channel information in a structure. + * This structure is allocated within the devices dev->mem space. + * A pointer to the structure will get put in the transport private. + * + */ + +struct virtio_chan { + bool inuse; + + spinlock_t lock; + + struct p9_client *client; + struct virtio_device *vdev; + struct virtqueue *vq; + int ring_bufs_avail; + wait_queue_head_t *vc_wq; + /* This is global limit. Since we don't have a global structure, + * will be placing it in each channel. + */ + int p9_max_pages; + /* Scatterlist: can be too big for stack. */ + struct scatterlist sg[VIRTQUEUE_NUM]; + + int tag_len; + /* + * tag name to identify a mount Non-null terminated + */ + char *tag; + + struct list_head chan_list; +}; + +static struct list_head virtio_chan_list; + +/* How many bytes left in this page. */ +static unsigned int rest_of_page(void *data) +{ + return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); +} + +/** + * p9_virtio_close - reclaim resources of a channel + * @client: client instance + * + * This reclaims a channel by freeing its resources and + * reseting its inuse flag. + * + */ + +static void p9_virtio_close(struct p9_client *client) +{ + struct virtio_chan *chan = client->trans; + + mutex_lock(&virtio_9p_lock); + if (chan) + chan->inuse = false; + mutex_unlock(&virtio_9p_lock); +} + +/** + * req_done - callback which signals activity from the server + * @vq: virtio queue activity was received on + * + * This notifies us that the server has triggered some activity + * on the virtio channel - most likely a response to request we + * sent. Figure out which requests now have responses and wake up + * those threads. + * + * Bugs: could do with some additional sanity checking, but appears to work. + * + */ + +static void req_done(struct virtqueue *vq) +{ + struct virtio_chan *chan = vq->vdev->priv; + struct p9_fcall *rc; + unsigned int len; + struct p9_req_t *req; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + + while (1) { + spin_lock_irqsave(&chan->lock, flags); + rc = virtqueue_get_buf(chan->vq, &len); + + if (rc == NULL) { + spin_unlock_irqrestore(&chan->lock, flags); + break; + } + + chan->ring_bufs_avail = 1; + spin_unlock_irqrestore(&chan->lock, flags); + /* Wakeup if anyone waiting for VirtIO ring space. */ + wake_up(chan->vc_wq); + P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); + P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + if (req->tc->private) { + struct trans_rpage_info *rp = req->tc->private; + int p = rp->rp_nr_pages; + /*Release pages */ + p9_release_req_pages(rp); + atomic_sub(p, &vp_pinned); + wake_up(&vp_wq); + if (rp->rp_alloc) + kfree(rp); + req->tc->private = NULL; + } + req->status = REQ_STATUS_RCVD; + p9_client_cb(chan->client, req); + } +} + +/** + * pack_sg_list - pack a scatter gather list from a linear buffer + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @limit: maximum segment to pack data to + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + * + * sg_lists have multiple segments of various sizes. This will pack + * arbitrary data into an existing scatter gather list, segmenting the + * data as necessary within constraints. + * + */ + +static int +pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, + int count) +{ + int s; + int index = start; + + while (count) { + s = rest_of_page(data); + if (s > count) + s = count; + sg_set_buf(&sg[index++], data, s); + count -= s; + data += s; + BUG_ON(index > limit); + } + + return index-start; +} + +/* We don't currently allow canceling of virtio requests */ +static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +/** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata_off: Offset into the first page + * @**pdata: a list of pages to add into sg. + * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, + struct page **pdata, int count) +{ + int s; + int i = 0; + int index = start; + + if (pdata_off) { + s = min((int)(PAGE_SIZE - pdata_off), count); + sg_set_page(&sg[index++], pdata[i++], s, pdata_off); + count -= s; + } + + while (count) { + BUG_ON(index > limit); + s = min((int)PAGE_SIZE, count); + sg_set_page(&sg[index++], pdata[i++], s, 0); + count -= s; + } + return index-start; +} + +/** + * p9_virtio_request - issue a request + * @client: client instance issuing the request + * @req: request to be issued + * + */ + +static int +p9_virtio_request(struct p9_client *client, struct p9_req_t *req) +{ + int in, out, inp, outp; + struct virtio_chan *chan = client->trans; + unsigned long flags; + size_t pdata_off = 0; + struct trans_rpage_info *rpinfo = NULL; + int err, pdata_len = 0; + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + + req->status = REQ_STATUS_SENT; + + if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { + int nr_pages = p9_nr_pages(req); + int rpinfo_size = sizeof(struct trans_rpage_info) + + sizeof(struct page *) * nr_pages; + + if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { + err = wait_event_interruptible(vp_wq, + atomic_read(&vp_pinned) < chan->p9_max_pages); + if (err == -ERESTARTSYS) + return err; + P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); + } + + if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { + /* We can use sdata */ + req->tc->private = req->tc->sdata + req->tc->size; + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 0; + } else { + req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); + if (!req->tc->private) { + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " + "private kmalloc returned NULL"); + return -ENOMEM; + } + rpinfo = (struct trans_rpage_info *)req->tc->private; + rpinfo->rp_alloc = 1; + } + + err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, + req->tc->id == P9_TREAD ? 1 : 0); + if (err < 0) { + if (rpinfo->rp_alloc) + kfree(rpinfo); + return err; + } else { + atomic_add(rpinfo->rp_nr_pages, &vp_pinned); + } + } + +req_retry_pinned: + spin_lock_irqsave(&chan->lock, flags); + + /* Handle out VirtIO ring buffers */ + out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, + req->tc->size); + + if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { + /* We have additional write payload buffer to take care */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf; + if (req->tc->pubuf) + pbuf = (__force char *) req->tc->pubuf; + else + pbuf = req->tc->pkbuf; + outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, + req->tc->pbuf_size); + } + out += outp; + } + + /* Handle in VirtIO ring buffers */ + if (req->tc->pbuf_size && + ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { + /* + * Take care of additional Read payload. + * 11 is the read/write header = PDU Header(7) + IO Size (4). + * Arrange in such a way that server places header in the + * alloced memory and payload onto the user buffer. + */ + inp = pack_sg_list(chan->sg, out, + VIRTQUEUE_NUM, req->rc->sdata, 11); + /* + * Running executables in the filesystem may result in + * a read request with kernel buffer as opposed to user buffer. + */ + if (req->tc->pubuf && P9_IS_USER_CONTEXT) { + in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, + pdata_off, rpinfo->rp_data, pdata_len); + } else { + char *pbuf; + if (req->tc->pubuf) + pbuf = (__force char *) req->tc->pubuf; + else + pbuf = req->tc->pkbuf; + + in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, + pbuf, req->tc->pbuf_size); + } + in += inp; + } else { + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, + req->rc->sdata, req->rc->capacity); + } + + err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); + if (err < 0) { + if (err == -ENOSPC) { + chan->ring_bufs_avail = 0; + spin_unlock_irqrestore(&chan->lock, flags); + err = wait_event_interruptible(*chan->vc_wq, + chan->ring_bufs_avail); + if (err == -ERESTARTSYS) + return err; + + P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); + goto req_retry_pinned; + } else { + spin_unlock_irqrestore(&chan->lock, flags); + P9_DPRINTK(P9_DEBUG_TRANS, + "9p debug: " + "virtio rpc add_buf returned failure"); + if (rpinfo && rpinfo->rp_alloc) + kfree(rpinfo); + return -EIO; + } + } + + virtqueue_kick(chan->vq); + spin_unlock_irqrestore(&chan->lock, flags); + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + return 0; +} + +static ssize_t p9_mount_tag_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct virtio_chan *chan; + struct virtio_device *vdev; + + vdev = dev_to_virtio(dev); + chan = vdev->priv; + + return snprintf(buf, chan->tag_len + 1, "%s", chan->tag); +} + +static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL); + +/** + * p9_virtio_probe - probe for existence of 9P virtio channels + * @vdev: virtio device to probe + * + * This probes for existing virtio channels. + * + */ + +static int p9_virtio_probe(struct virtio_device *vdev) +{ + __u16 tag_len; + char *tag; + int err; + struct virtio_chan *chan; + + chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL); + if (!chan) { + printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); + err = -ENOMEM; + goto fail; + } + + chan->vdev = vdev; + + /* We expect one virtqueue, for requests. */ + chan->vq = virtio_find_single_vq(vdev, req_done, "requests"); + if (IS_ERR(chan->vq)) { + err = PTR_ERR(chan->vq); + goto out_free_vq; + } + chan->vq->vdev->priv = chan; + spin_lock_init(&chan->lock); + + sg_init_table(chan->sg, VIRTQUEUE_NUM); + + chan->inuse = false; + if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { + vdev->config->get(vdev, + offsetof(struct virtio_9p_config, tag_len), + &tag_len, sizeof(tag_len)); + } else { + err = -EINVAL; + goto out_free_vq; + } + tag = kmalloc(tag_len, GFP_KERNEL); + if (!tag) { + err = -ENOMEM; + goto out_free_vq; + } + vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); + chan->tag = tag; + chan->tag_len = tag_len; + err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + if (err) { + goto out_free_tag; + } + chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); + if (!chan->vc_wq) { + err = -ENOMEM; + goto out_free_tag; + } + init_waitqueue_head(chan->vc_wq); + chan->ring_bufs_avail = 1; + /* Ceiling limit to avoid denial of service attacks */ + chan->p9_max_pages = nr_free_buffer_pages()/4; + + mutex_lock(&virtio_9p_lock); + list_add_tail(&chan->chan_list, &virtio_chan_list); + mutex_unlock(&virtio_9p_lock); + return 0; + +out_free_tag: + kfree(tag); +out_free_vq: + vdev->config->del_vqs(vdev); + kfree(chan); +fail: + return err; +} + + +/** + * p9_virtio_create - allocate a new virtio channel + * @client: client instance invoking this transport + * @devname: string identifying the channel to connect to (unused) + * @args: args passed from sys_mount() for per-transport options (unused) + * + * This sets up a transport channel for 9p communication. Right now + * we only match the first available channel, but eventually we couldlook up + * alternate channels by matching devname versus a virtio_config entry. + * We use a simple reference count mechanism to ensure that only a single + * mount has a channel open at a time. + * + */ + +static int +p9_virtio_create(struct p9_client *client, const char *devname, char *args) +{ + struct virtio_chan *chan; + int ret = -ENOENT; + int found = 0; + + mutex_lock(&virtio_9p_lock); + list_for_each_entry(chan, &virtio_chan_list, chan_list) { + if (!strncmp(devname, chan->tag, chan->tag_len) && + strlen(devname) == chan->tag_len) { + if (!chan->inuse) { + chan->inuse = true; + found = 1; + break; + } + ret = -EBUSY; + } + } + mutex_unlock(&virtio_9p_lock); + + if (!found) { + printk(KERN_ERR "9p: no channels available\n"); + return ret; + } + + client->trans = (void *)chan; + client->status = Connected; + chan->client = client; + + return 0; +} + +/** + * p9_virtio_remove - clean up resources associated with a virtio device + * @vdev: virtio device to remove + * + */ + +static void p9_virtio_remove(struct virtio_device *vdev) +{ + struct virtio_chan *chan = vdev->priv; + + BUG_ON(chan->inuse); + vdev->config->del_vqs(vdev); + + mutex_lock(&virtio_9p_lock); + list_del(&chan->chan_list); + mutex_unlock(&virtio_9p_lock); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kfree(chan->tag); + kfree(chan->vc_wq); + kfree(chan); + +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { + VIRTIO_9P_MOUNT_TAG, +}; + +/* The standard "struct lguest_driver": */ +static struct virtio_driver p9_virtio_drv = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, + .remove = p9_virtio_remove, +}; + +static struct p9_trans_module p9_virtio_trans = { + .name = "virtio", + .create = p9_virtio_create, + .close = p9_virtio_close, + .request = p9_virtio_request, + .cancel = p9_virtio_cancel, + + /* + * We leave one entry for input and one entry for response + * headers. We also skip one more entry to accomodate, address + * that are not at page boundary, that can result in an extra + * page in zero copy. + */ + .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), + .pref = P9_TRANS_PREF_PAYLOAD_SEP, + .def = 0, + .owner = THIS_MODULE, +}; + +/* The standard init function */ +static int __init p9_virtio_init(void) +{ + INIT_LIST_HEAD(&virtio_chan_list); + + v9fs_register_trans(&p9_virtio_trans); + return register_virtio_driver(&p9_virtio_drv); +} + +static void __exit p9_virtio_cleanup(void) +{ + unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); +} + +module_init(p9_virtio_init); +module_exit(p9_virtio_cleanup); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_DESCRIPTION("Virtio 9p Transport"); +MODULE_LICENSE("GPL"); |