aboutsummaryrefslogtreecommitdiffstats
path: root/fs/fuse
diff options
context:
space:
mode:
authorroot <root@artemis.panaceas.org>2015-12-25 04:40:36 +0000
committerroot <root@artemis.panaceas.org>2015-12-25 04:40:36 +0000
commit849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree6135abc790ca67dedbe07c39806591e70eda81ce /fs/fuse
downloadlinux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip
initial_commit
Diffstat (limited to 'fs/fuse')
-rw-r--r--fs/fuse/Kconfig15
-rw-r--r--fs/fuse/Makefile8
-rw-r--r--fs/fuse/control.c359
-rw-r--r--fs/fuse/cuse.c620
-rw-r--r--fs/fuse/dev.c2049
-rw-r--r--fs/fuse/dir.c1638
-rw-r--r--fs/fuse/file.c2186
-rw-r--r--fs/fuse/fuse_i.h769
-rw-r--r--fs/fuse/inode.c1263
9 files changed, 8907 insertions, 0 deletions
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
new file mode 100644
index 00000000..0cf160a9
--- /dev/null
+++ b/fs/fuse/Kconfig
@@ -0,0 +1,15 @@
+config FUSE_FS
+ tristate "FUSE (Filesystem in Userspace) support"
+ help
+ With FUSE it is possible to implement a fully functional filesystem
+ in a userspace program.
+
+ There's also companion library: libfuse. This library along with
+ utilities is available from the FUSE homepage:
+ <http://fuse.sourceforge.net/>
+
+ See <file:Documentation/filesystems/fuse.txt> for more information.
+ See <file:Documentation/Changes> for needed library/utility version.
+
+ If you want to develop a userspace FS, or if you want to use
+ a filesystem based on FUSE, answer Y or M.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 00000000..e95eeb44
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the FUSE filesystem.
+#
+
+obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
+
+fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
new file mode 100644
index 00000000..85542a7d
--- /dev/null
+++ b/fs/fuse/control.c
@@ -0,0 +1,359 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#define FUSE_CTL_SUPER_MAGIC 0x65735543
+
+/*
+ * This is non-NULL when the single instance of the control filesystem
+ * exists. Protected by fuse_mutex
+ */
+static struct super_block *fuse_control_sb;
+
+static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
+{
+ struct fuse_conn *fc;
+ mutex_lock(&fuse_mutex);
+ fc = file->f_path.dentry->d_inode->i_private;
+ if (fc)
+ fc = fuse_conn_get(fc);
+ mutex_unlock(&fuse_mutex);
+ return fc;
+}
+
+static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
+ if (fc) {
+ fuse_abort_conn(fc);
+ fuse_conn_put(fc);
+ }
+ return count;
+}
+
+static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ char tmp[32];
+ size_t size;
+
+ if (!*ppos) {
+ long value;
+ struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
+ if (!fc)
+ return 0;
+
+ value = atomic_read(&fc->num_waiting);
+ file->private_data = (void *)value;
+ fuse_conn_put(fc);
+ }
+ size = sprintf(tmp, "%ld\n", (long)file->private_data);
+ return simple_read_from_buffer(buf, len, ppos, tmp, size);
+}
+
+static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos, unsigned val)
+{
+ char tmp[32];
+ size_t size = sprintf(tmp, "%u\n", val);
+
+ return simple_read_from_buffer(buf, len, ppos, tmp, size);
+}
+
+static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, unsigned *val,
+ unsigned global_limit)
+{
+ unsigned long t;
+ char tmp[32];
+ unsigned limit = (1 << 16) - 1;
+ int err;
+
+ if (*ppos || count >= sizeof(tmp) - 1)
+ return -EINVAL;
+
+ if (copy_from_user(tmp, buf, count))
+ return -EINVAL;
+
+ tmp[count] = '\0';
+
+ err = strict_strtoul(tmp, 0, &t);
+ if (err)
+ return err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ limit = min(limit, global_limit);
+
+ if (t > limit)
+ return -EINVAL;
+
+ *val = t;
+
+ return count;
+}
+
+static ssize_t fuse_conn_max_background_read(struct file *file,
+ char __user *buf, size_t len,
+ loff_t *ppos)
+{
+ struct fuse_conn *fc;
+ unsigned val;
+
+ fc = fuse_ctl_file_conn_get(file);
+ if (!fc)
+ return 0;
+
+ val = fc->max_background;
+ fuse_conn_put(fc);
+
+ return fuse_conn_limit_read(file, buf, len, ppos, val);
+}
+
+static ssize_t fuse_conn_max_background_write(struct file *file,
+ const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned val;
+ ssize_t ret;
+
+ ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
+ max_user_bgreq);
+ if (ret > 0) {
+ struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
+ if (fc) {
+ fc->max_background = val;
+ fuse_conn_put(fc);
+ }
+ }
+
+ return ret;
+}
+
+static ssize_t fuse_conn_congestion_threshold_read(struct file *file,
+ char __user *buf, size_t len,
+ loff_t *ppos)
+{
+ struct fuse_conn *fc;
+ unsigned val;
+
+ fc = fuse_ctl_file_conn_get(file);
+ if (!fc)
+ return 0;
+
+ val = fc->congestion_threshold;
+ fuse_conn_put(fc);
+
+ return fuse_conn_limit_read(file, buf, len, ppos, val);
+}
+
+static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
+ const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ unsigned val;
+ ssize_t ret;
+
+ ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
+ max_user_congthresh);
+ if (ret > 0) {
+ struct fuse_conn *fc = fuse_ctl_file_conn_get(file);
+ if (fc) {
+ fc->congestion_threshold = val;
+ fuse_conn_put(fc);
+ }
+ }
+
+ return ret;
+}
+
+static const struct file_operations fuse_ctl_abort_ops = {
+ .open = nonseekable_open,
+ .write = fuse_conn_abort_write,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations fuse_ctl_waiting_ops = {
+ .open = nonseekable_open,
+ .read = fuse_conn_waiting_read,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations fuse_conn_max_background_ops = {
+ .open = nonseekable_open,
+ .read = fuse_conn_max_background_read,
+ .write = fuse_conn_max_background_write,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations fuse_conn_congestion_threshold_ops = {
+ .open = nonseekable_open,
+ .read = fuse_conn_congestion_threshold_read,
+ .write = fuse_conn_congestion_threshold_write,
+ .llseek = no_llseek,
+};
+
+static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
+ struct fuse_conn *fc,
+ const char *name,
+ int mode, int nlink,
+ const struct inode_operations *iop,
+ const struct file_operations *fop)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES);
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ return NULL;
+
+ fc->ctl_dentry[fc->ctl_ndents++] = dentry;
+ inode = new_inode(fuse_control_sb);
+ if (!inode)
+ return NULL;
+
+ inode->i_ino = get_next_ino();
+ inode->i_mode = mode;
+ inode->i_uid = fc->user_id;
+ inode->i_gid = fc->group_id;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ /* setting ->i_op to NULL is not allowed */
+ if (iop)
+ inode->i_op = iop;
+ inode->i_fop = fop;
+ inode->i_nlink = nlink;
+ inode->i_private = fc;
+ d_add(dentry, inode);
+ return dentry;
+}
+
+/*
+ * Add a connection to the control filesystem (if it exists). Caller
+ * must hold fuse_mutex
+ */
+int fuse_ctl_add_conn(struct fuse_conn *fc)
+{
+ struct dentry *parent;
+ char name[32];
+
+ if (!fuse_control_sb)
+ return 0;
+
+ parent = fuse_control_sb->s_root;
+ inc_nlink(parent->d_inode);
+ sprintf(name, "%u", fc->dev);
+ parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
+ &simple_dir_inode_operations,
+ &simple_dir_operations);
+ if (!parent)
+ goto err;
+
+ if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1,
+ NULL, &fuse_ctl_waiting_ops) ||
+ !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1,
+ NULL, &fuse_ctl_abort_ops) ||
+ !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600,
+ 1, NULL, &fuse_conn_max_background_ops) ||
+ !fuse_ctl_add_dentry(parent, fc, "congestion_threshold",
+ S_IFREG | 0600, 1, NULL,
+ &fuse_conn_congestion_threshold_ops))
+ goto err;
+
+ return 0;
+
+ err:
+ fuse_ctl_remove_conn(fc);
+ return -ENOMEM;
+}
+
+/*
+ * Remove a connection from the control filesystem (if it exists).
+ * Caller must hold fuse_mutex
+ */
+void fuse_ctl_remove_conn(struct fuse_conn *fc)
+{
+ int i;
+
+ if (!fuse_control_sb)
+ return;
+
+ for (i = fc->ctl_ndents - 1; i >= 0; i--) {
+ struct dentry *dentry = fc->ctl_dentry[i];
+ dentry->d_inode->i_private = NULL;
+ d_drop(dentry);
+ dput(dentry);
+ }
+ drop_nlink(fuse_control_sb->s_root->d_inode);
+}
+
+static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct tree_descr empty_descr = {""};
+ struct fuse_conn *fc;
+ int err;
+
+ err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr);
+ if (err)
+ return err;
+
+ mutex_lock(&fuse_mutex);
+ BUG_ON(fuse_control_sb);
+ fuse_control_sb = sb;
+ list_for_each_entry(fc, &fuse_conn_list, entry) {
+ err = fuse_ctl_add_conn(fc);
+ if (err) {
+ fuse_control_sb = NULL;
+ mutex_unlock(&fuse_mutex);
+ return err;
+ }
+ }
+ mutex_unlock(&fuse_mutex);
+
+ return 0;
+}
+
+static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
+{
+ return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
+}
+
+static void fuse_ctl_kill_sb(struct super_block *sb)
+{
+ struct fuse_conn *fc;
+
+ mutex_lock(&fuse_mutex);
+ fuse_control_sb = NULL;
+ list_for_each_entry(fc, &fuse_conn_list, entry)
+ fc->ctl_ndents = 0;
+ mutex_unlock(&fuse_mutex);
+
+ kill_litter_super(sb);
+}
+
+static struct file_system_type fuse_ctl_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "fusectl",
+ .mount = fuse_ctl_mount,
+ .kill_sb = fuse_ctl_kill_sb,
+};
+
+int __init fuse_ctl_init(void)
+{
+ return register_filesystem(&fuse_ctl_fs_type);
+}
+
+void fuse_ctl_cleanup(void)
+{
+ unregister_filesystem(&fuse_ctl_fs_type);
+}
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 00000000..b6cca47f
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,620 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009 SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems. On initialization /dev/cuse is
+ * created. By opening the file and replying to the CUSE_INIT request
+ * userland CUSE server can create a character device. After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn : contains fuse_conn and serves as bonding structure
+ * channel : file handle connected to the userland CUSE server
+ * cdev : the implemented character device
+ * dev : generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies. When channel is
+ * closed, everything begins to destruct. The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN 64
+
+struct cuse_conn {
+ struct list_head list; /* linked on cuse_conntbl */
+ struct fuse_conn fc; /* fuse connection */
+ struct cdev *cdev; /* associated character device */
+ struct device *dev; /* device representing @cdev */
+
+ /* init parameters, set once during initialization */
+ bool unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+ return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+ return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file. All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ loff_t pos = 0;
+
+ return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ loff_t pos = 0;
+ /*
+ * No locking or generic_write_checks(), the server is
+ * responsible for locking and sanity checks.
+ */
+ return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+ dev_t devt = inode->i_cdev->dev;
+ struct cuse_conn *cc = NULL, *pos;
+ int rc;
+
+ /* look up and get the connection */
+ spin_lock(&cuse_lock);
+ list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+ if (pos->dev->devt == devt) {
+ fuse_conn_get(&pos->fc);
+ cc = pos;
+ break;
+ }
+ spin_unlock(&cuse_lock);
+
+ /* dead? */
+ if (!cc)
+ return -ENODEV;
+
+ /*
+ * Generic permission check is already done against the chrdev
+ * file, proceed to open.
+ */
+ rc = fuse_do_open(&cc->fc, 0, file, 0);
+ if (rc)
+ fuse_conn_put(&cc->fc);
+ return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+
+ fuse_sync_release(ff, file->f_flags);
+ fuse_conn_put(fc);
+
+ return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_file *ff = file->private_data;
+ struct cuse_conn *cc = fc_to_cc(ff->fc);
+ unsigned int flags = 0;
+
+ if (cc->unrestricted_ioctl)
+ flags |= FUSE_IOCTL_UNRESTRICTED;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_file *ff = file->private_data;
+ struct cuse_conn *cc = fc_to_cc(ff->fc);
+ unsigned int flags = FUSE_IOCTL_COMPAT;
+
+ if (cc->unrestricted_ioctl)
+ flags |= FUSE_IOCTL_UNRESTRICTED;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+ .owner = THIS_MODULE,
+ .read = cuse_read,
+ .write = cuse_write,
+ .open = cuse_open,
+ .release = cuse_release,
+ .unlocked_ioctl = cuse_file_ioctl,
+ .compat_ioctl = cuse_file_compat_ioctl,
+ .poll = fuse_file_poll,
+ .llseek = noop_llseek,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+ const char *name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1. This function parses one pair and set *@keyp to the
+ * start of the key and *@valp to the start of the value. Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'. *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+ char *p = *pp;
+ char *key, *val;
+
+ while (p < end && *p == '\0')
+ p++;
+ if (p == end)
+ return 0;
+
+ if (end[-1] != '\0') {
+ printk(KERN_ERR "CUSE: info not properly terminated\n");
+ return -EINVAL;
+ }
+
+ key = val = p;
+ p += strlen(p);
+
+ if (valp) {
+ strsep(&val, "=");
+ if (!val)
+ val = key + strlen(key);
+ key = strstrip(key);
+ val = strstrip(val);
+ } else
+ key = strstrip(key);
+
+ if (!strlen(key)) {
+ printk(KERN_ERR "CUSE: zero length info key specified\n");
+ return -EINVAL;
+ }
+
+ *pp = p;
+ *keyp = key;
+ if (valp)
+ *valp = val;
+
+ return 1;
+}
+
+/**
+ * cuse_parse_dev_info - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo. String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * them, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+ char *end = p + len;
+ char *key, *val;
+ int rc;
+
+ while (true) {
+ rc = cuse_parse_one(&p, end, &key, &val);
+ if (rc < 0)
+ return rc;
+ if (!rc)
+ break;
+ if (strcmp(key, "DEVNAME") == 0)
+ devinfo->name = val;
+ else
+ printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+ key);
+ }
+
+ if (!devinfo->name || !strlen(devinfo->name)) {
+ printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it. Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct cuse_conn *cc = fc_to_cc(fc);
+ struct cuse_init_out *arg = req->out.args[0].value;
+ struct page *page = req->pages[0];
+ struct cuse_devinfo devinfo = { };
+ struct device *dev;
+ struct cdev *cdev;
+ dev_t devt;
+ int rc;
+
+ if (req->out.h.error ||
+ arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+ goto err;
+ }
+
+ fc->minor = arg->minor;
+ fc->max_read = max_t(unsigned, arg->max_read, 4096);
+ fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+ /* parse init reply */
+ cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+ rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+ &devinfo);
+ if (rc)
+ goto err;
+
+ /* determine and reserve devt */
+ devt = MKDEV(arg->dev_major, arg->dev_minor);
+ if (!MAJOR(devt))
+ rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+ else
+ rc = register_chrdev_region(devt, 1, devinfo.name);
+ if (rc) {
+ printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+ goto err;
+ }
+
+ /* devt determined, create device */
+ rc = -ENOMEM;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ goto err_region;
+
+ device_initialize(dev);
+ dev_set_uevent_suppress(dev, 1);
+ dev->class = cuse_class;
+ dev->devt = devt;
+ dev->release = cuse_gendev_release;
+ dev_set_drvdata(dev, cc);
+ dev_set_name(dev, "%s", devinfo.name);
+
+ rc = device_add(dev);
+ if (rc)
+ goto err_device;
+
+ /* register cdev */
+ rc = -ENOMEM;
+ cdev = cdev_alloc();
+ if (!cdev)
+ goto err_device;
+
+ cdev->owner = THIS_MODULE;
+ cdev->ops = &cuse_frontend_fops;
+
+ rc = cdev_add(cdev, devt, 1);
+ if (rc)
+ goto err_cdev;
+
+ cc->dev = dev;
+ cc->cdev = cdev;
+
+ /* make the device available */
+ spin_lock(&cuse_lock);
+ list_add(&cc->list, cuse_conntbl_head(devt));
+ spin_unlock(&cuse_lock);
+
+ /* announce device availability */
+ dev_set_uevent_suppress(dev, 0);
+ kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+ kfree(arg);
+ __free_page(page);
+ return;
+
+err_cdev:
+ cdev_del(cdev);
+err_device:
+ put_device(dev);
+err_region:
+ unregister_chrdev_region(devt, 1);
+err:
+ fc->conn_error = 1;
+ goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+ int rc;
+ struct fuse_req *req;
+ struct page *page;
+ struct fuse_conn *fc = &cc->fc;
+ struct cuse_init_in *arg;
+ void *outarg;
+
+ BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ rc = PTR_ERR(req);
+ goto err;
+ }
+
+ rc = -ENOMEM;
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto err_put_req;
+
+ outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
+ if (!outarg)
+ goto err_free_page;
+
+ arg = &req->misc.cuse_init_in;
+ arg->major = FUSE_KERNEL_VERSION;
+ arg->minor = FUSE_KERNEL_MINOR_VERSION;
+ arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+ req->in.h.opcode = CUSE_INIT;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct cuse_init_in);
+ req->in.args[0].value = arg;
+ req->out.numargs = 2;
+ req->out.args[0].size = sizeof(struct cuse_init_out);
+ req->out.args[0].value = outarg;
+ req->out.args[1].size = CUSE_INIT_INFO_MAX;
+ req->out.argvar = 1;
+ req->out.argpages = 1;
+ req->pages[0] = page;
+ req->num_pages = 1;
+ req->end = cuse_process_init_reply;
+ fuse_request_send_background(fc, req);
+
+ return 0;
+
+err_free_page:
+ __free_page(page);
+err_put_req:
+ fuse_put_request(fc, req);
+err:
+ return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+ struct cuse_conn *cc = fc_to_cc(fc);
+ kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * Userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initialization request kernel sends. This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init. The rest is delegated to a kthread.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+ struct cuse_conn *cc;
+ int rc;
+
+ /* set up cuse_conn */
+ cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+ if (!cc)
+ return -ENOMEM;
+
+ fuse_conn_init(&cc->fc);
+
+ INIT_LIST_HEAD(&cc->list);
+ cc->fc.release = cuse_fc_release;
+
+ cc->fc.connected = 1;
+ cc->fc.blocked = 0;
+ rc = cuse_send_init(cc);
+ if (rc) {
+ fuse_conn_put(&cc->fc);
+ return rc;
+ }
+ file->private_data = &cc->fc; /* channel owns base reference to cc */
+
+ return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+ struct cuse_conn *cc = fc_to_cc(file->private_data);
+ int rc;
+
+ /* remove from the conntbl, no more access from this point on */
+ spin_lock(&cuse_lock);
+ list_del_init(&cc->list);
+ spin_unlock(&cuse_lock);
+
+ /* remove device */
+ if (cc->dev)
+ device_unregister(cc->dev);
+ if (cc->cdev) {
+ unregister_chrdev_region(cc->cdev->dev, 1);
+ cdev_del(cc->cdev);
+ }
+
+ /* kill connection and shutdown channel */
+ fuse_conn_kill(&cc->fc);
+ rc = fuse_dev_release(inode, file); /* puts the base reference */
+
+ return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initializatiion
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cuse_conn *cc = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cuse_conn *cc = dev_get_drvdata(dev);
+
+ fuse_abort_conn(&cc->fc);
+ return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+ __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+ __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+ { }
+};
+
+static struct miscdevice cuse_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "cuse",
+ .fops = &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+ int i, rc;
+
+ /* init conntbl */
+ for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+ INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+ /* inherit and extend fuse_dev_operations */
+ cuse_channel_fops = fuse_dev_operations;
+ cuse_channel_fops.owner = THIS_MODULE;
+ cuse_channel_fops.open = cuse_channel_open;
+ cuse_channel_fops.release = cuse_channel_release;
+
+ cuse_class = class_create(THIS_MODULE, "cuse");
+ if (IS_ERR(cuse_class))
+ return PTR_ERR(cuse_class);
+
+ cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+ rc = misc_register(&cuse_miscdev);
+ if (rc) {
+ class_destroy(cuse_class);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+ misc_deregister(&cuse_miscdev);
+ class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 00000000..c858b5c8
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,2049 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/swap.h>
+#include <linux/splice.h>
+#include <linux/freezer.h>
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+MODULE_ALIAS("devname:fuse");
+
+static struct kmem_cache *fuse_req_cachep;
+
+static struct fuse_conn *fuse_get_conn(struct file *file)
+{
+ /*
+ * Lockless access is OK, because file->private data is set
+ * once during mount and is valid until the file is released.
+ */
+ return file->private_data;
+}
+
+static void fuse_request_init(struct fuse_req *req)
+{
+ memset(req, 0, sizeof(*req));
+ INIT_LIST_HEAD(&req->list);
+ INIT_LIST_HEAD(&req->intr_entry);
+ init_waitqueue_head(&req->waitq);
+ atomic_set(&req->count, 1);
+}
+
+struct fuse_req *fuse_request_alloc(void)
+{
+ struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
+ if (req)
+ fuse_request_init(req);
+ return req;
+}
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
+
+struct fuse_req *fuse_request_alloc_nofs(void)
+{
+ struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
+ if (req)
+ fuse_request_init(req);
+ return req;
+}
+
+void fuse_request_free(struct fuse_req *req)
+{
+ kmem_cache_free(fuse_req_cachep, req);
+}
+
+static void block_sigs(sigset_t *oldset)
+{
+ sigset_t mask;
+
+ siginitsetinv(&mask, sigmask(SIGKILL));
+ sigprocmask(SIG_BLOCK, &mask, oldset);
+}
+
+static void restore_sigs(sigset_t *oldset)
+{
+ sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
+static void __fuse_get_request(struct fuse_req *req)
+{
+ atomic_inc(&req->count);
+}
+
+/* Must be called with > 1 refcount */
+static void __fuse_put_request(struct fuse_req *req)
+{
+ BUG_ON(atomic_read(&req->count) < 2);
+ atomic_dec(&req->count);
+}
+
+static void fuse_req_init_context(struct fuse_req *req)
+{
+ req->in.h.uid = current_fsuid();
+ req->in.h.gid = current_fsgid();
+ req->in.h.pid = current->pid;
+}
+
+struct fuse_req *fuse_get_req(struct fuse_conn *fc)
+{
+ struct fuse_req *req;
+ sigset_t oldset;
+ int intr;
+ int err;
+
+ atomic_inc(&fc->num_waiting);
+ block_sigs(&oldset);
+ intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
+ restore_sigs(&oldset);
+ err = -EINTR;
+ if (intr)
+ goto out;
+
+ err = -ENOTCONN;
+ if (!fc->connected)
+ goto out;
+
+ req = fuse_request_alloc();
+ err = -ENOMEM;
+ if (!req)
+ goto out;
+
+ fuse_req_init_context(req);
+ req->waiting = 1;
+ return req;
+
+ out:
+ atomic_dec(&fc->num_waiting);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(fuse_get_req);
+
+/*
+ * Return request in fuse_file->reserved_req. However that may
+ * currently be in use. If that is the case, wait for it to become
+ * available.
+ */
+static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
+ struct file *file)
+{
+ struct fuse_req *req = NULL;
+ struct fuse_file *ff = file->private_data;
+
+ do {
+ wait_event(fc->reserved_req_waitq, ff->reserved_req);
+ spin_lock(&fc->lock);
+ if (ff->reserved_req) {
+ req = ff->reserved_req;
+ ff->reserved_req = NULL;
+ get_file(file);
+ req->stolen_file = file;
+ }
+ spin_unlock(&fc->lock);
+ } while (!req);
+
+ return req;
+}
+
+/*
+ * Put stolen request back into fuse_file->reserved_req
+ */
+static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct file *file = req->stolen_file;
+ struct fuse_file *ff = file->private_data;
+
+ spin_lock(&fc->lock);
+ fuse_request_init(req);
+ BUG_ON(ff->reserved_req);
+ ff->reserved_req = req;
+ wake_up_all(&fc->reserved_req_waitq);
+ spin_unlock(&fc->lock);
+ fput(file);
+}
+
+/*
+ * Gets a requests for a file operation, always succeeds
+ *
+ * This is used for sending the FLUSH request, which must get to
+ * userspace, due to POSIX locks which may need to be unlocked.
+ *
+ * If allocation fails due to OOM, use the reserved request in
+ * fuse_file.
+ *
+ * This is very unlikely to deadlock accidentally, since the
+ * filesystem should not have it's own file open. If deadlock is
+ * intentional, it can still be broken by "aborting" the filesystem.
+ */
+struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
+{
+ struct fuse_req *req;
+
+ atomic_inc(&fc->num_waiting);
+ wait_event(fc->blocked_waitq, !fc->blocked);
+ req = fuse_request_alloc();
+ if (!req)
+ req = get_reserved_req(fc, file);
+
+ fuse_req_init_context(req);
+ req->waiting = 1;
+ return req;
+}
+
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (atomic_dec_and_test(&req->count)) {
+ if (req->waiting)
+ atomic_dec(&fc->num_waiting);
+
+ if (req->stolen_file)
+ put_reserved_req(fc, req);
+ else
+ fuse_request_free(req);
+ }
+}
+EXPORT_SYMBOL_GPL(fuse_put_request);
+
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+ unsigned nbytes = 0;
+ unsigned i;
+
+ for (i = 0; i < numargs; i++)
+ nbytes += args[i].size;
+
+ return nbytes;
+}
+
+static u64 fuse_get_unique(struct fuse_conn *fc)
+{
+ fc->reqctr++;
+ /* zero is special */
+ if (fc->reqctr == 0)
+ fc->reqctr = 1;
+
+ return fc->reqctr;
+}
+
+static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->in.h.len = sizeof(struct fuse_in_header) +
+ len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+ list_add_tail(&req->list, &fc->pending);
+ req->state = FUSE_REQ_PENDING;
+ if (!req->waiting) {
+ req->waiting = 1;
+ atomic_inc(&fc->num_waiting);
+ }
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+}
+
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup)
+{
+ forget->forget_one.nodeid = nodeid;
+ forget->forget_one.nlookup = nlookup;
+
+ spin_lock(&fc->lock);
+ if (fc->connected) {
+ fc->forget_list_tail->next = forget;
+ fc->forget_list_tail = forget;
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ } else {
+ kfree(forget);
+ }
+ spin_unlock(&fc->lock);
+}
+
+static void flush_bg_queue(struct fuse_conn *fc)
+{
+ while (fc->active_background < fc->max_background &&
+ !list_empty(&fc->bg_queue)) {
+ struct fuse_req *req;
+
+ req = list_entry(fc->bg_queue.next, struct fuse_req, list);
+ list_del(&req->list);
+ fc->active_background++;
+ req->in.h.unique = fuse_get_unique(fc);
+ queue_request(fc, req);
+ }
+}
+
+/*
+ * This function is called when a request is finished. Either a reply
+ * has arrived or it was aborted (and not yet sent) or some error
+ * occurred during communication with userspace, or the device file
+ * was closed. The requester thread is woken up (if still waiting),
+ * the 'end' callback is called if given, else the reference to the
+ * request is released
+ *
+ * Called with fc->lock, unlocks it
+ */
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+__releases(fc->lock)
+{
+ void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
+ req->end = NULL;
+ list_del(&req->list);
+ list_del(&req->intr_entry);
+ req->state = FUSE_REQ_FINISHED;
+ if (req->background) {
+ if (fc->num_background == fc->max_background) {
+ fc->blocked = 0;
+ wake_up_all(&fc->blocked_waitq);
+ }
+ if (fc->num_background == fc->congestion_threshold &&
+ fc->connected && fc->bdi_initialized) {
+ clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
+ clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
+ }
+ fc->num_background--;
+ fc->active_background--;
+ flush_bg_queue(fc);
+ }
+ spin_unlock(&fc->lock);
+ wake_up(&req->waitq);
+ if (end)
+ end(fc, req);
+ fuse_put_request(fc, req);
+}
+
+static void wait_answer_interruptible(struct fuse_conn *fc,
+ struct fuse_req *req)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ if (signal_pending(current))
+ return;
+
+ spin_unlock(&fc->lock);
+ wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
+ spin_lock(&fc->lock);
+}
+
+static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
+{
+ list_add_tail(&req->intr_entry, &fc->interrupts);
+ wake_up(&fc->waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+}
+
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ if (!fc->no_interrupt) {
+ /* Any signal may interrupt this */
+ wait_answer_interruptible(fc, req);
+
+ if (req->aborted)
+ goto aborted;
+ if (req->state == FUSE_REQ_FINISHED)
+ return;
+
+ req->interrupted = 1;
+ if (req->state == FUSE_REQ_SENT)
+ queue_interrupt(fc, req);
+ }
+
+ if (!req->force) {
+ sigset_t oldset;
+
+ /* Only fatal signals may interrupt this */
+ block_sigs(&oldset);
+ wait_answer_interruptible(fc, req);
+ restore_sigs(&oldset);
+
+ if (req->aborted)
+ goto aborted;
+ if (req->state == FUSE_REQ_FINISHED)
+ return;
+
+ /* Request is not yet in userspace, bail out */
+ if (req->state == FUSE_REQ_PENDING) {
+ list_del(&req->list);
+ __fuse_put_request(req);
+ req->out.h.error = -EINTR;
+ return;
+ }
+ }
+
+ /*
+ * Either request is already in userspace, or it was forced.
+ * Wait it out.
+ */
+ spin_unlock(&fc->lock);
+
+ while (req->state != FUSE_REQ_FINISHED)
+ wait_event_freezable(req->waitq,
+ req->state == FUSE_REQ_FINISHED);
+ spin_lock(&fc->lock);
+
+ if (!req->aborted)
+ return;
+
+ aborted:
+ BUG_ON(req->state != FUSE_REQ_FINISHED);
+ if (req->locked) {
+ /* This is uninterruptible sleep, because data is
+ being copied to/from the buffers of req. During
+ locked state, there mustn't be any filesystem
+ operation (e.g. page fault), since that could lead
+ to deadlock */
+ spin_unlock(&fc->lock);
+ wait_event(req->waitq, !req->locked);
+ spin_lock(&fc->lock);
+ }
+}
+
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ spin_lock(&fc->lock);
+ if (!fc->connected)
+ req->out.h.error = -ENOTCONN;
+ else if (fc->conn_error)
+ req->out.h.error = -ECONNREFUSED;
+ else {
+ req->in.h.unique = fuse_get_unique(fc);
+ queue_request(fc, req);
+ /* acquire extra reference, since request is still needed
+ after request_end() */
+ __fuse_get_request(req);
+
+ request_wait_answer(fc, req);
+ }
+ spin_unlock(&fc->lock);
+}
+EXPORT_SYMBOL_GPL(fuse_request_send);
+
+static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
+ struct fuse_req *req)
+{
+ req->background = 1;
+ fc->num_background++;
+ if (fc->num_background == fc->max_background)
+ fc->blocked = 1;
+ if (fc->num_background == fc->congestion_threshold &&
+ fc->bdi_initialized) {
+ set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
+ set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
+ }
+ list_add_tail(&req->list, &fc->bg_queue);
+ flush_bg_queue(fc);
+}
+
+static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+{
+ spin_lock(&fc->lock);
+ if (fc->connected) {
+ fuse_request_send_nowait_locked(fc, req);
+ spin_unlock(&fc->lock);
+ } else {
+ req->out.h.error = -ENOTCONN;
+ request_end(fc, req);
+ }
+}
+
+void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ fuse_request_send_nowait(fc, req);
+}
+EXPORT_SYMBOL_GPL(fuse_request_send_background);
+
+static int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique)
+{
+ int err = -ENODEV;
+
+ req->isreply = 0;
+ req->in.h.unique = unique;
+ spin_lock(&fc->lock);
+ if (fc->connected) {
+ queue_request(fc, req);
+ err = 0;
+ }
+ spin_unlock(&fc->lock);
+
+ return err;
+}
+
+/*
+ * Called under fc->lock
+ *
+ * fc->connected must have been checked previously
+ */
+void fuse_request_send_background_locked(struct fuse_conn *fc,
+ struct fuse_req *req)
+{
+ req->isreply = 1;
+ fuse_request_send_nowait_locked(fc, req);
+}
+
+/*
+ * Lock the request. Up to the next unlock_request() there mustn't be
+ * anything that could cause a page-fault. If the request was already
+ * aborted bail out.
+ */
+static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ int err = 0;
+ if (req) {
+ spin_lock(&fc->lock);
+ if (req->aborted)
+ err = -ENOENT;
+ else
+ req->locked = 1;
+ spin_unlock(&fc->lock);
+ }
+ return err;
+}
+
+/*
+ * Unlock request. If it was aborted during being locked, the
+ * requester thread is currently waiting for it to be unlocked, so
+ * wake it up.
+ */
+static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (req) {
+ spin_lock(&fc->lock);
+ req->locked = 0;
+ if (req->aborted)
+ wake_up(&req->waitq);
+ spin_unlock(&fc->lock);
+ }
+}
+
+struct fuse_copy_state {
+ struct fuse_conn *fc;
+ int write;
+ struct fuse_req *req;
+ const struct iovec *iov;
+ struct pipe_buffer *pipebufs;
+ struct pipe_buffer *currbuf;
+ struct pipe_inode_info *pipe;
+ unsigned long nr_segs;
+ unsigned long seglen;
+ unsigned long addr;
+ struct page *pg;
+ void *mapaddr;
+ void *buf;
+ unsigned len;
+ unsigned move_pages:1;
+};
+
+static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
+ int write,
+ const struct iovec *iov, unsigned long nr_segs)
+{
+ memset(cs, 0, sizeof(*cs));
+ cs->fc = fc;
+ cs->write = write;
+ cs->iov = iov;
+ cs->nr_segs = nr_segs;
+}
+
+/* Unmap and put previous page of userspace buffer */
+static void fuse_copy_finish(struct fuse_copy_state *cs)
+{
+ if (cs->currbuf) {
+ struct pipe_buffer *buf = cs->currbuf;
+
+ if (!cs->write) {
+ buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
+ } else {
+ kunmap(buf->page);
+ buf->len = PAGE_SIZE - cs->len;
+ }
+ cs->currbuf = NULL;
+ cs->mapaddr = NULL;
+ } else if (cs->mapaddr) {
+ kunmap(cs->pg);
+ if (cs->write) {
+ flush_dcache_page(cs->pg);
+ set_page_dirty_lock(cs->pg);
+ }
+ put_page(cs->pg);
+ cs->mapaddr = NULL;
+ }
+}
+
+/*
+ * Get another pagefull of userspace buffer, and map it to kernel
+ * address space, and lock request
+ */
+static int fuse_copy_fill(struct fuse_copy_state *cs)
+{
+ unsigned long offset;
+ int err;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+ if (cs->pipebufs) {
+ struct pipe_buffer *buf = cs->pipebufs;
+
+ if (!cs->write) {
+ err = buf->ops->confirm(cs->pipe, buf);
+ if (err)
+ return err;
+
+ BUG_ON(!cs->nr_segs);
+ cs->currbuf = buf;
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
+ cs->len = buf->len;
+ cs->buf = cs->mapaddr + buf->offset;
+ cs->pipebufs++;
+ cs->nr_segs--;
+ } else {
+ struct page *page;
+
+ if (cs->nr_segs == cs->pipe->buffers)
+ return -EIO;
+
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page)
+ return -ENOMEM;
+
+ buf->page = page;
+ buf->offset = 0;
+ buf->len = 0;
+
+ cs->currbuf = buf;
+ cs->mapaddr = kmap(page);
+ cs->buf = cs->mapaddr;
+ cs->len = PAGE_SIZE;
+ cs->pipebufs++;
+ cs->nr_segs++;
+ }
+ } else {
+ if (!cs->seglen) {
+ BUG_ON(!cs->nr_segs);
+ cs->seglen = cs->iov[0].iov_len;
+ cs->addr = (unsigned long) cs->iov[0].iov_base;
+ cs->iov++;
+ cs->nr_segs--;
+ }
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ if (err < 0)
+ return err;
+ BUG_ON(err != 1);
+ offset = cs->addr % PAGE_SIZE;
+ cs->mapaddr = kmap(cs->pg);
+ cs->buf = cs->mapaddr + offset;
+ cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->seglen -= cs->len;
+ cs->addr += cs->len;
+ }
+
+ return lock_request(cs->fc, cs->req);
+}
+
+/* Do as much copy to/from userspace buffer as we can */
+static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
+{
+ unsigned ncpy = min(*size, cs->len);
+ if (val) {
+ if (cs->write)
+ memcpy(cs->buf, *val, ncpy);
+ else
+ memcpy(*val, cs->buf, ncpy);
+ *val += ncpy;
+ }
+ *size -= ncpy;
+ cs->len -= ncpy;
+ cs->buf += ncpy;
+ return ncpy;
+}
+
+static int fuse_check_page(struct page *page)
+{
+ if (page_mapcount(page) ||
+ page->mapping != NULL ||
+ page_count(page) != 1 ||
+ (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ ~(1 << PG_locked |
+ 1 << PG_referenced |
+ 1 << PG_uptodate |
+ 1 << PG_lru |
+ 1 << PG_active |
+ 1 << PG_reclaim))) {
+ printk(KERN_WARNING "fuse: trying to steal weird page\n");
+ printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+ return 1;
+ }
+ return 0;
+}
+
+static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+{
+ int err;
+ struct page *oldpage = *pagep;
+ struct page *newpage;
+ struct pipe_buffer *buf = cs->pipebufs;
+ struct address_space *mapping;
+ pgoff_t index;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+
+ err = buf->ops->confirm(cs->pipe, buf);
+ if (err)
+ return err;
+
+ BUG_ON(!cs->nr_segs);
+ cs->currbuf = buf;
+ cs->len = buf->len;
+ cs->pipebufs++;
+ cs->nr_segs--;
+
+ if (cs->len != PAGE_SIZE)
+ goto out_fallback;
+
+ if (buf->ops->steal(cs->pipe, buf) != 0)
+ goto out_fallback;
+
+ newpage = buf->page;
+
+ if (WARN_ON(!PageUptodate(newpage)))
+ return -EIO;
+
+ ClearPageMappedToDisk(newpage);
+
+ if (fuse_check_page(newpage) != 0)
+ goto out_fallback_unlock;
+
+ mapping = oldpage->mapping;
+ index = oldpage->index;
+
+ /*
+ * This is a new and locked page, it shouldn't be mapped or
+ * have any special flags on it
+ */
+ if (WARN_ON(page_mapped(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(page_has_private(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(PageMlocked(oldpage)))
+ goto out_fallback_unlock;
+
+ err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
+ if (err) {
+ unlock_page(newpage);
+ return err;
+ }
+
+ page_cache_get(newpage);
+
+ if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+ lru_cache_add_file(newpage);
+
+ err = 0;
+ spin_lock(&cs->fc->lock);
+ if (cs->req->aborted)
+ err = -ENOENT;
+ else
+ *pagep = newpage;
+ spin_unlock(&cs->fc->lock);
+
+ if (err) {
+ unlock_page(newpage);
+ page_cache_release(newpage);
+ return err;
+ }
+
+ unlock_page(oldpage);
+ page_cache_release(oldpage);
+ cs->len = 0;
+
+ return 0;
+
+out_fallback_unlock:
+ unlock_page(newpage);
+out_fallback:
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->buf = cs->mapaddr + buf->offset;
+
+ err = lock_request(cs->fc, cs->req);
+ if (err)
+ return err;
+
+ return 1;
+}
+
+static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
+ unsigned offset, unsigned count)
+{
+ struct pipe_buffer *buf;
+
+ if (cs->nr_segs == cs->pipe->buffers)
+ return -EIO;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+
+ buf = cs->pipebufs;
+ page_cache_get(page);
+ buf->page = page;
+ buf->offset = offset;
+ buf->len = count;
+
+ cs->pipebufs++;
+ cs->nr_segs++;
+ cs->len = 0;
+
+ return 0;
+}
+
+/*
+ * Copy a page in the request to/from the userspace buffer. Must be
+ * done atomically
+ */
+static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing)
+{
+ int err;
+ struct page *page = *pagep;
+
+ if (page && zeroing && count < PAGE_SIZE)
+ clear_highpage(page);
+
+ while (count) {
+ if (cs->write && cs->pipebufs && page) {
+ return fuse_ref_page(cs, page, offset, count);
+ } else if (!cs->len) {
+ if (cs->move_pages && page &&
+ offset == 0 && count == PAGE_SIZE) {
+ err = fuse_try_move_page(cs, pagep);
+ if (err <= 0)
+ return err;
+ } else {
+ err = fuse_copy_fill(cs);
+ if (err)
+ return err;
+ }
+ }
+ if (page) {
+ void *mapaddr = kmap_atomic(page, KM_USER0);
+ void *buf = mapaddr + offset;
+ offset += fuse_copy_do(cs, &buf, &count);
+ kunmap_atomic(mapaddr, KM_USER0);
+ } else
+ offset += fuse_copy_do(cs, NULL, &count);
+ }
+ if (page && !cs->write)
+ flush_dcache_page(page);
+ return 0;
+}
+
+/* Copy pages in the request to/from userspace buffer */
+static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
+ int zeroing)
+{
+ unsigned i;
+ struct fuse_req *req = cs->req;
+ unsigned offset = req->page_offset;
+ unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
+
+ for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+ int err;
+
+ err = fuse_copy_page(cs, &req->pages[i], offset, count,
+ zeroing);
+ if (err)
+ return err;
+
+ nbytes -= count;
+ count = min(nbytes, (unsigned) PAGE_SIZE);
+ offset = 0;
+ }
+ return 0;
+}
+
+/* Copy a single argument in the request to/from userspace buffer */
+static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
+{
+ while (size) {
+ if (!cs->len) {
+ int err = fuse_copy_fill(cs);
+ if (err)
+ return err;
+ }
+ fuse_copy_do(cs, &val, &size);
+ }
+ return 0;
+}
+
+/* Copy request arguments to/from userspace buffer */
+static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+ unsigned argpages, struct fuse_arg *args,
+ int zeroing)
+{
+ int err = 0;
+ unsigned i;
+
+ for (i = 0; !err && i < numargs; i++) {
+ struct fuse_arg *arg = &args[i];
+ if (i == numargs - 1 && argpages)
+ err = fuse_copy_pages(cs, arg->size, zeroing);
+ else
+ err = fuse_copy_one(cs, arg->value, arg->size);
+ }
+ return err;
+}
+
+static int forget_pending(struct fuse_conn *fc)
+{
+ return fc->forget_list_head.next != NULL;
+}
+
+static int request_pending(struct fuse_conn *fc)
+{
+ return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
+ forget_pending(fc);
+}
+
+/* Wait until a request is available on the pending list */
+static void request_wait(struct fuse_conn *fc)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&fc->waitq, &wait);
+ while (fc->connected && !request_pending(fc)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current))
+ break;
+
+ spin_unlock(&fc->lock);
+ schedule();
+ spin_lock(&fc->lock);
+ }
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&fc->waitq, &wait);
+}
+
+/*
+ * Transfer an interrupt request to userspace
+ *
+ * Unlike other requests this is assembled on demand, without a need
+ * to allocate a separate fuse_req structure.
+ *
+ * Called with fc->lock held, releases it
+ */
+static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
+ size_t nbytes, struct fuse_req *req)
+__releases(fc->lock)
+{
+ struct fuse_in_header ih;
+ struct fuse_interrupt_in arg;
+ unsigned reqsize = sizeof(ih) + sizeof(arg);
+ int err;
+
+ list_del_init(&req->intr_entry);
+ req->intr_unique = fuse_get_unique(fc);
+ memset(&ih, 0, sizeof(ih));
+ memset(&arg, 0, sizeof(arg));
+ ih.len = reqsize;
+ ih.opcode = FUSE_INTERRUPT;
+ ih.unique = req->intr_unique;
+ arg.unique = req->in.h.unique;
+
+ spin_unlock(&fc->lock);
+ if (nbytes < reqsize)
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
+
+ return err ? err : reqsize;
+}
+
+static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
+ unsigned max,
+ unsigned *countp)
+{
+ struct fuse_forget_link *head = fc->forget_list_head.next;
+ struct fuse_forget_link **newhead = &head;
+ unsigned count;
+
+ for (count = 0; *newhead != NULL && count < max; count++)
+ newhead = &(*newhead)->next;
+
+ fc->forget_list_head.next = *newhead;
+ *newhead = NULL;
+ if (fc->forget_list_head.next == NULL)
+ fc->forget_list_tail = &fc->forget_list_head;
+
+ if (countp != NULL)
+ *countp = count;
+
+ return head;
+}
+
+static int fuse_read_single_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
+ struct fuse_forget_in arg = {
+ .nlookup = forget->forget_one.nlookup,
+ };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_FORGET,
+ .nodeid = forget->forget_one.nodeid,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ spin_unlock(&fc->lock);
+ kfree(forget);
+ if (nbytes < ih.len)
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+static int fuse_read_batch_forget(struct fuse_conn *fc,
+ struct fuse_copy_state *cs, size_t nbytes)
+__releases(fc->lock)
+{
+ int err;
+ unsigned max_forgets;
+ unsigned count;
+ struct fuse_forget_link *head;
+ struct fuse_batch_forget_in arg = { .count = 0 };
+ struct fuse_in_header ih = {
+ .opcode = FUSE_BATCH_FORGET,
+ .unique = fuse_get_unique(fc),
+ .len = sizeof(ih) + sizeof(arg),
+ };
+
+ if (nbytes < ih.len) {
+ spin_unlock(&fc->lock);
+ return -EINVAL;
+ }
+
+ max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
+ head = dequeue_forget(fc, max_forgets, &count);
+ spin_unlock(&fc->lock);
+
+ arg.count = count;
+ ih.len += count * sizeof(struct fuse_forget_one);
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
+ if (!err)
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+
+ while (head) {
+ struct fuse_forget_link *forget = head;
+
+ if (!err) {
+ err = fuse_copy_one(cs, &forget->forget_one,
+ sizeof(forget->forget_one));
+ }
+ head = forget->next;
+ kfree(forget);
+ }
+
+ fuse_copy_finish(cs);
+
+ if (err)
+ return err;
+
+ return ih.len;
+}
+
+static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
+ size_t nbytes)
+__releases(fc->lock)
+{
+ if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
+ return fuse_read_single_forget(fc, cs, nbytes);
+ else
+ return fuse_read_batch_forget(fc, cs, nbytes);
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer. This
+ * function waits until a request is available, then removes it from
+ * the pending list and copies request data to userspace buffer. If
+ * no reply is needed (FORGET) or request has been aborted or there
+ * was an error during the copying then it's finished by calling
+ * request_end(). Otherwise add it to the processing list, and set
+ * the 'sent' flag.
+ */
+static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
+ struct fuse_copy_state *cs, size_t nbytes)
+{
+ int err;
+ struct fuse_req *req;
+ struct fuse_in *in;
+ unsigned reqsize;
+
+ restart:
+ spin_lock(&fc->lock);
+ err = -EAGAIN;
+ if ((file->f_flags & O_NONBLOCK) && fc->connected &&
+ !request_pending(fc))
+ goto err_unlock;
+
+ request_wait(fc);
+ err = -ENODEV;
+ if (!fc->connected)
+ goto err_unlock;
+ err = -ERESTARTSYS;
+ if (!request_pending(fc))
+ goto err_unlock;
+
+ if (!list_empty(&fc->interrupts)) {
+ req = list_entry(fc->interrupts.next, struct fuse_req,
+ intr_entry);
+ return fuse_read_interrupt(fc, cs, nbytes, req);
+ }
+
+ if (forget_pending(fc)) {
+ if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
+ return fuse_read_forget(fc, cs, nbytes);
+
+ if (fc->forget_batch <= -8)
+ fc->forget_batch = 16;
+ }
+
+ req = list_entry(fc->pending.next, struct fuse_req, list);
+ req->state = FUSE_REQ_READING;
+ list_move(&req->list, &fc->io);
+
+ in = &req->in;
+ reqsize = in->h.len;
+ /* If request is too large, reply with an error and restart the read */
+ if (nbytes < reqsize) {
+ req->out.h.error = -EIO;
+ /* SETXATTR is special, since it may contain too large data */
+ if (in->h.opcode == FUSE_SETXATTR)
+ req->out.h.error = -E2BIG;
+ request_end(fc, req);
+ goto restart;
+ }
+ spin_unlock(&fc->lock);
+ cs->req = req;
+ err = fuse_copy_one(cs, &in->h, sizeof(in->h));
+ if (!err)
+ err = fuse_copy_args(cs, in->numargs, in->argpages,
+ (struct fuse_arg *) in->args, 0);
+ fuse_copy_finish(cs);
+ spin_lock(&fc->lock);
+ req->locked = 0;
+ if (req->aborted) {
+ request_end(fc, req);
+ return -ENODEV;
+ }
+ if (err) {
+ req->out.h.error = -EIO;
+ request_end(fc, req);
+ return err;
+ }
+ if (!req->isreply)
+ request_end(fc, req);
+ else {
+ req->state = FUSE_REQ_SENT;
+ list_move_tail(&req->list, &fc->processing);
+ if (req->interrupted)
+ queue_interrupt(fc, req);
+ spin_unlock(&fc->lock);
+ }
+ return reqsize;
+
+ err_unlock:
+ spin_unlock(&fc->lock);
+ return err;
+}
+
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct fuse_copy_state cs;
+ struct file *file = iocb->ki_filp;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -EPERM;
+
+ fuse_copy_init(&cs, fc, 1, iov, nr_segs);
+
+ return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
+}
+
+static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = fuse_dev_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ int ret;
+ int page_nr = 0;
+ int do_wakeup = 0;
+ struct pipe_buffer *bufs;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(in);
+ if (!fc)
+ return -EPERM;
+
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
+ if (!bufs)
+ return -ENOMEM;
+
+ fuse_copy_init(&cs, fc, 1, NULL, 0);
+ cs.pipebufs = bufs;
+ cs.pipe = pipe;
+ ret = fuse_dev_do_read(fc, in, &cs, len);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+ pipe_lock(pipe);
+
+ if (!pipe->readers) {
+ send_sig(SIGPIPE, current, 0);
+ if (!ret)
+ ret = -EPIPE;
+ goto out_unlock;
+ }
+
+ if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ while (page_nr < cs.nr_segs) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
+
+ buf->page = bufs[page_nr].page;
+ buf->offset = bufs[page_nr].offset;
+ buf->len = bufs[page_nr].len;
+ buf->ops = &fuse_dev_pipe_buf_ops;
+
+ pipe->nrbufs++;
+ page_nr++;
+ ret += buf->len;
+
+ if (pipe->inode)
+ do_wakeup = 1;
+ }
+
+out_unlock:
+ pipe_unlock(pipe);
+
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ }
+
+out:
+ for (; page_nr < cs.nr_segs; page_nr++)
+ page_cache_release(bufs[page_nr].page);
+
+ kfree(bufs);
+ return ret;
+}
+
+static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_poll_wakeup_out outarg;
+ int err = -EINVAL;
+
+ if (size != sizeof(outarg))
+ goto err;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto err;
+
+ fuse_copy_finish(cs);
+ return fuse_notify_poll_wakeup(fc, &outarg);
+
+err:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_inval_inode_out outarg;
+ int err = -EINVAL;
+
+ if (size != sizeof(outarg))
+ goto err;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto err;
+ fuse_copy_finish(cs);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (fc->sb) {
+ err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
+ outarg.off, outarg.len);
+ }
+ up_read(&fc->killsb);
+ return err;
+
+err:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_inval_entry_out outarg;
+ int err = -ENOMEM;
+ char *buf;
+ struct qstr name;
+
+ buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
+ if (!buf)
+ goto err;
+
+ err = -EINVAL;
+ if (size < sizeof(outarg))
+ goto err;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto err;
+
+ err = -ENAMETOOLONG;
+ if (outarg.namelen > FUSE_NAME_MAX)
+ goto err;
+
+ err = -EINVAL;
+ if (size != sizeof(outarg) + outarg.namelen + 1)
+ goto err;
+
+ name.name = buf;
+ name.len = outarg.namelen;
+ err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+ if (err)
+ goto err;
+ fuse_copy_finish(cs);
+ buf[outarg.namelen] = 0;
+ name.hash = full_name_hash(name.name, name.len);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (fc->sb)
+ err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+ up_read(&fc->killsb);
+ kfree(buf);
+ return err;
+
+err:
+ kfree(buf);
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_store_out outarg;
+ struct inode *inode;
+ struct address_space *mapping;
+ u64 nodeid;
+ int err;
+ pgoff_t index;
+ unsigned int offset;
+ unsigned int num;
+ loff_t file_size;
+ loff_t end;
+
+ err = -EINVAL;
+ if (size < sizeof(outarg))
+ goto out_finish;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto out_finish;
+
+ err = -EINVAL;
+ if (size - sizeof(outarg) != outarg.size)
+ goto out_finish;
+
+ nodeid = outarg.nodeid;
+
+ down_read(&fc->killsb);
+
+ err = -ENOENT;
+ if (!fc->sb)
+ goto out_up_killsb;
+
+ inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (!inode)
+ goto out_up_killsb;
+
+ mapping = inode->i_mapping;
+ index = outarg.offset >> PAGE_CACHE_SHIFT;
+ offset = outarg.offset & ~PAGE_CACHE_MASK;
+ file_size = i_size_read(inode);
+ end = outarg.offset + outarg.size;
+ if (end > file_size) {
+ file_size = end;
+ fuse_write_update_size(inode, file_size);
+ }
+
+ num = outarg.size;
+ while (num) {
+ struct page *page;
+ unsigned int this_num;
+
+ err = -ENOMEM;
+ page = find_or_create_page(mapping, index,
+ mapping_gfp_mask(mapping));
+ if (!page)
+ goto out_iput;
+
+ this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ err = fuse_copy_page(cs, &page, offset, this_num, 0);
+ if (!err && offset == 0 && (num != 0 || file_size == end))
+ SetPageUptodate(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (err)
+ goto out_iput;
+
+ num -= this_num;
+ offset = 0;
+ index++;
+ }
+
+ err = 0;
+
+out_iput:
+ iput(inode);
+out_up_killsb:
+ up_read(&fc->killsb);
+out_finish:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ release_pages(req->pages, req->num_pages, 0);
+}
+
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+ struct fuse_notify_retrieve_out *outarg)
+{
+ int err;
+ struct address_space *mapping = inode->i_mapping;
+ struct fuse_req *req;
+ pgoff_t index;
+ loff_t file_size;
+ unsigned int num;
+ unsigned int offset;
+ size_t total_len = 0;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ offset = outarg->offset & ~PAGE_CACHE_MASK;
+
+ req->in.h.opcode = FUSE_NOTIFY_REPLY;
+ req->in.h.nodeid = outarg->nodeid;
+ req->in.numargs = 2;
+ req->in.argpages = 1;
+ req->page_offset = offset;
+ req->end = fuse_retrieve_end;
+
+ index = outarg->offset >> PAGE_CACHE_SHIFT;
+ file_size = i_size_read(inode);
+ num = outarg->size;
+ if (outarg->offset > file_size)
+ num = 0;
+ else if (outarg->offset + num > file_size)
+ num = file_size - outarg->offset;
+
+ while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+ struct page *page;
+ unsigned int this_num;
+
+ page = find_get_page(mapping, index);
+ if (!page)
+ break;
+
+ this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ req->pages[req->num_pages] = page;
+ req->num_pages++;
+
+ num -= this_num;
+ total_len += this_num;
+ index++;
+ }
+ req->misc.retrieve_in.offset = outarg->offset;
+ req->misc.retrieve_in.size = total_len;
+ req->in.args[0].size = sizeof(req->misc.retrieve_in);
+ req->in.args[0].value = &req->misc.retrieve_in;
+ req->in.args[1].size = total_len;
+
+ err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+ if (err)
+ fuse_retrieve_end(fc, req);
+
+ return err;
+}
+
+static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_retrieve_out outarg;
+ struct inode *inode;
+ int err;
+
+ err = -EINVAL;
+ if (size != sizeof(outarg))
+ goto copy_finish;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ goto copy_finish;
+
+ fuse_copy_finish(cs);
+
+ down_read(&fc->killsb);
+ err = -ENOENT;
+ if (fc->sb) {
+ u64 nodeid = outarg.nodeid;
+
+ inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+ if (inode) {
+ err = fuse_retrieve(fc, inode, &outarg);
+ iput(inode);
+ }
+ }
+ up_read(&fc->killsb);
+
+ return err;
+
+copy_finish:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
+ unsigned int size, struct fuse_copy_state *cs)
+{
+ switch (code) {
+ case FUSE_NOTIFY_POLL:
+ return fuse_notify_poll(fc, size, cs);
+
+ case FUSE_NOTIFY_INVAL_INODE:
+ return fuse_notify_inval_inode(fc, size, cs);
+
+ case FUSE_NOTIFY_INVAL_ENTRY:
+ return fuse_notify_inval_entry(fc, size, cs);
+
+ case FUSE_NOTIFY_STORE:
+ return fuse_notify_store(fc, size, cs);
+
+ case FUSE_NOTIFY_RETRIEVE:
+ return fuse_notify_retrieve(fc, size, cs);
+
+ default:
+ fuse_copy_finish(cs);
+ return -EINVAL;
+ }
+}
+
+/* Look up request on processing list by unique ID */
+static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+{
+ struct list_head *entry;
+
+ list_for_each(entry, &fc->processing) {
+ struct fuse_req *req;
+ req = list_entry(entry, struct fuse_req, list);
+ if (req->in.h.unique == unique || req->intr_unique == unique)
+ return req;
+ }
+ return NULL;
+}
+
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+ unsigned nbytes)
+{
+ unsigned reqsize = sizeof(struct fuse_out_header);
+
+ if (out->h.error)
+ return nbytes != reqsize ? -EINVAL : 0;
+
+ reqsize += len_args(out->numargs, out->args);
+
+ if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+ return -EINVAL;
+ else if (reqsize > nbytes) {
+ struct fuse_arg *lastarg = &out->args[out->numargs-1];
+ unsigned diffsize = reqsize - nbytes;
+ if (diffsize > lastarg->size)
+ return -EINVAL;
+ lastarg->size -= diffsize;
+ }
+ return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
+ out->page_zeroing);
+}
+
+/*
+ * Write a single reply to a request. First the header is copied from
+ * the write buffer. The request is then searched on the processing
+ * list by the unique ID found in the header. If found, then remove
+ * it from the list and copy the rest of the buffer to the request.
+ * The request is finished by calling request_end()
+ */
+static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
+ struct fuse_copy_state *cs, size_t nbytes)
+{
+ int err;
+ struct fuse_req *req;
+ struct fuse_out_header oh;
+
+ if (nbytes < sizeof(struct fuse_out_header))
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &oh, sizeof(oh));
+ if (err)
+ goto err_finish;
+
+ err = -EINVAL;
+ if (oh.len != nbytes)
+ goto err_finish;
+
+ /*
+ * Zero oh.unique indicates unsolicited notification message
+ * and error contains notification code.
+ */
+ if (!oh.unique) {
+ err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
+ return err ? err : nbytes;
+ }
+
+ err = -EINVAL;
+ if (oh.error <= -1000 || oh.error > 0)
+ goto err_finish;
+
+ spin_lock(&fc->lock);
+ err = -ENOENT;
+ if (!fc->connected)
+ goto err_unlock;
+
+ req = request_find(fc, oh.unique);
+ if (!req)
+ goto err_unlock;
+
+ if (req->aborted) {
+ spin_unlock(&fc->lock);
+ fuse_copy_finish(cs);
+ spin_lock(&fc->lock);
+ request_end(fc, req);
+ return -ENOENT;
+ }
+ /* Is it an interrupt reply? */
+ if (req->intr_unique == oh.unique) {
+ err = -EINVAL;
+ if (nbytes != sizeof(struct fuse_out_header))
+ goto err_unlock;
+
+ if (oh.error == -ENOSYS)
+ fc->no_interrupt = 1;
+ else if (oh.error == -EAGAIN)
+ queue_interrupt(fc, req);
+
+ spin_unlock(&fc->lock);
+ fuse_copy_finish(cs);
+ return nbytes;
+ }
+
+ req->state = FUSE_REQ_WRITING;
+ list_move(&req->list, &fc->io);
+ req->out.h = oh;
+ req->locked = 1;
+ cs->req = req;
+ if (!req->out.page_replace)
+ cs->move_pages = 0;
+ spin_unlock(&fc->lock);
+
+ err = copy_out_args(cs, &req->out, nbytes);
+ fuse_copy_finish(cs);
+
+ spin_lock(&fc->lock);
+ req->locked = 0;
+ if (!err) {
+ if (req->aborted)
+ err = -ENOENT;
+ } else if (!req->aborted)
+ req->out.h.error = -EIO;
+ request_end(fc, req);
+
+ return err ? err : nbytes;
+
+ err_unlock:
+ spin_unlock(&fc->lock);
+ err_finish:
+ fuse_copy_finish(cs);
+ return err;
+}
+
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
+ if (!fc)
+ return -EPERM;
+
+ fuse_copy_init(&cs, fc, 0, iov, nr_segs);
+
+ return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
+}
+
+static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ unsigned nbuf;
+ unsigned idx;
+ struct pipe_buffer *bufs;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc;
+ size_t rem;
+ ssize_t ret;
+
+ fc = fuse_get_conn(out);
+ if (!fc)
+ return -EPERM;
+
+ bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
+ if (!bufs)
+ return -ENOMEM;
+
+ pipe_lock(pipe);
+ nbuf = 0;
+ rem = 0;
+ for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
+ rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
+
+ ret = -EINVAL;
+ if (rem < len) {
+ pipe_unlock(pipe);
+ goto out;
+ }
+
+ rem = len;
+ while (rem) {
+ struct pipe_buffer *ibuf;
+ struct pipe_buffer *obuf;
+
+ BUG_ON(nbuf >= pipe->buffers);
+ BUG_ON(!pipe->nrbufs);
+ ibuf = &pipe->bufs[pipe->curbuf];
+ obuf = &bufs[nbuf];
+
+ if (rem >= ibuf->len) {
+ *obuf = *ibuf;
+ ibuf->ops = NULL;
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ pipe->nrbufs--;
+ } else {
+ ibuf->ops->get(pipe, ibuf);
+ *obuf = *ibuf;
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ obuf->len = rem;
+ ibuf->offset += obuf->len;
+ ibuf->len -= obuf->len;
+ }
+ nbuf++;
+ rem -= obuf->len;
+ }
+ pipe_unlock(pipe);
+
+ fuse_copy_init(&cs, fc, 0, NULL, nbuf);
+ cs.pipebufs = bufs;
+ cs.pipe = pipe;
+
+ if (flags & SPLICE_F_MOVE)
+ cs.move_pages = 1;
+
+ ret = fuse_dev_do_write(fc, &cs, len);
+
+ for (idx = 0; idx < nbuf; idx++) {
+ struct pipe_buffer *buf = &bufs[idx];
+ buf->ops->release(pipe, buf);
+ }
+out:
+ kfree(bufs);
+ return ret;
+}
+
+static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
+{
+ unsigned mask = POLLOUT | POLLWRNORM;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return POLLERR;
+
+ poll_wait(file, &fc->waitq, wait);
+
+ spin_lock(&fc->lock);
+ if (!fc->connected)
+ mask = POLLERR;
+ else if (request_pending(fc))
+ mask |= POLLIN | POLLRDNORM;
+ spin_unlock(&fc->lock);
+
+ return mask;
+}
+
+/*
+ * Abort all requests on the given list (pending or processing)
+ *
+ * This function releases and reacquires fc->lock
+ */
+static void end_requests(struct fuse_conn *fc, struct list_head *head)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ while (!list_empty(head)) {
+ struct fuse_req *req;
+ req = list_entry(head->next, struct fuse_req, list);
+ req->out.h.error = -ECONNABORTED;
+ request_end(fc, req);
+ spin_lock(&fc->lock);
+ }
+}
+
+/*
+ * Abort requests under I/O
+ *
+ * The requests are set to aborted and finished, and the request
+ * waiter is woken up. This will make request_wait_answer() wait
+ * until the request is unlocked and then return.
+ *
+ * If the request is asynchronous, then the end function needs to be
+ * called after waiting for the request to be unlocked (if it was
+ * locked).
+ */
+static void end_io_requests(struct fuse_conn *fc)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ while (!list_empty(&fc->io)) {
+ struct fuse_req *req =
+ list_entry(fc->io.next, struct fuse_req, list);
+ void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
+
+ req->aborted = 1;
+ req->out.h.error = -ECONNABORTED;
+ req->state = FUSE_REQ_FINISHED;
+ list_del_init(&req->list);
+ wake_up(&req->waitq);
+ if (end) {
+ req->end = NULL;
+ __fuse_get_request(req);
+ spin_unlock(&fc->lock);
+ wait_event(req->waitq, !req->locked);
+ end(fc, req);
+ fuse_put_request(fc, req);
+ spin_lock(&fc->lock);
+ }
+ }
+}
+
+static void end_queued_requests(struct fuse_conn *fc)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ fc->max_background = UINT_MAX;
+ flush_bg_queue(fc);
+ end_requests(fc, &fc->pending);
+ end_requests(fc, &fc->processing);
+ while (forget_pending(fc))
+ kfree(dequeue_forget(fc, 1, NULL));
+}
+
+static void end_polls(struct fuse_conn *fc)
+{
+ struct rb_node *p;
+
+ p = rb_first(&fc->polled_files);
+
+ while (p) {
+ struct fuse_file *ff;
+ ff = rb_entry(p, struct fuse_file, polled_node);
+ wake_up_interruptible_all(&ff->poll_wait);
+
+ p = rb_next(p);
+ }
+}
+
+/*
+ * Abort all requests.
+ *
+ * Emergency exit in case of a malicious or accidental deadlock, or
+ * just a hung filesystem.
+ *
+ * The same effect is usually achievable through killing the
+ * filesystem daemon and all users of the filesystem. The exception
+ * is the combination of an asynchronous request and the tricky
+ * deadlock (see Documentation/filesystems/fuse.txt).
+ *
+ * During the aborting, progression of requests from the pending and
+ * processing lists onto the io list, and progression of new requests
+ * onto the pending list is prevented by req->connected being false.
+ *
+ * Progression of requests under I/O to the processing list is
+ * prevented by the req->aborted flag being true for these requests.
+ * For this reason requests on the io list must be aborted first.
+ */
+void fuse_abort_conn(struct fuse_conn *fc)
+{
+ spin_lock(&fc->lock);
+ if (fc->connected) {
+ fc->connected = 0;
+ fc->blocked = 0;
+ end_io_requests(fc);
+ end_queued_requests(fc);
+ end_polls(fc);
+ wake_up_all(&fc->waitq);
+ wake_up_all(&fc->blocked_waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ }
+ spin_unlock(&fc->lock);
+}
+EXPORT_SYMBOL_GPL(fuse_abort_conn);
+
+int fuse_dev_release(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (fc) {
+ spin_lock(&fc->lock);
+ fc->connected = 0;
+ fc->blocked = 0;
+ end_queued_requests(fc);
+ end_polls(fc);
+ wake_up_all(&fc->blocked_waitq);
+ spin_unlock(&fc->lock);
+ fuse_conn_put(fc);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fuse_dev_release);
+
+static int fuse_dev_fasync(int fd, struct file *file, int on)
+{
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -EPERM;
+
+ /* No locking - fasync_helper does its own locking */
+ return fasync_helper(fd, file, on, &fc->fasync);
+}
+
+const struct file_operations fuse_dev_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = do_sync_read,
+ .aio_read = fuse_dev_read,
+ .splice_read = fuse_dev_splice_read,
+ .write = do_sync_write,
+ .aio_write = fuse_dev_write,
+ .splice_write = fuse_dev_splice_write,
+ .poll = fuse_dev_poll,
+ .release = fuse_dev_release,
+ .fasync = fuse_dev_fasync,
+};
+EXPORT_SYMBOL_GPL(fuse_dev_operations);
+
+static struct miscdevice fuse_miscdevice = {
+ .minor = FUSE_MINOR,
+ .name = "fuse",
+ .fops = &fuse_dev_operations,
+};
+
+int __init fuse_dev_init(void)
+{
+ int err = -ENOMEM;
+ fuse_req_cachep = kmem_cache_create("fuse_request",
+ sizeof(struct fuse_req),
+ 0, 0, NULL);
+ if (!fuse_req_cachep)
+ goto out;
+
+ err = misc_register(&fuse_miscdevice);
+ if (err)
+ goto out_cache_clean;
+
+ return 0;
+
+ out_cache_clean:
+ kmem_cache_destroy(fuse_req_cachep);
+ out:
+ return err;
+}
+
+void fuse_dev_cleanup(void)
+{
+ misc_deregister(&fuse_miscdevice);
+ kmem_cache_destroy(fuse_req_cachep);
+}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 00000000..c04a025c
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,1638 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#if BITS_PER_LONG >= 64
+static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+ entry->d_time = time;
+}
+
+static inline u64 fuse_dentry_time(struct dentry *entry)
+{
+ return entry->d_time;
+}
+#else
+/*
+ * On 32 bit archs store the high 32 bits of time in d_fsdata
+ */
+static void fuse_dentry_settime(struct dentry *entry, u64 time)
+{
+ entry->d_time = time;
+ entry->d_fsdata = (void *) (unsigned long) (time >> 32);
+}
+
+static u64 fuse_dentry_time(struct dentry *entry)
+{
+ return (u64) entry->d_time +
+ ((u64) (unsigned long) entry->d_fsdata << 32);
+}
+#endif
+
+/*
+ * FUSE caches dentries and attributes with separate timeout. The
+ * time in jiffies until the dentry/attributes are valid is stored in
+ * dentry->d_time and fuse_inode->i_time respectively.
+ */
+
+/*
+ * Calculate the time in jiffies until a dentry/attributes are valid
+ */
+static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
+{
+ if (sec || nsec) {
+ struct timespec ts = {sec, nsec};
+ return get_jiffies_64() + timespec_to_jiffies(&ts);
+ } else
+ return 0;
+}
+
+/*
+ * Set dentry and possibly attribute timeouts from the lookup/mk*
+ * replies
+ */
+static void fuse_change_entry_timeout(struct dentry *entry,
+ struct fuse_entry_out *o)
+{
+ fuse_dentry_settime(entry,
+ time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
+}
+
+static u64 attr_timeout(struct fuse_attr_out *o)
+{
+ return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+}
+
+static u64 entry_attr_timeout(struct fuse_entry_out *o)
+{
+ return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+}
+
+/*
+ * Mark the attributes as stale, so that at the next call to
+ * ->getattr() they will be fetched from userspace
+ */
+void fuse_invalidate_attr(struct inode *inode)
+{
+ get_fuse_inode(inode)->i_time = 0;
+}
+
+/*
+ * Just mark the entry as stale, so that a next attempt to look it up
+ * will result in a new lookup call to userspace
+ *
+ * This is called when a dentry is about to become negative and the
+ * timeout is unknown (unlink, rmdir, rename and in some cases
+ * lookup)
+ */
+void fuse_invalidate_entry_cache(struct dentry *entry)
+{
+ fuse_dentry_settime(entry, 0);
+}
+
+/*
+ * Same as fuse_invalidate_entry_cache(), but also try to remove the
+ * dentry from the hash
+ */
+static void fuse_invalidate_entry(struct dentry *entry)
+{
+ d_invalidate(entry);
+ fuse_invalidate_entry_cache(entry);
+}
+
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+ u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg)
+{
+ memset(outarg, 0, sizeof(struct fuse_entry_out));
+ req->in.h.opcode = FUSE_LOOKUP;
+ req->in.h.nodeid = nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = name->len + 1;
+ req->in.args[0].value = name->name;
+ req->out.numargs = 1;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(struct fuse_entry_out);
+ req->out.args[0].value = outarg;
+}
+
+u64 fuse_get_attr_version(struct fuse_conn *fc)
+{
+ u64 curr_version;
+
+ /*
+ * The spin lock isn't actually needed on 64bit archs, but we
+ * don't yet care too much about such optimizations.
+ */
+ spin_lock(&fc->lock);
+ curr_version = fc->attr_version;
+ spin_unlock(&fc->lock);
+
+ return curr_version;
+}
+
+/*
+ * Check whether the dentry is still valid
+ *
+ * If the entry validity timeout has expired and the dentry is
+ * positive, try to redo the lookup. If the lookup results in a
+ * different inode, then let the VFS invalidate the dentry and redo
+ * the lookup once more. If the lookup results in the same inode,
+ * then refresh the attributes, timeouts and mark the dentry valid.
+ */
+static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+{
+ struct inode *inode;
+
+ inode = ACCESS_ONCE(entry->d_inode);
+ if (inode && is_bad_inode(inode))
+ return 0;
+ else if (fuse_dentry_time(entry) < get_jiffies_64()) {
+ int err;
+ struct fuse_entry_out outarg;
+ struct fuse_conn *fc;
+ struct fuse_req *req;
+ struct fuse_forget_link *forget;
+ struct dentry *parent;
+ u64 attr_version;
+
+ /* For negative dentries, always do a fresh lookup */
+ if (!inode)
+ return 0;
+
+ if (nd && (nd->flags & LOOKUP_RCU))
+ return -ECHILD;
+
+ fc = get_fuse_conn(inode);
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return 0;
+
+ forget = fuse_alloc_forget();
+ if (!forget) {
+ fuse_put_request(fc, req);
+ return 0;
+ }
+
+ attr_version = fuse_get_attr_version(fc);
+
+ parent = dget_parent(entry);
+ fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ &entry->d_name, &outarg);
+ fuse_request_send(fc, req);
+ dput(parent);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ /* Zero nodeid is same as -ENOENT */
+ if (!err && !outarg.nodeid)
+ err = -ENOENT;
+ if (!err) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ if (outarg.nodeid != get_node_id(inode)) {
+ fuse_queue_forget(fc, forget, outarg.nodeid, 1);
+ return 0;
+ }
+ spin_lock(&fc->lock);
+ fi->nlookup++;
+ spin_unlock(&fc->lock);
+ }
+ kfree(forget);
+ if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ return 0;
+
+ fuse_change_attributes(inode, &outarg.attr,
+ entry_attr_timeout(&outarg),
+ attr_version);
+ fuse_change_entry_timeout(entry, &outarg);
+ }
+ return 1;
+}
+
+static int invalid_nodeid(u64 nodeid)
+{
+ return !nodeid || nodeid == FUSE_ROOT_ID;
+}
+
+const struct dentry_operations fuse_dentry_operations = {
+ .d_revalidate = fuse_dentry_revalidate,
+};
+
+int fuse_valid_type(int m)
+{
+ return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
+ S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
+}
+
+/*
+ * Add a directory inode to a dentry, ensuring that no other dentry
+ * refers to this inode. Called with fc->inst_mutex.
+ */
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+ struct inode *inode)
+{
+ struct dentry *alias = d_find_alias(inode);
+ if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
+ /* This tries to shrink the subtree below alias */
+ fuse_invalidate_entry(alias);
+ dput(alias);
+ if (!list_empty(&inode->i_dentry))
+ return ERR_PTR(-EBUSY);
+ } else {
+ dput(alias);
+ }
+ return d_splice_alias(inode, entry);
+}
+
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_req *req;
+ struct fuse_forget_link *forget;
+ u64 attr_version;
+ int err;
+
+ *inode = NULL;
+ err = -ENAMETOOLONG;
+ if (name->len > FUSE_NAME_MAX)
+ goto out;
+
+ req = fuse_get_req(fc);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out;
+
+ forget = fuse_alloc_forget();
+ err = -ENOMEM;
+ if (!forget) {
+ fuse_put_request(fc, req);
+ goto out;
+ }
+
+ attr_version = fuse_get_attr_version(fc);
+
+ fuse_lookup_init(fc, req, nodeid, name, outarg);
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ /* Zero nodeid is same as -ENOENT, but with valid timeout */
+ if (err || !outarg->nodeid)
+ goto out_put_forget;
+
+ err = -EIO;
+ if (!outarg->nodeid)
+ goto out_put_forget;
+ if (!fuse_valid_type(outarg->attr.mode))
+ goto out_put_forget;
+
+ *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
+ &outarg->attr, entry_attr_timeout(outarg),
+ attr_version);
+ err = -ENOMEM;
+ if (!*inode) {
+ fuse_queue_forget(fc, forget, outarg->nodeid, 1);
+ goto out;
+ }
+ err = 0;
+
+ out_put_forget:
+ kfree(forget);
+ out:
+ return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+ struct nameidata *nd)
+{
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ struct dentry *newent;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ bool outarg_valid = true;
+
+ err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+ &outarg, &inode);
+ if (err == -ENOENT) {
+ outarg_valid = false;
+ err = 0;
+ }
+ if (err)
+ goto out_err;
+
+ err = -EIO;
+ if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+ goto out_iput;
+
+ if (inode && S_ISDIR(inode->i_mode)) {
+ mutex_lock(&fc->inst_mutex);
+ newent = fuse_d_add_directory(entry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ err = PTR_ERR(newent);
+ if (IS_ERR(newent))
+ goto out_iput;
+ } else {
+ newent = d_splice_alias(inode, entry);
+ }
+
+ entry = newent ? newent : entry;
+ if (outarg_valid)
+ fuse_change_entry_timeout(entry, &outarg);
+ else
+ fuse_invalidate_entry_cache(entry);
+
+ return newent;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
+}
+
+/*
+ * Atomic create+open operation
+ *
+ * If the filesystem doesn't support this, then fall back to separate
+ * 'mknod' + 'open' requests.
+ */
+static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
+ struct nameidata *nd)
+{
+ int err;
+ struct inode *inode;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req;
+ struct fuse_forget_link *forget;
+ struct fuse_create_in inarg;
+ struct fuse_open_out outopen;
+ struct fuse_entry_out outentry;
+ struct fuse_file *ff;
+ struct file *file;
+ int flags = nd->intent.open.flags - 1;
+
+ if (fc->no_create)
+ return -ENOSYS;
+
+ if (flags & O_DIRECT)
+ return -EINVAL;
+
+ forget = fuse_alloc_forget();
+ if (!forget)
+ return -ENOMEM;
+
+ req = fuse_get_req(fc);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out_put_forget_req;
+
+ err = -ENOMEM;
+ ff = fuse_file_alloc(fc);
+ if (!ff)
+ goto out_put_request;
+
+ if (!fc->dont_mask)
+ mode &= ~current_umask();
+
+ flags &= ~O_NOCTTY;
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outentry, 0, sizeof(outentry));
+ inarg.flags = flags;
+ inarg.mode = mode;
+ inarg.umask = current_umask();
+ req->in.h.opcode = FUSE_CREATE;
+ req->in.h.nodeid = get_node_id(dir);
+ req->in.numargs = 2;
+ req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+ sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = entry->d_name.len + 1;
+ req->in.args[1].value = entry->d_name.name;
+ req->out.numargs = 2;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outentry);
+ req->out.args[0].value = &outentry;
+ req->out.args[1].size = sizeof(outopen);
+ req->out.args[1].value = &outopen;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ if (err) {
+ if (err == -ENOSYS)
+ fc->no_create = 1;
+ goto out_free_ff;
+ }
+
+ err = -EIO;
+ if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
+ goto out_free_ff;
+
+ fuse_put_request(fc, req);
+ ff->fh = outopen.fh;
+ ff->nodeid = outentry.nodeid;
+ ff->open_flags = outopen.open_flags;
+ inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
+ &outentry.attr, entry_attr_timeout(&outentry), 0);
+ if (!inode) {
+ flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
+ fuse_sync_release(ff, flags);
+ fuse_queue_forget(fc, forget, outentry.nodeid, 1);
+ return -ENOMEM;
+ }
+ kfree(forget);
+ d_instantiate(entry, inode);
+ fuse_change_entry_timeout(entry, &outentry);
+ fuse_invalidate_attr(dir);
+ file = lookup_instantiate_filp(nd, entry, generic_file_open);
+ if (IS_ERR(file)) {
+ fuse_sync_release(ff, flags);
+ return PTR_ERR(file);
+ }
+ file->private_data = fuse_file_get(ff);
+ fuse_finish_open(inode, file);
+ return 0;
+
+ out_free_ff:
+ fuse_file_free(ff);
+ out_put_request:
+ fuse_put_request(fc, req);
+ out_put_forget_req:
+ kfree(forget);
+ return err;
+}
+
+/*
+ * Code shared between mknod, mkdir, symlink and link
+ */
+static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+ struct inode *dir, struct dentry *entry,
+ int mode)
+{
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ int err;
+ struct fuse_forget_link *forget;
+
+ forget = fuse_alloc_forget();
+ if (!forget) {
+ fuse_put_request(fc, req);
+ return -ENOMEM;
+ }
+
+ memset(&outarg, 0, sizeof(outarg));
+ req->in.h.nodeid = get_node_id(dir);
+ req->out.numargs = 1;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err)
+ goto out_put_forget_req;
+
+ err = -EIO;
+ if (invalid_nodeid(outarg.nodeid))
+ goto out_put_forget_req;
+
+ if ((outarg.attr.mode ^ mode) & S_IFMT)
+ goto out_put_forget_req;
+
+ inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+ &outarg.attr, entry_attr_timeout(&outarg), 0);
+ if (!inode) {
+ fuse_queue_forget(fc, forget, outarg.nodeid, 1);
+ return -ENOMEM;
+ }
+ kfree(forget);
+
+ if (S_ISDIR(inode->i_mode)) {
+ struct dentry *alias;
+ mutex_lock(&fc->inst_mutex);
+ alias = d_find_alias(inode);
+ if (alias) {
+ /* New directory must have moved since mkdir */
+ mutex_unlock(&fc->inst_mutex);
+ dput(alias);
+ iput(inode);
+ return -EBUSY;
+ }
+ d_instantiate(entry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ } else
+ d_instantiate(entry, inode);
+
+ fuse_change_entry_timeout(entry, &outarg);
+ fuse_invalidate_attr(dir);
+ return 0;
+
+ out_put_forget_req:
+ kfree(forget);
+ return err;
+}
+
+static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+ dev_t rdev)
+{
+ struct fuse_mknod_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ if (!fc->dont_mask)
+ mode &= ~current_umask();
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mode = mode;
+ inarg.rdev = new_encode_dev(rdev);
+ inarg.umask = current_umask();
+ req->in.h.opcode = FUSE_MKNOD;
+ req->in.numargs = 2;
+ req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+ sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = entry->d_name.len + 1;
+ req->in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, req, dir, entry, mode);
+}
+
+static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+ struct nameidata *nd)
+{
+ if (nd && (nd->flags & LOOKUP_OPEN)) {
+ int err = fuse_create_open(dir, entry, mode, nd);
+ if (err != -ENOSYS)
+ return err;
+ /* Fall back on mknod */
+ }
+ return fuse_mknod(dir, entry, mode, 0);
+}
+
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+{
+ struct fuse_mkdir_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ if (!fc->dont_mask)
+ mode &= ~current_umask();
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mode = mode;
+ inarg.umask = current_umask();
+ req->in.h.opcode = FUSE_MKDIR;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = entry->d_name.len + 1;
+ req->in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, req, dir, entry, S_IFDIR);
+}
+
+static int fuse_symlink(struct inode *dir, struct dentry *entry,
+ const char *link)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ unsigned len = strlen(link) + 1;
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = FUSE_SYMLINK;
+ req->in.numargs = 2;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ req->in.args[1].size = len;
+ req->in.args[1].value = link;
+ return create_new_entry(fc, req, dir, entry, S_IFLNK);
+}
+
+static int fuse_unlink(struct inode *dir, struct dentry *entry)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = FUSE_UNLINK;
+ req->in.h.nodeid = get_node_id(dir);
+ req->in.numargs = 1;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ struct inode *inode = entry->d_inode;
+
+ /*
+ * Set nlink to zero so the inode can be cleared, if the inode
+ * does have more links this will be discovered at the next
+ * lookup/getattr.
+ */
+ clear_nlink(inode);
+ fuse_invalidate_attr(inode);
+ fuse_invalidate_attr(dir);
+ fuse_invalidate_entry_cache(entry);
+ } else if (err == -EINTR)
+ fuse_invalidate_entry(entry);
+ return err;
+}
+
+static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = FUSE_RMDIR;
+ req->in.h.nodeid = get_node_id(dir);
+ req->in.numargs = 1;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ clear_nlink(entry->d_inode);
+ fuse_invalidate_attr(dir);
+ fuse_invalidate_entry_cache(entry);
+ } else if (err == -EINTR)
+ fuse_invalidate_entry(entry);
+ return err;
+}
+
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent)
+{
+ int err;
+ struct fuse_rename_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(olddir);
+ struct fuse_req *req = fuse_get_req(fc);
+
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.newdir = get_node_id(newdir);
+ req->in.h.opcode = FUSE_RENAME;
+ req->in.h.nodeid = get_node_id(olddir);
+ req->in.numargs = 3;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = oldent->d_name.len + 1;
+ req->in.args[1].value = oldent->d_name.name;
+ req->in.args[2].size = newent->d_name.len + 1;
+ req->in.args[2].value = newent->d_name.name;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ /* ctime changes */
+ fuse_invalidate_attr(oldent->d_inode);
+
+ fuse_invalidate_attr(olddir);
+ if (olddir != newdir)
+ fuse_invalidate_attr(newdir);
+
+ /* newent will end up negative */
+ if (newent->d_inode) {
+ fuse_invalidate_attr(newent->d_inode);
+ fuse_invalidate_entry_cache(newent);
+ }
+ } else if (err == -EINTR) {
+ /* If request was interrupted, DEITY only knows if the
+ rename actually took place. If the invalidation
+ fails (e.g. some process has CWD under the renamed
+ directory), then there can be inconsistency between
+ the dcache and the real filesystem. Tough luck. */
+ fuse_invalidate_entry(oldent);
+ if (newent->d_inode)
+ fuse_invalidate_entry(newent);
+ }
+
+ return err;
+}
+
+static int fuse_link(struct dentry *entry, struct inode *newdir,
+ struct dentry *newent)
+{
+ int err;
+ struct fuse_link_in inarg;
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.oldnodeid = get_node_id(inode);
+ req->in.h.opcode = FUSE_LINK;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = newent->d_name.len + 1;
+ req->in.args[1].value = newent->d_name.name;
+ err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+ /* Contrary to "normal" filesystems it can happen that link
+ makes two "logical" inodes point to the same "physical"
+ inode. We invalidate the attributes of the old one, so it
+ will reflect changes in the backing inode (link count,
+ etc.)
+ */
+ if (!err || err == -EINTR)
+ fuse_invalidate_attr(inode);
+ return err;
+}
+
+static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
+ struct kstat *stat)
+{
+ stat->dev = inode->i_sb->s_dev;
+ stat->ino = attr->ino;
+ stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
+ stat->nlink = attr->nlink;
+ stat->uid = attr->uid;
+ stat->gid = attr->gid;
+ stat->rdev = inode->i_rdev;
+ stat->atime.tv_sec = attr->atime;
+ stat->atime.tv_nsec = attr->atimensec;
+ stat->mtime.tv_sec = attr->mtime;
+ stat->mtime.tv_nsec = attr->mtimensec;
+ stat->ctime.tv_sec = attr->ctime;
+ stat->ctime.tv_nsec = attr->ctimensec;
+ stat->size = attr->size;
+ stat->blocks = attr->blocks;
+ stat->blksize = (1 << inode->i_blkbits);
+}
+
+static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
+ struct file *file)
+{
+ int err;
+ struct fuse_getattr_in inarg;
+ struct fuse_attr_out outarg;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ u64 attr_version;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ attr_version = fuse_get_attr_version(fc);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+ /* Directories have separate file-handle space */
+ if (file && S_ISREG(inode->i_mode)) {
+ struct fuse_file *ff = file->private_data;
+
+ inarg.getattr_flags |= FUSE_GETATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ req->in.h.opcode = FUSE_GETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ make_bad_inode(inode);
+ err = -EIO;
+ } else {
+ fuse_change_attributes(inode, &outarg.attr,
+ attr_timeout(&outarg),
+ attr_version);
+ if (stat)
+ fuse_fillattr(inode, &outarg.attr, stat);
+ }
+ }
+ return err;
+}
+
+int fuse_update_attributes(struct inode *inode, struct kstat *stat,
+ struct file *file, bool *refreshed)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ int err;
+ bool r;
+
+ if (fi->i_time < get_jiffies_64()) {
+ r = true;
+ err = fuse_do_getattr(inode, stat, file);
+ } else {
+ r = false;
+ err = 0;
+ if (stat) {
+ generic_fillattr(inode, stat);
+ stat->mode = fi->orig_i_mode;
+ stat->ino = fi->orig_ino;
+ }
+ }
+
+ if (refreshed != NULL)
+ *refreshed = r;
+
+ return err;
+}
+
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+ struct qstr *name)
+{
+ int err = -ENOTDIR;
+ struct inode *parent;
+ struct dentry *dir;
+ struct dentry *entry;
+
+ parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
+ if (!parent)
+ return -ENOENT;
+
+ mutex_lock(&parent->i_mutex);
+ if (!S_ISDIR(parent->i_mode))
+ goto unlock;
+
+ err = -ENOENT;
+ dir = d_find_alias(parent);
+ if (!dir)
+ goto unlock;
+
+ entry = d_lookup(dir, name);
+ dput(dir);
+ if (!entry)
+ goto unlock;
+
+ fuse_invalidate_attr(parent);
+ fuse_invalidate_entry(entry);
+ dput(entry);
+ err = 0;
+
+ unlock:
+ mutex_unlock(&parent->i_mutex);
+ iput(parent);
+ return err;
+}
+
+/*
+ * Calling into a user-controlled filesystem gives the filesystem
+ * daemon ptrace-like capabilities over the requester process. This
+ * means, that the filesystem daemon is able to record the exact
+ * filesystem operations performed, and can also control the behavior
+ * of the requester process in otherwise impossible ways. For example
+ * it can delay the operation for arbitrary length of time allowing
+ * DoS against the requester.
+ *
+ * For this reason only those processes can call into the filesystem,
+ * for which the owner of the mount has ptrace privilege. This
+ * excludes processes started by other users, suid or sgid processes.
+ */
+int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+{
+ const struct cred *cred;
+ int ret;
+
+ if (fc->flags & FUSE_ALLOW_OTHER)
+ return 1;
+
+ rcu_read_lock();
+ ret = 0;
+ cred = __task_cred(task);
+ if (cred->euid == fc->user_id &&
+ cred->suid == fc->user_id &&
+ cred->uid == fc->user_id &&
+ cred->egid == fc->group_id &&
+ cred->sgid == fc->group_id &&
+ cred->gid == fc->group_id)
+ ret = 1;
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static int fuse_access(struct inode *inode, int mask)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_access_in inarg;
+ int err;
+
+ if (fc->no_access)
+ return 0;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
+ req->in.h.opcode = FUSE_ACCESS;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_access = 1;
+ err = 0;
+ }
+ return err;
+}
+
+static int fuse_perm_getattr(struct inode *inode, int flags)
+{
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ return fuse_do_getattr(inode, NULL, NULL);
+}
+
+/*
+ * Check permission. The two basic access models of FUSE are:
+ *
+ * 1) Local access checking ('default_permissions' mount option) based
+ * on file mode. This is the plain old disk filesystem permission
+ * modell.
+ *
+ * 2) "Remote" access checking, where server is responsible for
+ * checking permission in each inode operation. An exception to this
+ * is if ->permission() was invoked from sys_access() in which case an
+ * access request is sent. Execute permission is still checked
+ * locally based on file mode.
+ */
+static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ bool refreshed = false;
+ int err = 0;
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ /*
+ * If attributes are needed, refresh them before proceeding
+ */
+ if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
+ ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ if (fi->i_time < get_jiffies_64()) {
+ refreshed = true;
+
+ err = fuse_perm_getattr(inode, flags);
+ if (err)
+ return err;
+ }
+ }
+
+ if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+ err = generic_permission(inode, mask, flags, NULL);
+
+ /* If permission is denied, try to refresh file
+ attributes. This is also needed, because the root
+ node will at first have no permissions */
+ if (err == -EACCES && !refreshed) {
+ err = fuse_perm_getattr(inode, flags);
+ if (!err)
+ err = generic_permission(inode, mask,
+ flags, NULL);
+ }
+
+ /* Note: the opposite of the above test does not
+ exist. So if permissions are revoked this won't be
+ noticed immediately, only after the attribute
+ timeout has expired */
+ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ err = fuse_access(inode, mask);
+ } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
+ if (!(inode->i_mode & S_IXUGO)) {
+ if (refreshed)
+ return -EACCES;
+
+ err = fuse_perm_getattr(inode, flags);
+ if (!err && !(inode->i_mode & S_IXUGO))
+ return -EACCES;
+ }
+ }
+ return err;
+}
+
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+ void *dstbuf, filldir_t filldir)
+{
+ while (nbytes >= FUSE_NAME_OFFSET) {
+ struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+ size_t reclen = FUSE_DIRENT_SIZE(dirent);
+ int over;
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+
+ over = filldir(dstbuf, dirent->name, dirent->namelen,
+ file->f_pos, dirent->ino, dirent->type);
+ if (over)
+ break;
+
+ buf += reclen;
+ nbytes -= reclen;
+ file->f_pos = dirent->off;
+ }
+
+ return 0;
+}
+
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+{
+ int err;
+ size_t nbytes;
+ struct page *page;
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ fuse_put_request(fc, req);
+ return -ENOMEM;
+ }
+ req->out.argpages = 1;
+ req->num_pages = 1;
+ req->pages[0] = page;
+ fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+ fuse_request_send(fc, req);
+ nbytes = req->out.args[0].size;
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err)
+ err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
+ filldir);
+
+ __free_page(page);
+ fuse_invalidate_attr(inode); /* atime changed */
+ return err;
+}
+
+static char *read_link(struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_req(fc);
+ char *link;
+
+ if (IS_ERR(req))
+ return ERR_CAST(req);
+
+ link = (char *) __get_free_page(GFP_KERNEL);
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ req->in.h.opcode = FUSE_READLINK;
+ req->in.h.nodeid = get_node_id(inode);
+ req->out.argvar = 1;
+ req->out.numargs = 1;
+ req->out.args[0].size = PAGE_SIZE - 1;
+ req->out.args[0].value = link;
+ fuse_request_send(fc, req);
+ if (req->out.h.error) {
+ free_page((unsigned long) link);
+ link = ERR_PTR(req->out.h.error);
+ } else
+ link[req->out.args[0].size] = '\0';
+ out:
+ fuse_put_request(fc, req);
+ fuse_invalidate_attr(inode); /* atime changed */
+ return link;
+}
+
+static void free_link(char *link)
+{
+ if (!IS_ERR(link))
+ free_page((unsigned long) link);
+}
+
+static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ nd_set_link(nd, read_link(dentry));
+ return NULL;
+}
+
+static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{
+ free_link(nd_get_link(nd));
+}
+
+static int fuse_dir_open(struct inode *inode, struct file *file)
+{
+ return fuse_open_common(inode, file, true);
+}
+
+static int fuse_dir_release(struct inode *inode, struct file *file)
+{
+ fuse_release_common(file, FUSE_RELEASEDIR);
+
+ return 0;
+}
+
+static int fuse_dir_fsync(struct file *file, int datasync)
+{
+ return fuse_fsync_common(file, datasync, 1);
+}
+
+static bool update_mtime(unsigned ivalid)
+{
+ /* Always update if mtime is explicitly set */
+ if (ivalid & ATTR_MTIME_SET)
+ return true;
+
+ /* If it's an open(O_TRUNC) or an ftruncate(), don't update */
+ if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
+ return false;
+
+ /* In all other cases update */
+ return true;
+}
+
+static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
+{
+ unsigned ivalid = iattr->ia_valid;
+
+ if (ivalid & ATTR_MODE)
+ arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
+ if (ivalid & ATTR_UID)
+ arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid;
+ if (ivalid & ATTR_GID)
+ arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid;
+ if (ivalid & ATTR_SIZE)
+ arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
+ if (ivalid & ATTR_ATIME) {
+ arg->valid |= FATTR_ATIME;
+ arg->atime = iattr->ia_atime.tv_sec;
+ arg->atimensec = iattr->ia_atime.tv_nsec;
+ if (!(ivalid & ATTR_ATIME_SET))
+ arg->valid |= FATTR_ATIME_NOW;
+ }
+ if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) {
+ arg->valid |= FATTR_MTIME;
+ arg->mtime = iattr->ia_mtime.tv_sec;
+ arg->mtimensec = iattr->ia_mtime.tv_nsec;
+ if (!(ivalid & ATTR_MTIME_SET))
+ arg->valid |= FATTR_MTIME_NOW;
+ }
+}
+
+/*
+ * Prevent concurrent writepages on inode
+ *
+ * This is done by adding a negative bias to the inode write counter
+ * and waiting for all pending writes to finish.
+ */
+void fuse_set_nowrite(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ BUG_ON(!mutex_is_locked(&inode->i_mutex));
+
+ spin_lock(&fc->lock);
+ BUG_ON(fi->writectr < 0);
+ fi->writectr += FUSE_NOWRITE;
+ spin_unlock(&fc->lock);
+ wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
+}
+
+/*
+ * Allow writepages on inode
+ *
+ * Remove the bias from the writecounter and send any queued
+ * writepages.
+ */
+static void __fuse_release_nowrite(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ BUG_ON(fi->writectr != FUSE_NOWRITE);
+ fi->writectr = 0;
+ fuse_flush_writepages(inode);
+}
+
+void fuse_release_nowrite(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ spin_lock(&fc->lock);
+ __fuse_release_nowrite(inode);
+ spin_unlock(&fc->lock);
+}
+
+/*
+ * Set attributes, and at the same time refresh them.
+ *
+ * Truncation is slightly complicated, because the 'truncate' request
+ * may fail, in which case we don't want to touch the mapping.
+ * vmtruncate() doesn't allow for this case, so do the rlimit checking
+ * and the actual truncation by hand.
+ */
+static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
+ struct file *file)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_setattr_in inarg;
+ struct fuse_attr_out outarg;
+ bool is_truncate = false;
+ loff_t oldsize;
+ int err;
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+ attr->ia_valid |= ATTR_FORCE;
+
+ err = inode_change_ok(inode, attr);
+ if (err)
+ return err;
+
+ if (attr->ia_valid & ATTR_OPEN) {
+ if (fc->atomic_o_trunc)
+ return 0;
+ file = NULL;
+ }
+
+ if (attr->ia_valid & ATTR_SIZE)
+ is_truncate = true;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ if (is_truncate)
+ fuse_set_nowrite(inode);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+ iattr_to_fattr(attr, &inarg);
+ if (file) {
+ struct fuse_file *ff = file->private_data;
+ inarg.valid |= FATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ if (attr->ia_valid & ATTR_SIZE) {
+ /* For mandatory locking in truncate */
+ inarg.valid |= FATTR_LOCKOWNER;
+ inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
+ }
+ req->in.h.opcode = FUSE_SETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ if (fc->minor < 9)
+ req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ else
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err) {
+ if (err == -EINTR)
+ fuse_invalidate_attr(inode);
+ goto error;
+ }
+
+ if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ make_bad_inode(inode);
+ err = -EIO;
+ goto error;
+ }
+
+ spin_lock(&fc->lock);
+ fuse_change_attributes_common(inode, &outarg.attr,
+ attr_timeout(&outarg));
+ oldsize = inode->i_size;
+ i_size_write(inode, outarg.attr.size);
+
+ if (is_truncate) {
+ /* NOTE: this may release/reacquire fc->lock */
+ __fuse_release_nowrite(inode);
+ }
+ spin_unlock(&fc->lock);
+
+ /*
+ * Only call invalidate_inode_pages2() after removing
+ * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
+ */
+ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+ truncate_pagecache(inode, oldsize, outarg.attr.size);
+ invalidate_inode_pages2(inode->i_mapping);
+ }
+
+ return 0;
+
+error:
+ if (is_truncate)
+ fuse_release_nowrite(inode);
+
+ return err;
+}
+
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+ if (attr->ia_valid & ATTR_FILE)
+ return fuse_do_setattr(entry, attr, attr->ia_file);
+ else
+ return fuse_do_setattr(entry, attr, NULL);
+}
+
+static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+ struct kstat *stat)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ return fuse_update_attributes(inode, stat, NULL, NULL);
+}
+
+static int fuse_setxattr(struct dentry *entry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_setxattr_in inarg;
+ int err;
+
+ if (fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ inarg.flags = flags;
+ req->in.h.opcode = FUSE_SETXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 3;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = strlen(name) + 1;
+ req->in.args[1].value = name;
+ req->in.args[2].size = size;
+ req->in.args[2].value = value;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_setxattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
+ void *value, size_t size)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (fc->no_getxattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ req->in.h.opcode = FUSE_GETXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = strlen(name) + 1;
+ req->in.args[1].value = name;
+ /* This is really two different operations rolled into one */
+ req->out.numargs = 1;
+ if (size) {
+ req->out.argvar = 1;
+ req->out.args[0].size = size;
+ req->out.args[0].value = value;
+ } else {
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ }
+ fuse_request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret)
+ ret = size ? req->out.args[0].size : outarg.size;
+ else {
+ if (ret == -ENOSYS) {
+ fc->no_getxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ }
+ fuse_put_request(fc, req);
+ return ret;
+}
+
+static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ if (fc->no_listxattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ req->in.h.opcode = FUSE_LISTXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ /* This is really two different operations rolled into one */
+ req->out.numargs = 1;
+ if (size) {
+ req->out.argvar = 1;
+ req->out.args[0].size = size;
+ req->out.args[0].value = list;
+ } else {
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ }
+ fuse_request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret)
+ ret = size ? req->out.args[0].size : outarg.size;
+ else {
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ }
+ fuse_put_request(fc, req);
+ return ret;
+}
+
+static int fuse_removexattr(struct dentry *entry, const char *name)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ int err;
+
+ if (fc->no_removexattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = FUSE_REMOVEXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = strlen(name) + 1;
+ req->in.args[0].value = name;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_removexattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static const struct inode_operations fuse_dir_inode_operations = {
+ .lookup = fuse_lookup,
+ .mkdir = fuse_mkdir,
+ .symlink = fuse_symlink,
+ .unlink = fuse_unlink,
+ .rmdir = fuse_rmdir,
+ .rename = fuse_rename,
+ .link = fuse_link,
+ .setattr = fuse_setattr,
+ .create = fuse_create,
+ .mknod = fuse_mknod,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+static const struct file_operations fuse_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .readdir = fuse_readdir,
+ .open = fuse_dir_open,
+ .release = fuse_dir_release,
+ .fsync = fuse_dir_fsync,
+};
+
+static const struct inode_operations fuse_common_inode_operations = {
+ .setattr = fuse_setattr,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+static const struct inode_operations fuse_symlink_inode_operations = {
+ .setattr = fuse_setattr,
+ .follow_link = fuse_follow_link,
+ .put_link = fuse_put_link,
+ .readlink = generic_readlink,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+void fuse_init_common(struct inode *inode)
+{
+ inode->i_op = &fuse_common_inode_operations;
+}
+
+void fuse_init_dir(struct inode *inode)
+{
+ inode->i_op = &fuse_dir_inode_operations;
+ inode->i_fop = &fuse_dir_operations;
+}
+
+void fuse_init_symlink(struct inode *inode)
+{
+ inode->i_op = &fuse_symlink_inode_operations;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 00000000..82a66466
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,2186 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/compat.h>
+
+static const struct file_operations fuse_direct_io_file_operations;
+
+static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ int opcode, struct fuse_open_out *outargp)
+{
+ struct fuse_open_in inarg;
+ struct fuse_req *req;
+ int err;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
+ if (!fc->atomic_o_trunc)
+ inarg.flags &= ~O_TRUNC;
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(*outargp);
+ req->out.args[0].value = outargp;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+
+ return err;
+}
+
+struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
+{
+ struct fuse_file *ff;
+
+ ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
+ if (unlikely(!ff))
+ return NULL;
+
+ ff->fc = fc;
+ ff->reserved_req = fuse_request_alloc();
+ if (unlikely(!ff->reserved_req)) {
+ kfree(ff);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&ff->write_entry);
+ atomic_set(&ff->count, 0);
+ RB_CLEAR_NODE(&ff->polled_node);
+ init_waitqueue_head(&ff->poll_wait);
+
+ spin_lock(&fc->lock);
+ ff->kh = ++fc->khctr;
+ spin_unlock(&fc->lock);
+
+ return ff;
+}
+
+void fuse_file_free(struct fuse_file *ff)
+{
+ fuse_request_free(ff->reserved_req);
+ kfree(ff);
+}
+
+struct fuse_file *fuse_file_get(struct fuse_file *ff)
+{
+ atomic_inc(&ff->count);
+ return ff;
+}
+
+static void fuse_release_async(struct work_struct *work)
+{
+ struct fuse_req *req;
+ struct fuse_conn *fc;
+ struct path path;
+
+ req = container_of(work, struct fuse_req, misc.release.work);
+ path = req->misc.release.path;
+ fc = get_fuse_conn(path.dentry->d_inode);
+
+ fuse_put_request(fc, req);
+ path_put(&path);
+}
+
+static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (fc->destroy_req) {
+ /*
+ * If this is a fuseblk mount, then it's possible that
+ * releasing the path will result in releasing the
+ * super block and sending the DESTROY request. If
+ * the server is single threaded, this would hang.
+ * For this reason do the path_put() in a separate
+ * thread.
+ */
+ atomic_inc(&req->count);
+ INIT_WORK(&req->misc.release.work, fuse_release_async);
+ schedule_work(&req->misc.release.work);
+ } else {
+ path_put(&req->misc.release.path);
+ }
+}
+
+static void fuse_file_put(struct fuse_file *ff, bool sync)
+{
+ if (atomic_dec_and_test(&ff->count)) {
+ struct fuse_req *req = ff->reserved_req;
+
+ if (sync) {
+ fuse_request_send(ff->fc, req);
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else {
+ req->end = fuse_release_end;
+ fuse_request_send_background(ff->fc, req);
+ }
+ kfree(ff);
+ }
+}
+
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ bool isdir)
+{
+ struct fuse_open_out outarg;
+ struct fuse_file *ff;
+ int err;
+ int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+
+ ff = fuse_file_alloc(fc);
+ if (!ff)
+ return -ENOMEM;
+
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ if (err) {
+ fuse_file_free(ff);
+ return err;
+ }
+
+ if (isdir)
+ outarg.open_flags &= ~FOPEN_DIRECT_IO;
+
+ ff->fh = outarg.fh;
+ ff->nodeid = nodeid;
+ ff->open_flags = outarg.open_flags;
+ file->private_data = fuse_file_get(ff);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fuse_do_open);
+
+void fuse_finish_open(struct inode *inode, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (ff->open_flags & FOPEN_DIRECT_IO)
+ file->f_op = &fuse_direct_io_file_operations;
+ if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
+ if (ff->open_flags & FOPEN_NONSEEKABLE)
+ nonseekable_open(inode, file);
+ if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ i_size_write(inode, 0);
+ spin_unlock(&fc->lock);
+ fuse_invalidate_attr(inode);
+ }
+}
+
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ /* VFS checks this, but only _after_ ->open() */
+ if (file->f_flags & O_DIRECT)
+ return -EINVAL;
+
+ err = generic_file_open(inode, file);
+ if (err)
+ return err;
+
+ err = fuse_do_open(fc, get_node_id(inode), file, isdir);
+ if (err)
+ return err;
+
+ fuse_finish_open(inode, file);
+
+ return 0;
+}
+
+static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
+{
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_req *req = ff->reserved_req;
+ struct fuse_release_in *inarg = &req->misc.release.in;
+
+ spin_lock(&fc->lock);
+ list_del(&ff->write_entry);
+ if (!RB_EMPTY_NODE(&ff->polled_node))
+ rb_erase(&ff->polled_node, &fc->polled_files);
+ spin_unlock(&fc->lock);
+
+ wake_up_interruptible_all(&ff->poll_wait);
+
+ inarg->fh = ff->fh;
+ inarg->flags = flags;
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct fuse_release_in);
+ req->in.args[0].value = inarg;
+}
+
+void fuse_release_common(struct file *file, int opcode)
+{
+ struct fuse_file *ff;
+ struct fuse_req *req;
+
+ ff = file->private_data;
+ if (unlikely(!ff))
+ return;
+
+ req = ff->reserved_req;
+ fuse_prepare_release(ff, file->f_flags, opcode);
+
+ /* Hold vfsmount and dentry until release is finished */
+ path_get(&file->f_path);
+ req->misc.release.path = file->f_path;
+
+ /*
+ * Normally this will send the RELEASE request, however if
+ * some asynchronous READ or WRITE requests are outstanding,
+ * the sending will be delayed.
+ *
+ * Make the release synchronous if this is a fuseblk mount,
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
+ */
+ fuse_file_put(ff, ff->fc->destroy_req != NULL);
+}
+
+static int fuse_open(struct inode *inode, struct file *file)
+{
+ return fuse_open_common(inode, file, false);
+}
+
+static int fuse_release(struct inode *inode, struct file *file)
+{
+ fuse_release_common(file, FUSE_RELEASE);
+
+ /* return value is ignored by VFS */
+ return 0;
+}
+
+void fuse_sync_release(struct fuse_file *ff, int flags)
+{
+ WARN_ON(atomic_read(&ff->count) > 1);
+ fuse_prepare_release(ff, flags, FUSE_RELEASE);
+ ff->reserved_req->force = 1;
+ fuse_request_send(ff->fc, ff->reserved_req);
+ fuse_put_request(ff->fc, ff->reserved_req);
+ kfree(ff);
+}
+EXPORT_SYMBOL_GPL(fuse_sync_release);
+
+/*
+ * Scramble the ID space with XTEA, so that the value of the files_struct
+ * pointer is not exposed to userspace.
+ */
+u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
+{
+ u32 *k = fc->scramble_key;
+ u64 v = (unsigned long) id;
+ u32 v0 = v;
+ u32 v1 = v >> 32;
+ u32 sum = 0;
+ int i;
+
+ for (i = 0; i < 32; i++) {
+ v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
+ sum += 0x9E3779B9;
+ v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
+ }
+
+ return (u64) v0 + ((u64) v1 << 32);
+}
+
+/*
+ * Check if page is under writeback
+ *
+ * This is currently done by walking the list of writepage requests
+ * for the inode, which can be pretty inefficient.
+ */
+static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+ bool found = false;
+
+ spin_lock(&fc->lock);
+ list_for_each_entry(req, &fi->writepages, writepages_entry) {
+ pgoff_t curr_index;
+
+ BUG_ON(req->inode != inode);
+ curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ if (curr_index == index) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&fc->lock);
+
+ return found;
+}
+
+/*
+ * Wait for page writeback to be completed.
+ *
+ * Since fuse doesn't rely on the VM writeback tracking, this has to
+ * use some other means.
+ */
+static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
+ return 0;
+}
+
+static int fuse_flush(struct file *file, fl_owner_t id)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req;
+ struct fuse_flush_in inarg;
+ int err;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ if (fc->no_flush)
+ return 0;
+
+ req = fuse_get_req_nofail(fc, file);
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.lock_owner = fuse_lock_owner_id(fc, id);
+ req->in.h.opcode = FUSE_FLUSH;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->force = 1;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_flush = 1;
+ err = 0;
+ }
+ return err;
+}
+
+/*
+ * Wait for all pending writepages on the inode to finish.
+ *
+ * This is currently done by blocking further writes with FUSE_NOWRITE
+ * and waiting for all sent writes to complete.
+ *
+ * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
+ * could conflict with truncation.
+ */
+static void fuse_sync_writes(struct inode *inode)
+{
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
+}
+
+int fuse_fsync_common(struct file *file, int datasync, int isdir)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req;
+ struct fuse_fsync_in inarg;
+ int err;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+ return 0;
+
+ /*
+ * Start writeback against all dirty pages of the inode, then
+ * wait for all outstanding writes, before sending the FSYNC
+ * request.
+ */
+ err = write_inode_now(inode, 0);
+ if (err)
+ return err;
+
+ fuse_sync_writes(inode);
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.fsync_flags = datasync ? 1 : 0;
+ req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ if (isdir)
+ fc->no_fsyncdir = 1;
+ else
+ fc->no_fsync = 1;
+ err = 0;
+ }
+ return err;
+}
+
+static int fuse_fsync(struct file *file, int datasync)
+{
+ return fuse_fsync_common(file, datasync, 0);
+}
+
+void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
+ size_t count, int opcode)
+{
+ struct fuse_read_in *inarg = &req->misc.read.in;
+ struct fuse_file *ff = file->private_data;
+
+ inarg->fh = ff->fh;
+ inarg->offset = pos;
+ inarg->size = count;
+ inarg->flags = file->f_flags;
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct fuse_read_in);
+ req->in.args[0].value = inarg;
+ req->out.argvar = 1;
+ req->out.numargs = 1;
+ req->out.args[0].size = count;
+}
+
+static size_t fuse_send_read(struct fuse_req *req, struct file *file,
+ loff_t pos, size_t count, fl_owner_t owner)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+
+ fuse_read_fill(req, file, pos, count, FUSE_READ);
+ if (owner != NULL) {
+ struct fuse_read_in *inarg = &req->misc.read.in;
+
+ inarg->read_flags |= FUSE_READ_LOCKOWNER;
+ inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+ }
+ fuse_request_send(fc, req);
+ return req->out.args[0].size;
+}
+
+static void fuse_read_update_size(struct inode *inode, loff_t size,
+ u64 attr_ver)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ if (attr_ver == fi->attr_version && size < inode->i_size) {
+ fi->attr_version = ++fc->attr_version;
+ i_size_write(inode, size);
+ }
+ spin_unlock(&fc->lock);
+}
+
+static int fuse_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ size_t num_read;
+ loff_t pos = page_offset(page);
+ size_t count = PAGE_CACHE_SIZE;
+ u64 attr_ver;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out;
+
+ /*
+ * Page writeback can extend beyond the lifetime of the
+ * page-cache page, so make sure we read a properly synced
+ * page.
+ */
+ fuse_wait_on_page_writeback(inode, page->index);
+
+ req = fuse_get_req(fc);
+ err = PTR_ERR(req);
+ if (IS_ERR(req))
+ goto out;
+
+ attr_ver = fuse_get_attr_version(fc);
+
+ req->out.page_zeroing = 1;
+ req->out.argpages = 1;
+ req->num_pages = 1;
+ req->pages[0] = page;
+ num_read = fuse_send_read(req, file, pos, count, NULL);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+
+ if (!err) {
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (num_read < count)
+ fuse_read_update_size(inode, pos + num_read, attr_ver);
+
+ SetPageUptodate(page);
+ }
+
+ fuse_invalidate_attr(inode); /* atime changed */
+ out:
+ unlock_page(page);
+ return err;
+}
+
+static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ int i;
+ size_t count = req->misc.read.in.size;
+ size_t num_read = req->out.args[0].size;
+ struct address_space *mapping = NULL;
+
+ for (i = 0; mapping == NULL && i < req->num_pages; i++)
+ mapping = req->pages[i]->mapping;
+
+ if (mapping) {
+ struct inode *inode = mapping->host;
+
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (!req->out.h.error && num_read < count) {
+ loff_t pos;
+
+ pos = page_offset(req->pages[0]) + num_read;
+ fuse_read_update_size(inode, pos,
+ req->misc.read.attr_ver);
+ }
+ fuse_invalidate_attr(inode); /* atime changed */
+ }
+
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+ if (!req->out.h.error)
+ SetPageUptodate(page);
+ else
+ SetPageError(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ if (req->ff)
+ fuse_file_put(req->ff, false);
+}
+
+static void fuse_send_readpages(struct fuse_req *req, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ loff_t pos = page_offset(req->pages[0]);
+ size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+
+ req->out.argpages = 1;
+ req->out.page_zeroing = 1;
+ req->out.page_replace = 1;
+ fuse_read_fill(req, file, pos, count, FUSE_READ);
+ req->misc.read.attr_ver = fuse_get_attr_version(fc);
+ if (fc->async_read) {
+ req->ff = fuse_file_get(ff);
+ req->end = fuse_readpages_end;
+ fuse_request_send_background(fc, req);
+ } else {
+ fuse_request_send(fc, req);
+ fuse_readpages_end(fc, req);
+ fuse_put_request(fc, req);
+ }
+}
+
+struct fuse_fill_data {
+ struct fuse_req *req;
+ struct file *file;
+ struct inode *inode;
+};
+
+static int fuse_readpages_fill(void *_data, struct page *page)
+{
+ struct fuse_fill_data *data = _data;
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ fuse_wait_on_page_writeback(inode, page->index);
+
+ if (req->num_pages &&
+ (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+ req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+ fuse_send_readpages(req, data->file);
+ data->req = req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ unlock_page(page);
+ return PTR_ERR(req);
+ }
+ }
+ page_cache_get(page);
+ req->pages[req->num_pages] = page;
+ req->num_pages++;
+ return 0;
+}
+
+static int fuse_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_fill_data data;
+ int err;
+
+ err = -EIO;
+ if (is_bad_inode(inode))
+ goto out;
+
+ data.file = file;
+ data.inode = inode;
+ data.req = fuse_get_req(fc);
+ err = PTR_ERR(data.req);
+ if (IS_ERR(data.req))
+ goto out;
+
+ err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
+ if (!err) {
+ if (data.req->num_pages)
+ fuse_send_readpages(data.req, file);
+ else
+ fuse_put_request(fc, data.req);
+ }
+out:
+ return err;
+}
+
+static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+
+ if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) {
+ int err;
+ /*
+ * If trying to read past EOF, make sure the i_size
+ * attribute is up-to-date.
+ */
+ err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
+ if (err)
+ return err;
+ }
+
+ return generic_file_aio_read(iocb, iov, nr_segs, pos);
+}
+
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
+ loff_t pos, size_t count)
+{
+ struct fuse_write_in *inarg = &req->misc.write.in;
+ struct fuse_write_out *outarg = &req->misc.write.out;
+
+ inarg->fh = ff->fh;
+ inarg->offset = pos;
+ inarg->size = count;
+ req->in.h.opcode = FUSE_WRITE;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 2;
+ if (ff->fc->minor < 9)
+ req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
+ else
+ req->in.args[0].size = sizeof(struct fuse_write_in);
+ req->in.args[0].value = inarg;
+ req->in.args[1].size = count;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(struct fuse_write_out);
+ req->out.args[0].value = outarg;
+}
+
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+ loff_t pos, size_t count, fl_owner_t owner)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_write_in *inarg = &req->misc.write.in;
+
+ fuse_write_fill(req, ff, pos, count);
+ inarg->flags = file->f_flags;
+ if (owner != NULL) {
+ inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
+ inarg->lock_owner = fuse_lock_owner_id(fc, owner);
+ }
+ fuse_request_send(fc, req);
+ return req->misc.write.out.size;
+}
+
+static int fuse_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+ *pagep = grab_cache_page_write_begin(mapping, index, flags);
+ if (!*pagep)
+ return -ENOMEM;
+ return 0;
+}
+
+void fuse_write_update_size(struct inode *inode, loff_t pos)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ if (pos > inode->i_size)
+ i_size_write(inode, pos);
+ spin_unlock(&fc->lock);
+}
+
+static int fuse_buffered_write(struct file *file, struct inode *inode,
+ loff_t pos, unsigned count, struct page *page)
+{
+ int err;
+ size_t nres;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ struct fuse_req *req;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ /*
+ * Make sure writepages on the same page are not mixed up with
+ * plain writes.
+ */
+ fuse_wait_on_page_writeback(inode, page->index);
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.argpages = 1;
+ req->num_pages = 1;
+ req->pages[0] = page;
+ req->page_offset = offset;
+ nres = fuse_send_write(req, file, pos, count, NULL);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err && !nres)
+ err = -EIO;
+ if (!err) {
+ pos += nres;
+ fuse_write_update_size(inode, pos);
+ if (count == PAGE_CACHE_SIZE)
+ SetPageUptodate(page);
+ }
+ fuse_invalidate_attr(inode);
+ return err ? err : nres;
+}
+
+static int fuse_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ int res = 0;
+
+ if (copied)
+ res = fuse_buffered_write(file, inode, pos, copied, page);
+
+ unlock_page(page);
+ page_cache_release(page);
+ return res;
+}
+
+static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos,
+ size_t count)
+{
+ size_t res;
+ unsigned offset;
+ unsigned i;
+
+ for (i = 0; i < req->num_pages; i++)
+ fuse_wait_on_page_writeback(inode, req->pages[i]->index);
+
+ res = fuse_send_write(req, file, pos, count, NULL);
+
+ offset = req->page_offset;
+ count = res;
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+
+ if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
+ SetPageUptodate(page);
+
+ if (count > PAGE_CACHE_SIZE - offset)
+ count -= PAGE_CACHE_SIZE - offset;
+ else
+ count = 0;
+ offset = 0;
+
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ return res;
+}
+
+static ssize_t fuse_fill_write_pages(struct fuse_req *req,
+ struct address_space *mapping,
+ struct iov_iter *ii, loff_t pos)
+{
+ struct fuse_conn *fc = get_fuse_conn(mapping->host);
+ unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ size_t count = 0;
+ int err;
+
+ req->in.argpages = 1;
+ req->page_offset = offset;
+
+ do {
+ size_t tmp;
+ struct page *page;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
+ iov_iter_count(ii));
+
+ bytes = min_t(size_t, bytes, fc->max_write - count);
+
+ again:
+ err = -EFAULT;
+ if (iov_iter_fault_in_readable(ii, bytes))
+ break;
+
+ err = -ENOMEM;
+ page = grab_cache_page_write_begin(mapping, index, 0);
+ if (!page)
+ break;
+
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_page(page);
+
+ pagefault_disable();
+ tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+ pagefault_enable();
+ flush_dcache_page(page);
+
+ if (!tmp) {
+ unlock_page(page);
+ page_cache_release(page);
+ bytes = min(bytes, iov_iter_single_seg_count(ii));
+ goto again;
+ }
+
+ err = 0;
+ req->pages[req->num_pages] = page;
+ req->num_pages++;
+
+ iov_iter_advance(ii, tmp);
+ count += tmp;
+ pos += tmp;
+ offset += tmp;
+ if (offset == PAGE_CACHE_SIZE)
+ offset = 0;
+
+ if (!fc->big_writes)
+ break;
+ } while (iov_iter_count(ii) && count < fc->max_write &&
+ req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
+
+ return count > 0 ? count : err;
+}
+
+static ssize_t fuse_perform_write(struct file *file,
+ struct address_space *mapping,
+ struct iov_iter *ii, loff_t pos)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err = 0;
+ ssize_t res = 0;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ do {
+ struct fuse_req *req;
+ ssize_t count;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ break;
+ }
+
+ count = fuse_fill_write_pages(req, mapping, ii, pos);
+ if (count <= 0) {
+ err = count;
+ } else {
+ size_t num_written;
+
+ num_written = fuse_send_write_pages(req, file, inode,
+ pos, count);
+ err = req->out.h.error;
+ if (!err) {
+ res += num_written;
+ pos += num_written;
+
+ /* break out of the loop on short write */
+ if (num_written != count)
+ err = -EIO;
+ }
+ }
+ fuse_put_request(fc, req);
+ } while (!err && iov_iter_count(ii));
+
+ if (res > 0)
+ fuse_write_update_size(inode, pos);
+
+ fuse_invalidate_attr(inode);
+
+ return res > 0 ? res : err;
+}
+
+static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ size_t count = 0;
+ ssize_t written = 0;
+ struct inode *inode = mapping->host;
+ ssize_t err;
+ struct iov_iter i;
+
+ WARN_ON(iocb->ki_pos != pos);
+
+ err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = mapping->backing_dev_info;
+
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+ if (err)
+ goto out;
+
+ if (count == 0)
+ goto out;
+
+ err = file_remove_suid(file);
+ if (err)
+ goto out;
+
+ file_update_time(file);
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+ written = fuse_perform_write(file, mapping, &i, pos);
+ if (written >= 0)
+ iocb->ki_pos = pos + written;
+
+out:
+ current->backing_dev_info = NULL;
+ mutex_unlock(&inode->i_mutex);
+
+ return written ? written : err;
+}
+
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+ unsigned i;
+
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+ if (write)
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+}
+
+static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+ size_t *nbytesp, int write)
+{
+ size_t nbytes = *nbytesp;
+ unsigned long user_addr = (unsigned long) buf;
+ unsigned offset = user_addr & ~PAGE_MASK;
+ int npages;
+
+ /* Special case for kernel I/O: can copy directly into the buffer */
+ if (segment_eq(get_fs(), KERNEL_DS)) {
+ if (write)
+ req->in.args[1].value = (void *) user_addr;
+ else
+ req->out.args[0].value = (void *) user_addr;
+
+ return 0;
+ }
+
+ nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+ npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
+ npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
+ if (npages < 0)
+ return npages;
+
+ req->num_pages = npages;
+ req->page_offset = offset;
+
+ if (write)
+ req->in.argpages = 1;
+ else
+ req->out.argpages = 1;
+
+ nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+ *nbytesp = min(*nbytesp, nbytes);
+
+ return 0;
+}
+
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ size_t nmax = write ? fc->max_write : fc->max_read;
+ loff_t pos = *ppos;
+ ssize_t res = 0;
+ struct fuse_req *req;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ while (count) {
+ size_t nres;
+ fl_owner_t owner = current->files;
+ size_t nbytes = min(count, nmax);
+ int err = fuse_get_user_pages(req, buf, &nbytes, write);
+ if (err) {
+ res = err;
+ break;
+ }
+
+ if (write)
+ nres = fuse_send_write(req, file, pos, nbytes, owner);
+ else
+ nres = fuse_send_read(req, file, pos, nbytes, owner);
+
+ fuse_release_user_pages(req, !write);
+ if (req->out.h.error) {
+ if (!res)
+ res = req->out.h.error;
+ break;
+ } else if (nres > nbytes) {
+ res = -EIO;
+ break;
+ }
+ count -= nres;
+ res += nres;
+ pos += nres;
+ buf += nres;
+ if (nres != nbytes)
+ break;
+ if (count) {
+ fuse_put_request(fc, req);
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ break;
+ }
+ }
+ if (!IS_ERR(req))
+ fuse_put_request(fc, req);
+ if (res > 0)
+ *ppos = pos;
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(fuse_direct_io);
+
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ ssize_t res;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ res = fuse_direct_io(file, buf, count, ppos, 0);
+
+ fuse_invalidate_attr(inode);
+
+ return res;
+}
+
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ ssize_t res;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ /* Don't allow parallel writes to the same file */
+ mutex_lock(&inode->i_mutex);
+ res = generic_write_checks(file, ppos, &count, 0);
+ if (!res) {
+ res = fuse_direct_io(file, buf, count, ppos, 1);
+ if (res > 0)
+ fuse_write_update_size(inode, *ppos);
+ }
+ mutex_unlock(&inode->i_mutex);
+
+ fuse_invalidate_attr(inode);
+
+ return res;
+}
+
+static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
+{
+ __free_page(req->pages[0]);
+ fuse_file_put(req->ff, false);
+}
+
+static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct inode *inode = req->inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+
+ list_del(&req->writepages_entry);
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
+ wake_up(&fi->page_waitq);
+}
+
+/* Called under fc->lock, may release and reacquire it */
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ struct fuse_inode *fi = get_fuse_inode(req->inode);
+ loff_t size = i_size_read(req->inode);
+ struct fuse_write_in *inarg = &req->misc.write.in;
+
+ if (!fc->connected)
+ goto out_free;
+
+ if (inarg->offset + PAGE_CACHE_SIZE <= size) {
+ inarg->size = PAGE_CACHE_SIZE;
+ } else if (inarg->offset < size) {
+ inarg->size = size & (PAGE_CACHE_SIZE - 1);
+ } else {
+ /* Got truncated off completely */
+ goto out_free;
+ }
+
+ req->in.args[1].size = inarg->size;
+ fi->writectr++;
+ fuse_request_send_background_locked(fc, req);
+ return;
+
+ out_free:
+ fuse_writepage_finish(fc, req);
+ spin_unlock(&fc->lock);
+ fuse_writepage_free(fc, req);
+ fuse_put_request(fc, req);
+ spin_lock(&fc->lock);
+}
+
+/*
+ * If fi->writectr is positive (no truncate or fsync going on) send
+ * all queued writepage requests.
+ *
+ * Called with fc->lock
+ */
+void fuse_flush_writepages(struct inode *inode)
+__releases(fc->lock)
+__acquires(fc->lock)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+
+ while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
+ req = list_entry(fi->queued_writes.next, struct fuse_req, list);
+ list_del_init(&req->list);
+ fuse_send_writepage(fc, req);
+ }
+}
+
+static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct inode *inode = req->inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ mapping_set_error(inode->i_mapping, req->out.h.error);
+ spin_lock(&fc->lock);
+ fi->writectr--;
+ fuse_writepage_finish(fc, req);
+ spin_unlock(&fc->lock);
+ fuse_writepage_free(fc, req);
+}
+
+static int fuse_writepage_locked(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+ struct fuse_file *ff;
+ struct page *tmp_page;
+
+ set_page_writeback(page);
+
+ req = fuse_request_alloc_nofs();
+ if (!req)
+ goto err;
+
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (!tmp_page)
+ goto err_free;
+
+ spin_lock(&fc->lock);
+ BUG_ON(list_empty(&fi->write_files));
+ ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
+ req->ff = fuse_file_get(ff);
+ spin_unlock(&fc->lock);
+
+ fuse_write_fill(req, ff, page_offset(page), 0);
+
+ copy_highpage(tmp_page, page);
+ req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+ req->in.argpages = 1;
+ req->num_pages = 1;
+ req->pages[0] = tmp_page;
+ req->page_offset = 0;
+ req->end = fuse_writepage_end;
+ req->inode = inode;
+
+ inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
+ inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ end_page_writeback(page);
+
+ spin_lock(&fc->lock);
+ list_add(&req->writepages_entry, &fi->writepages);
+ list_add_tail(&req->list, &fi->queued_writes);
+ fuse_flush_writepages(inode);
+ spin_unlock(&fc->lock);
+
+ return 0;
+
+err_free:
+ fuse_request_free(req);
+err:
+ end_page_writeback(page);
+ return -ENOMEM;
+}
+
+static int fuse_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int err;
+
+ err = fuse_writepage_locked(page);
+ unlock_page(page);
+
+ return err;
+}
+
+static int fuse_launder_page(struct page *page)
+{
+ int err = 0;
+ if (clear_page_dirty_for_io(page)) {
+ struct inode *inode = page->mapping->host;
+ err = fuse_writepage_locked(page);
+ if (!err)
+ fuse_wait_on_page_writeback(inode, page->index);
+ }
+ return err;
+}
+
+/*
+ * Write back dirty pages now, because there may not be any suitable
+ * open files later
+ */
+static void fuse_vma_close(struct vm_area_struct *vma)
+{
+ filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+/*
+ * Wait for writeback against this page to complete before allowing it
+ * to be marked dirty again, and hence written back again, possibly
+ * before the previous writepage completed.
+ *
+ * Block here, instead of in ->writepage(), so that the userspace fs
+ * can only block processes actually operating on the filesystem.
+ *
+ * Otherwise unprivileged userspace fs would be able to block
+ * unrelated:
+ *
+ * - page migration
+ * - sync(2)
+ * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
+ */
+static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *page = vmf->page;
+ /*
+ * Don't use page->mapping as it may become NULL from a
+ * concurrent truncate.
+ */
+ struct inode *inode = vma->vm_file->f_mapping->host;
+
+ fuse_wait_on_page_writeback(inode, page->index);
+ return 0;
+}
+
+static const struct vm_operations_struct fuse_file_vm_ops = {
+ .close = fuse_vma_close,
+ .fault = filemap_fault,
+ .page_mkwrite = fuse_page_mkwrite,
+};
+
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_file *ff = file->private_data;
+ /*
+ * file may be written through mmap, so chain it onto the
+ * inodes's write_file list
+ */
+ spin_lock(&fc->lock);
+ if (list_empty(&ff->write_entry))
+ list_add(&ff->write_entry, &fi->write_files);
+ spin_unlock(&fc->lock);
+ }
+ file_accessed(file);
+ vma->vm_ops = &fuse_file_vm_ops;
+ return 0;
+}
+
+static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /* Can't provide the coherency needed for MAP_SHARED */
+ if (vma->vm_flags & VM_MAYSHARE)
+ return -ENODEV;
+
+ invalidate_inode_pages2(file->f_mapping);
+
+ return generic_file_mmap(file, vma);
+}
+
+static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
+ struct file_lock *fl)
+{
+ switch (ffl->type) {
+ case F_UNLCK:
+ break;
+
+ case F_RDLCK:
+ case F_WRLCK:
+ if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
+ ffl->end < ffl->start)
+ return -EIO;
+
+ fl->fl_start = ffl->start;
+ fl->fl_end = ffl->end;
+ fl->fl_pid = ffl->pid;
+ break;
+
+ default:
+ return -EIO;
+ }
+ fl->fl_type = ffl->type;
+ return 0;
+}
+
+static void fuse_lk_fill(struct fuse_req *req, struct file *file,
+ const struct file_lock *fl, int opcode, pid_t pid,
+ int flock)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_lk_in *arg = &req->misc.lk_in;
+
+ arg->fh = ff->fh;
+ arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ arg->lk.start = fl->fl_start;
+ arg->lk.end = fl->fl_end;
+ arg->lk.type = fl->fl_type;
+ arg->lk.pid = pid;
+ if (flock)
+ arg->lk_flags |= FUSE_LK_FLOCK;
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*arg);
+ req->in.args[0].value = arg;
+}
+
+static int fuse_getlk(struct file *file, struct file_lock *fl)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_lk_out outarg;
+ int err;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err)
+ err = convert_fuse_file_lock(&outarg.lk, fl);
+
+ return err;
+}
+
+static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
+ pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
+ int err;
+
+ if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+ /* NLM needs asynchronous locks, which we don't support yet */
+ return -ENOLCK;
+ }
+
+ /* Unlock on close is handled by the flush method */
+ if (fl->fl_flags & FL_CLOSE)
+ return 0;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ fuse_lk_fill(req, file, fl, opcode, pid, flock);
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ /* locking is restartable */
+ if (err == -EINTR)
+ err = -ERESTARTSYS;
+ fuse_put_request(fc, req);
+ return err;
+}
+
+static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (cmd == F_CANCELLK) {
+ err = 0;
+ } else if (cmd == F_GETLK) {
+ if (fc->no_lock) {
+ posix_test_lock(file, fl);
+ err = 0;
+ } else
+ err = fuse_getlk(file, fl);
+ } else {
+ if (fc->no_lock)
+ err = posix_lock_file(file, fl, NULL);
+ else
+ err = fuse_setlk(file, fl, 0);
+ }
+ return err;
+}
+
+static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ if (fc->no_lock) {
+ err = flock_lock_file_wait(file, fl);
+ } else {
+ /* emulate flock with POSIX locks */
+ fl->fl_owner = (fl_owner_t) file;
+ err = fuse_setlk(file, fl, 1);
+ }
+
+ return err;
+}
+
+static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_bmap_in inarg;
+ struct fuse_bmap_out outarg;
+ int err;
+
+ if (!inode->i_sb->s_bdev || fc->no_bmap)
+ return 0;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return 0;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.block = block;
+ inarg.blocksize = inode->i_sb->s_blocksize;
+ req->in.h.opcode = FUSE_BMAP;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS)
+ fc->no_bmap = 1;
+
+ return err ? 0 : outarg.block;
+}
+
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t retval;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ mutex_lock(&inode->i_mutex);
+ switch (origin) {
+ case SEEK_END:
+ retval = fuse_update_attributes(inode, NULL, file, NULL);
+ if (retval)
+ goto exit;
+ offset += i_size_read(inode);
+ break;
+ case SEEK_CUR:
+ offset += file->f_pos;
+ }
+ retval = -EINVAL;
+ if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ retval = offset;
+ }
+exit:
+ mutex_unlock(&inode->i_mutex);
+ return retval;
+}
+
+static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
+ unsigned int nr_segs, size_t bytes, bool to_user)
+{
+ struct iov_iter ii;
+ int page_idx = 0;
+
+ if (!bytes)
+ return 0;
+
+ iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+
+ while (iov_iter_count(&ii)) {
+ struct page *page = pages[page_idx++];
+ size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
+ void *kaddr;
+
+ kaddr = kmap(page);
+
+ while (todo) {
+ char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
+ size_t iov_len = ii.iov->iov_len - ii.iov_offset;
+ size_t copy = min(todo, iov_len);
+ size_t left;
+
+ if (!to_user)
+ left = copy_from_user(kaddr, uaddr, copy);
+ else
+ left = copy_to_user(uaddr, kaddr, copy);
+
+ if (unlikely(left))
+ return -EFAULT;
+
+ iov_iter_advance(&ii, copy);
+ todo -= copy;
+ kaddr += copy;
+ }
+
+ kunmap(page);
+ }
+
+ return 0;
+}
+
+/*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+ * and 64bit. Fortunately we can determine which structure the server
+ * used from the size of the reply.
+ */
+static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
+ size_t transferred, unsigned count,
+ bool is_compat)
+{
+#ifdef CONFIG_COMPAT
+ if (count * sizeof(struct compat_iovec) == transferred) {
+ struct compat_iovec *ciov = src;
+ unsigned i;
+
+ /*
+ * With this interface a 32bit server cannot support
+ * non-compat (i.e. ones coming from 64bit apps) ioctl
+ * requests
+ */
+ if (!is_compat)
+ return -EINVAL;
+
+ for (i = 0; i < count; i++) {
+ dst[i].iov_base = compat_ptr(ciov[i].iov_base);
+ dst[i].iov_len = ciov[i].iov_len;
+ }
+ return 0;
+ }
+#endif
+
+ if (count * sizeof(struct iovec) != transferred)
+ return -EIO;
+
+ memcpy(dst, src, transferred);
+ return 0;
+}
+
+/* Make sure iov_length() won't overflow */
+static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+{
+ size_t n;
+ u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+
+ for (n = 0; n < count; n++) {
+ if (iov->iov_len > (size_t) max)
+ return -ENOMEM;
+ max -= iov->iov_len;
+ }
+ return 0;
+}
+
+static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
+ void *src, size_t transferred, unsigned count,
+ bool is_compat)
+{
+ unsigned i;
+ struct fuse_ioctl_iovec *fiov = src;
+
+ if (fc->minor < 16) {
+ return fuse_copy_ioctl_iovec_old(dst, src, transferred,
+ count, is_compat);
+ }
+
+ if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
+ return -EIO;
+
+ for (i = 0; i < count; i++) {
+ /* Did the server supply an inappropriate value? */
+ if (fiov[i].base != (unsigned long) fiov[i].base ||
+ fiov[i].len != (unsigned long) fiov[i].len)
+ return -EIO;
+
+ dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
+ dst[i].iov_len = (size_t) fiov[i].len;
+
+#ifdef CONFIG_COMPAT
+ if (is_compat &&
+ (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
+ (compat_size_t) dst[i].iov_len != fiov[i].len))
+ return -EIO;
+#endif
+ }
+
+ return 0;
+}
+
+
+/*
+ * For ioctls, there is no generic way to determine how much memory
+ * needs to be read and/or written. Furthermore, ioctls are allowed
+ * to dereference the passed pointer, so the parameter requires deep
+ * copying but FUSE has no idea whatsoever about what to copy in or
+ * out.
+ *
+ * This is solved by allowing FUSE server to retry ioctl with
+ * necessary in/out iovecs. Let's assume the ioctl implementation
+ * needs to read in the following structure.
+ *
+ * struct a {
+ * char *buf;
+ * size_t buflen;
+ * }
+ *
+ * On the first callout to FUSE server, inarg->in_size and
+ * inarg->out_size will be NULL; then, the server completes the ioctl
+ * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
+ * the actual iov array to
+ *
+ * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
+ *
+ * which tells FUSE to copy in the requested area and retry the ioctl.
+ * On the second round, the server has access to the structure and
+ * from that it can tell what to look for next, so on the invocation,
+ * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
+ *
+ * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) },
+ * { .iov_base = a.buf, .iov_len = a.buflen } }
+ *
+ * FUSE will copy both struct a and the pointed buffer from the
+ * process doing the ioctl and retry ioctl with both struct a and the
+ * buffer.
+ *
+ * This time, FUSE server has everything it needs and completes ioctl
+ * without FUSE_IOCTL_RETRY which finishes the ioctl call.
+ *
+ * Copying data out works the same way.
+ *
+ * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
+ * automatically initializes in and out iovs by decoding @cmd with
+ * _IOC_* macros and the server is not allowed to request RETRY. This
+ * limits ioctl data transfers to well-formed ioctls and is the forced
+ * behavior for all FUSE servers.
+ */
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_ioctl_in inarg = {
+ .fh = ff->fh,
+ .cmd = cmd,
+ .arg = arg,
+ .flags = flags
+ };
+ struct fuse_ioctl_out outarg;
+ struct fuse_req *req = NULL;
+ struct page **pages = NULL;
+ struct iovec *iov_page = NULL;
+ struct iovec *in_iov = NULL, *out_iov = NULL;
+ unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
+ size_t in_size, out_size, transferred;
+ int err;
+
+#if BITS_PER_LONG == 32
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#else
+ if (flags & FUSE_IOCTL_COMPAT)
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#endif
+
+ /* assume all the iovs returned by client always fits in a page */
+ BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+
+ err = -ENOMEM;
+ pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
+ iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
+ if (!pages || !iov_page)
+ goto out;
+
+ /*
+ * If restricted, initialize IO parameters as encoded in @cmd.
+ * RETRY from server is not allowed.
+ */
+ if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
+ struct iovec *iov = iov_page;
+
+ iov->iov_base = (void __user *)arg;
+ iov->iov_len = _IOC_SIZE(cmd);
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ in_iov = iov;
+ in_iovs = 1;
+ }
+
+ if (_IOC_DIR(cmd) & _IOC_READ) {
+ out_iov = iov;
+ out_iovs = 1;
+ }
+ }
+
+ retry:
+ inarg.in_size = in_size = iov_length(in_iov, in_iovs);
+ inarg.out_size = out_size = iov_length(out_iov, out_iovs);
+
+ /*
+ * Out data can be used either for actual out data or iovs,
+ * make sure there always is at least one page.
+ */
+ out_size = max_t(size_t, out_size, PAGE_SIZE);
+ max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
+
+ /* make sure there are enough buffer pages and init request with them */
+ err = -ENOMEM;
+ if (max_pages > FUSE_MAX_PAGES_PER_REQ)
+ goto out;
+ while (num_pages < max_pages) {
+ pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!pages[num_pages])
+ goto out;
+ num_pages++;
+ }
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ req = NULL;
+ goto out;
+ }
+ memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
+ req->num_pages = num_pages;
+
+ /* okay, let's send it to the client */
+ req->in.h.opcode = FUSE_IOCTL;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ if (in_size) {
+ req->in.numargs++;
+ req->in.args[1].size = in_size;
+ req->in.argpages = 1;
+
+ err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
+ false);
+ if (err)
+ goto out;
+ }
+
+ req->out.numargs = 2;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ req->out.args[1].size = out_size;
+ req->out.argpages = 1;
+ req->out.argvar = 1;
+
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ transferred = req->out.args[1].size;
+ fuse_put_request(fc, req);
+ req = NULL;
+ if (err)
+ goto out;
+
+ /* did it ask for retry? */
+ if (outarg.flags & FUSE_IOCTL_RETRY) {
+ void *vaddr;
+
+ /* no retry if in restricted mode */
+ err = -EIO;
+ if (!(flags & FUSE_IOCTL_UNRESTRICTED))
+ goto out;
+
+ in_iovs = outarg.in_iovs;
+ out_iovs = outarg.out_iovs;
+
+ /*
+ * Make sure things are in boundary, separate checks
+ * are to protect against overflow.
+ */
+ err = -ENOMEM;
+ if (in_iovs > FUSE_IOCTL_MAX_IOV ||
+ out_iovs > FUSE_IOCTL_MAX_IOV ||
+ in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
+ goto out;
+
+ vaddr = kmap_atomic(pages[0], KM_USER0);
+ err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
+ transferred, in_iovs + out_iovs,
+ (flags & FUSE_IOCTL_COMPAT) != 0);
+ kunmap_atomic(vaddr, KM_USER0);
+ if (err)
+ goto out;
+
+ in_iov = iov_page;
+ out_iov = in_iov + in_iovs;
+
+ err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+ if (err)
+ goto out;
+
+ err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+ if (err)
+ goto out;
+
+ goto retry;
+ }
+
+ err = -EIO;
+ if (transferred > inarg.out_size)
+ goto out;
+
+ err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
+ out:
+ if (req)
+ fuse_put_request(fc, req);
+ free_page((unsigned long) iov_page);
+ while (num_pages)
+ __free_page(pages[--num_pages]);
+ kfree(pages);
+
+ return err ? err : outarg.result;
+}
+EXPORT_SYMBOL_GPL(fuse_do_ioctl);
+
+static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int flags)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long fuse_file_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return fuse_file_ioctl_common(file, cmd, arg, 0);
+}
+
+static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+}
+
+/*
+ * All files which have been polled are linked to RB tree
+ * fuse_conn->polled_files which is indexed by kh. Walk the tree and
+ * find the matching one.
+ */
+static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
+ struct rb_node **parent_out)
+{
+ struct rb_node **link = &fc->polled_files.rb_node;
+ struct rb_node *last = NULL;
+
+ while (*link) {
+ struct fuse_file *ff;
+
+ last = *link;
+ ff = rb_entry(last, struct fuse_file, polled_node);
+
+ if (kh < ff->kh)
+ link = &last->rb_left;
+ else if (kh > ff->kh)
+ link = &last->rb_right;
+ else
+ return link;
+ }
+
+ if (parent_out)
+ *parent_out = last;
+ return link;
+}
+
+/*
+ * The file is about to be polled. Make sure it's on the polled_files
+ * RB tree. Note that files once added to the polled_files tree are
+ * not removed before the file is released. This is because a file
+ * polled once is likely to be polled again.
+ */
+static void fuse_register_polled_file(struct fuse_conn *fc,
+ struct fuse_file *ff)
+{
+ spin_lock(&fc->lock);
+ if (RB_EMPTY_NODE(&ff->polled_node)) {
+ struct rb_node **link, *parent;
+
+ link = fuse_find_polled_node(fc, ff->kh, &parent);
+ BUG_ON(*link);
+ rb_link_node(&ff->polled_node, parent, link);
+ rb_insert_color(&ff->polled_node, &fc->polled_files);
+ }
+ spin_unlock(&fc->lock);
+}
+
+unsigned fuse_file_poll(struct file *file, poll_table *wait)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
+ struct fuse_poll_out outarg;
+ struct fuse_req *req;
+ int err;
+
+ if (fc->no_poll)
+ return DEFAULT_POLLMASK;
+
+ poll_wait(file, &ff->poll_wait, wait);
+
+ /*
+ * Ask for notification iff there's someone waiting for it.
+ * The client may ignore the flag and always notify.
+ */
+ if (waitqueue_active(&ff->poll_wait)) {
+ inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
+ fuse_register_polled_file(fc, ff);
+ }
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return POLLERR;
+
+ req->in.h.opcode = FUSE_POLL;
+ req->in.h.nodeid = ff->nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+
+ if (!err)
+ return outarg.revents;
+ if (err == -ENOSYS) {
+ fc->no_poll = 1;
+ return DEFAULT_POLLMASK;
+ }
+ return POLLERR;
+}
+EXPORT_SYMBOL_GPL(fuse_file_poll);
+
+/*
+ * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
+ * wakes up the poll waiters.
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+ struct fuse_notify_poll_wakeup_out *outarg)
+{
+ u64 kh = outarg->kh;
+ struct rb_node **link;
+
+ spin_lock(&fc->lock);
+
+ link = fuse_find_polled_node(fc, kh, NULL);
+ if (*link) {
+ struct fuse_file *ff;
+
+ ff = rb_entry(*link, struct fuse_file, polled_node);
+ wake_up_interruptible_sync(&ff->poll_wait);
+ }
+
+ spin_unlock(&fc->lock);
+ return 0;
+}
+
+static const struct file_operations fuse_file_operations = {
+ .llseek = fuse_file_llseek,
+ .read = do_sync_read,
+ .aio_read = fuse_file_aio_read,
+ .write = do_sync_write,
+ .aio_write = fuse_file_aio_write,
+ .mmap = fuse_file_mmap,
+ .open = fuse_open,
+ .flush = fuse_flush,
+ .release = fuse_release,
+ .fsync = fuse_fsync,
+ .lock = fuse_file_lock,
+ .flock = fuse_file_flock,
+ .splice_read = generic_file_splice_read,
+ .unlocked_ioctl = fuse_file_ioctl,
+ .compat_ioctl = fuse_file_compat_ioctl,
+ .poll = fuse_file_poll,
+};
+
+static const struct file_operations fuse_direct_io_file_operations = {
+ .llseek = fuse_file_llseek,
+ .read = fuse_direct_read,
+ .write = fuse_direct_write,
+ .mmap = fuse_direct_mmap,
+ .open = fuse_open,
+ .flush = fuse_flush,
+ .release = fuse_release,
+ .fsync = fuse_fsync,
+ .lock = fuse_file_lock,
+ .flock = fuse_file_flock,
+ .unlocked_ioctl = fuse_file_ioctl,
+ .compat_ioctl = fuse_file_compat_ioctl,
+ .poll = fuse_file_poll,
+ /* no splice_read */
+};
+
+static const struct address_space_operations fuse_file_aops = {
+ .readpage = fuse_readpage,
+ .writepage = fuse_writepage,
+ .launder_page = fuse_launder_page,
+ .write_begin = fuse_write_begin,
+ .write_end = fuse_write_end,
+ .readpages = fuse_readpages,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .bmap = fuse_bmap,
+};
+
+void fuse_init_file_inode(struct inode *inode)
+{
+ inode->i_fop = &fuse_file_operations;
+ inode->i_data.a_ops = &fuse_file_aops;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 00000000..f6215501
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,769 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#ifndef _FS_FUSE_I_H
+#define _FS_FUSE_I_H
+
+#include <linux/fuse.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/backing-dev.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/poll.h>
+#include <linux/workqueue.h>
+
+/** Max number of pages that can be used in a single read request */
+#define FUSE_MAX_PAGES_PER_REQ 32
+
+/** Bias for fi->writectr, meaning new writepages must not be sent */
+#define FUSE_NOWRITE INT_MIN
+
+/** It could be as large as PATH_MAX, but would that have any uses? */
+#define FUSE_NAME_MAX 1024
+
+/** Number of dentries for each connection in the control filesystem */
+#define FUSE_CTL_NUM_DENTRIES 5
+
+/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
+ module will check permissions based on the file mode. Otherwise no
+ permission checking is done in the kernel */
+#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
+
+/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
+ doing the mount will be allowed to access the filesystem */
+#define FUSE_ALLOW_OTHER (1 << 1)
+
+/** List of active connections */
+extern struct list_head fuse_conn_list;
+
+/** Global mutex protecting fuse_conn_list and the control filesystem */
+extern struct mutex fuse_mutex;
+
+/** Module parameters */
+extern unsigned max_user_bgreq;
+extern unsigned max_user_congthresh;
+
+/* One forget request */
+struct fuse_forget_link {
+ struct fuse_forget_one forget_one;
+ struct fuse_forget_link *next;
+};
+
+/** FUSE inode */
+struct fuse_inode {
+ /** Inode data */
+ struct inode inode;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** Number of lookups on this inode */
+ u64 nlookup;
+
+ /** The request used for sending the FORGET message */
+ struct fuse_forget_link *forget;
+
+ /** Time in jiffies until the file attributes are valid */
+ u64 i_time;
+
+ /** The sticky bit in inode->i_mode may have been removed, so
+ preserve the original mode */
+ mode_t orig_i_mode;
+
+ /** 64 bit inode number */
+ u64 orig_ino;
+
+ /** Version of last attribute change */
+ u64 attr_version;
+
+ /** Files usable in writepage. Protected by fc->lock */
+ struct list_head write_files;
+
+ /** Writepages pending on truncate or fsync */
+ struct list_head queued_writes;
+
+ /** Number of sent writes, a negative bias (FUSE_NOWRITE)
+ * means more writes are blocked */
+ int writectr;
+
+ /** Waitq for writepage completion */
+ wait_queue_head_t page_waitq;
+
+ /** List of writepage requestst (pending or sent) */
+ struct list_head writepages;
+};
+
+struct fuse_conn;
+
+/** FUSE specific file data */
+struct fuse_file {
+ /** Fuse connection for this file */
+ struct fuse_conn *fc;
+
+ /** Request reserved for flush and release */
+ struct fuse_req *reserved_req;
+
+ /** Kernel file handle guaranteed to be unique */
+ u64 kh;
+
+ /** File handle used by userspace */
+ u64 fh;
+
+ /** Node id of this file */
+ u64 nodeid;
+
+ /** Refcount */
+ atomic_t count;
+
+ /** FOPEN_* flags returned by open */
+ u32 open_flags;
+
+ /** Entry on inode's write_files list */
+ struct list_head write_entry;
+
+ /** RB node to be linked on fuse_conn->polled_files */
+ struct rb_node polled_node;
+
+ /** Wait queue head for poll */
+ wait_queue_head_t poll_wait;
+};
+
+/** One input argument of a request */
+struct fuse_in_arg {
+ unsigned size;
+ const void *value;
+};
+
+/** The request input */
+struct fuse_in {
+ /** The request header */
+ struct fuse_in_header h;
+
+ /** True if the data for the last argument is in req->pages */
+ unsigned argpages:1;
+
+ /** Number of arguments */
+ unsigned numargs;
+
+ /** Array of arguments */
+ struct fuse_in_arg args[3];
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+ unsigned size;
+ void *value;
+};
+
+/** The request output */
+struct fuse_out {
+ /** Header returned from userspace */
+ struct fuse_out_header h;
+
+ /*
+ * The following bitfields are not changed during the request
+ * processing
+ */
+
+ /** Last argument is variable length (can be shorter than
+ arg->size) */
+ unsigned argvar:1;
+
+ /** Last argument is a list of pages to copy data to */
+ unsigned argpages:1;
+
+ /** Zero partially or not copied pages */
+ unsigned page_zeroing:1;
+
+ /** Pages may be replaced with new ones */
+ unsigned page_replace:1;
+
+ /** Number or arguments */
+ unsigned numargs;
+
+ /** Array of arguments */
+ struct fuse_arg args[3];
+};
+
+/** The request state */
+enum fuse_req_state {
+ FUSE_REQ_INIT = 0,
+ FUSE_REQ_PENDING,
+ FUSE_REQ_READING,
+ FUSE_REQ_SENT,
+ FUSE_REQ_WRITING,
+ FUSE_REQ_FINISHED
+};
+
+/**
+ * A request to the client
+ */
+struct fuse_req {
+ /** This can be on either pending processing or io lists in
+ fuse_conn */
+ struct list_head list;
+
+ /** Entry on the interrupts list */
+ struct list_head intr_entry;
+
+ /** refcount */
+ atomic_t count;
+
+ /** Unique ID for the interrupt request */
+ u64 intr_unique;
+
+ /*
+ * The following bitfields are either set once before the
+ * request is queued or setting/clearing them is protected by
+ * fuse_conn->lock
+ */
+
+ /** True if the request has reply */
+ unsigned isreply:1;
+
+ /** Force sending of the request even if interrupted */
+ unsigned force:1;
+
+ /** The request was aborted */
+ unsigned aborted:1;
+
+ /** Request is sent in the background */
+ unsigned background:1;
+
+ /** The request has been interrupted */
+ unsigned interrupted:1;
+
+ /** Data is being copied to/from the request */
+ unsigned locked:1;
+
+ /** Request is counted as "waiting" */
+ unsigned waiting:1;
+
+ /** State of the request */
+ enum fuse_req_state state;
+
+ /** The request input */
+ struct fuse_in in;
+
+ /** The request output */
+ struct fuse_out out;
+
+ /** Used to wake up the task waiting for completion of request*/
+ wait_queue_head_t waitq;
+
+ /** Data for asynchronous requests */
+ union {
+ struct {
+ union {
+ struct fuse_release_in in;
+ struct work_struct work;
+ };
+ struct path path;
+ } release;
+ struct fuse_init_in init_in;
+ struct fuse_init_out init_out;
+ struct cuse_init_in cuse_init_in;
+ struct {
+ struct fuse_read_in in;
+ u64 attr_ver;
+ } read;
+ struct {
+ struct fuse_write_in in;
+ struct fuse_write_out out;
+ } write;
+ struct fuse_notify_retrieve_in retrieve_in;
+ struct fuse_lk_in lk_in;
+ } misc;
+
+ /** page vector */
+ struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+
+ /** number of pages in vector */
+ unsigned num_pages;
+
+ /** offset of data on first page */
+ unsigned page_offset;
+
+ /** File used in the request (or NULL) */
+ struct fuse_file *ff;
+
+ /** Inode used in the request or NULL */
+ struct inode *inode;
+
+ /** Link on fi->writepages */
+ struct list_head writepages_entry;
+
+ /** Request completion callback */
+ void (*end)(struct fuse_conn *, struct fuse_req *);
+
+ /** Request is stolen from fuse_file->reserved_req */
+ struct file *stolen_file;
+};
+
+/**
+ * A Fuse connection.
+ *
+ * This structure is created, when the filesystem is mounted, and is
+ * destroyed, when the client device is closed and the filesystem is
+ * unmounted.
+ */
+struct fuse_conn {
+ /** Lock protecting accessess to members of this structure */
+ spinlock_t lock;
+
+ /** Mutex protecting against directory alias creation */
+ struct mutex inst_mutex;
+
+ /** Refcount */
+ atomic_t count;
+
+ /** The user id for this mount */
+ uid_t user_id;
+
+ /** The group id for this mount */
+ gid_t group_id;
+
+ /** The fuse mount flags for this mount */
+ unsigned flags;
+
+ /** Maximum read size */
+ unsigned max_read;
+
+ /** Maximum write size */
+ unsigned max_write;
+
+ /** Readers of the connection are waiting on this */
+ wait_queue_head_t waitq;
+
+ /** The list of pending requests */
+ struct list_head pending;
+
+ /** The list of requests being processed */
+ struct list_head processing;
+
+ /** The list of requests under I/O */
+ struct list_head io;
+
+ /** The next unique kernel file handle */
+ u64 khctr;
+
+ /** rbtree of fuse_files waiting for poll events indexed by ph */
+ struct rb_root polled_files;
+
+ /** Maximum number of outstanding background requests */
+ unsigned max_background;
+
+ /** Number of background requests at which congestion starts */
+ unsigned congestion_threshold;
+
+ /** Number of requests currently in the background */
+ unsigned num_background;
+
+ /** Number of background requests currently queued for userspace */
+ unsigned active_background;
+
+ /** The list of background requests set aside for later queuing */
+ struct list_head bg_queue;
+
+ /** Pending interrupts */
+ struct list_head interrupts;
+
+ /** Queue of pending forgets */
+ struct fuse_forget_link forget_list_head;
+ struct fuse_forget_link *forget_list_tail;
+
+ /** Batching of FORGET requests (positive indicates FORGET batch) */
+ int forget_batch;
+
+ /** Flag indicating if connection is blocked. This will be
+ the case before the INIT reply is received, and if there
+ are too many outstading backgrounds requests */
+ int blocked;
+
+ /** waitq for blocked connection */
+ wait_queue_head_t blocked_waitq;
+
+ /** waitq for reserved requests */
+ wait_queue_head_t reserved_req_waitq;
+
+ /** The next unique request id */
+ u64 reqctr;
+
+ /** Connection established, cleared on umount, connection
+ abort and device release */
+ unsigned connected;
+
+ /** Connection failed (version mismatch). Cannot race with
+ setting other bitfields since it is only set once in INIT
+ reply, before any other request, and never cleared */
+ unsigned conn_error:1;
+
+ /** Connection successful. Only set in INIT */
+ unsigned conn_init:1;
+
+ /** Do readpages asynchronously? Only set in INIT */
+ unsigned async_read:1;
+
+ /** Do not send separate SETATTR request before open(O_TRUNC) */
+ unsigned atomic_o_trunc:1;
+
+ /** Filesystem supports NFS exporting. Only set in INIT */
+ unsigned export_support:1;
+
+ /** Set if bdi is valid */
+ unsigned bdi_initialized:1;
+
+ /*
+ * The following bitfields are only for optimization purposes
+ * and hence races in setting them will not cause malfunction
+ */
+
+ /** Is fsync not implemented by fs? */
+ unsigned no_fsync:1;
+
+ /** Is fsyncdir not implemented by fs? */
+ unsigned no_fsyncdir:1;
+
+ /** Is flush not implemented by fs? */
+ unsigned no_flush:1;
+
+ /** Is setxattr not implemented by fs? */
+ unsigned no_setxattr:1;
+
+ /** Is getxattr not implemented by fs? */
+ unsigned no_getxattr:1;
+
+ /** Is listxattr not implemented by fs? */
+ unsigned no_listxattr:1;
+
+ /** Is removexattr not implemented by fs? */
+ unsigned no_removexattr:1;
+
+ /** Are file locking primitives not implemented by fs? */
+ unsigned no_lock:1;
+
+ /** Is access not implemented by fs? */
+ unsigned no_access:1;
+
+ /** Is create not implemented by fs? */
+ unsigned no_create:1;
+
+ /** Is interrupt not implemented by fs? */
+ unsigned no_interrupt:1;
+
+ /** Is bmap not implemented by fs? */
+ unsigned no_bmap:1;
+
+ /** Is poll not implemented by fs? */
+ unsigned no_poll:1;
+
+ /** Do multi-page cached writes */
+ unsigned big_writes:1;
+
+ /** Don't apply umask to creation modes */
+ unsigned dont_mask:1;
+
+ /** The number of requests waiting for completion */
+ atomic_t num_waiting;
+
+ /** Negotiated minor version */
+ unsigned minor;
+
+ /** Backing dev info */
+ struct backing_dev_info bdi;
+
+ /** Entry on the fuse_conn_list */
+ struct list_head entry;
+
+ /** Device ID from super block */
+ dev_t dev;
+
+ /** Dentries in the control filesystem */
+ struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
+
+ /** number of dentries used in the above array */
+ int ctl_ndents;
+
+ /** O_ASYNC requests */
+ struct fasync_struct *fasync;
+
+ /** Key for lock owner ID scrambling */
+ u32 scramble_key[4];
+
+ /** Reserved request for the DESTROY message */
+ struct fuse_req *destroy_req;
+
+ /** Version counter for attribute changes */
+ u64 attr_version;
+
+ /** Called on final put */
+ void (*release)(struct fuse_conn *);
+
+ /** Super block for this connection. */
+ struct super_block *sb;
+
+ /** Read/write semaphore to hold when accessing sb. */
+ struct rw_semaphore killsb;
+};
+
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
+{
+ return get_fuse_conn_super(inode->i_sb);
+}
+
+static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+{
+ return container_of(inode, struct fuse_inode, inode);
+}
+
+static inline u64 get_node_id(struct inode *inode)
+{
+ return get_fuse_inode(inode)->nodeid;
+}
+
+/** Device operations */
+extern const struct file_operations fuse_dev_operations;
+
+extern const struct dentry_operations fuse_dentry_operations;
+
+/**
+ * Inode to nodeid comparison.
+ */
+int fuse_inode_eq(struct inode *inode, void *_nodeidp);
+
+/**
+ * Get a filled in inode
+ */
+struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
+ int generation, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version);
+
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode);
+
+/**
+ * Send FORGET command
+ */
+void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
+ u64 nodeid, u64 nlookup);
+
+struct fuse_forget_link *fuse_alloc_forget(void);
+
+/**
+ * Initialize READ or READDIR request
+ */
+void fuse_read_fill(struct fuse_req *req, struct file *file,
+ loff_t pos, size_t count, int opcode);
+
+/**
+ * Send OPEN or OPENDIR request
+ */
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
+
+struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
+struct fuse_file *fuse_file_get(struct fuse_file *ff);
+void fuse_file_free(struct fuse_file *ff);
+void fuse_finish_open(struct inode *inode, struct file *file);
+
+void fuse_sync_release(struct fuse_file *ff, int flags);
+
+/**
+ * Send RELEASE or RELEASEDIR request
+ */
+void fuse_release_common(struct file *file, int opcode);
+
+/**
+ * Send FSYNC or FSYNCDIR request
+ */
+int fuse_fsync_common(struct file *file, int datasync, int isdir);
+
+/**
+ * Notify poll wakeup
+ */
+int fuse_notify_poll_wakeup(struct fuse_conn *fc,
+ struct fuse_notify_poll_wakeup_out *outarg);
+
+/**
+ * Initialize file operations on a regular file
+ */
+void fuse_init_file_inode(struct inode *inode);
+
+/**
+ * Initialize inode operations on regular files and special files
+ */
+void fuse_init_common(struct inode *inode);
+
+/**
+ * Initialize inode and file operations on a directory
+ */
+void fuse_init_dir(struct inode *inode);
+
+/**
+ * Initialize inode operations on a symlink
+ */
+void fuse_init_symlink(struct inode *inode);
+
+/**
+ * Change attributes of an inode
+ */
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version);
+
+void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid);
+
+/**
+ * Initialize the client device
+ */
+int fuse_dev_init(void);
+
+/**
+ * Cleanup the client device
+ */
+void fuse_dev_cleanup(void);
+
+int fuse_ctl_init(void);
+void fuse_ctl_cleanup(void);
+
+/**
+ * Allocate a request
+ */
+struct fuse_req *fuse_request_alloc(void);
+
+struct fuse_req *fuse_request_alloc_nofs(void);
+
+/**
+ * Free a request
+ */
+void fuse_request_free(struct fuse_req *req);
+
+/**
+ * Get a request, may fail with -ENOMEM
+ */
+struct fuse_req *fuse_get_req(struct fuse_conn *fc);
+
+/**
+ * Gets a requests for a file operation, always succeeds
+ */
+struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file);
+
+/**
+ * Decrement reference count of a request. If count goes to zero free
+ * the request.
+ */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous)
+ */
+void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request in the background
+ */
+void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+
+void fuse_request_send_background_locked(struct fuse_conn *fc,
+ struct fuse_req *req);
+
+/* Abort all requests */
+void fuse_abort_conn(struct fuse_conn *fc);
+
+/**
+ * Invalidate inode attributes
+ */
+void fuse_invalidate_attr(struct inode *inode);
+
+void fuse_invalidate_entry_cache(struct dentry *entry);
+
+/**
+ * Acquire reference to fuse_conn
+ */
+struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
+
+void fuse_conn_kill(struct fuse_conn *fc);
+
+/**
+ * Initialize fuse_conn
+ */
+void fuse_conn_init(struct fuse_conn *fc);
+
+/**
+ * Release reference to fuse_conn
+ */
+void fuse_conn_put(struct fuse_conn *fc);
+
+/**
+ * Add connection to control filesystem
+ */
+int fuse_ctl_add_conn(struct fuse_conn *fc);
+
+/**
+ * Remove connection from control filesystem
+ */
+void fuse_ctl_remove_conn(struct fuse_conn *fc);
+
+/**
+ * Is file type valid?
+ */
+int fuse_valid_type(int m);
+
+/**
+ * Is task allowed to perform filesystem operation?
+ */
+int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task);
+
+u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
+
+int fuse_update_attributes(struct inode *inode, struct kstat *stat,
+ struct file *file, bool *refreshed);
+
+void fuse_flush_writepages(struct inode *inode);
+
+void fuse_set_nowrite(struct inode *inode);
+void fuse_release_nowrite(struct inode *inode);
+
+u64 fuse_get_attr_version(struct fuse_conn *fc);
+
+/**
+ * File-system tells the kernel to invalidate cache for the given node id.
+ */
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+ loff_t offset, loff_t len);
+
+/**
+ * File-system tells the kernel to invalidate parent attributes and
+ * the dentry matching parent/name.
+ */
+int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
+ struct qstr *name);
+
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ bool isdir);
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write);
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags);
+unsigned fuse_file_poll(struct file *file, poll_table *wait);
+int fuse_dev_release(struct inode *inode, struct file *file);
+
+void fuse_write_update_size(struct inode *inode, loff_t pos);
+
+#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 00000000..69a1e0f0
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,1263 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/parser.h>
+#include <linux/statfs.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/exportfs.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+static struct kmem_cache *fuse_inode_cachep;
+struct list_head fuse_conn_list;
+DEFINE_MUTEX(fuse_mutex);
+
+static int set_global_limit(const char *val, struct kernel_param *kp);
+
+unsigned max_user_bgreq;
+module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
+ &max_user_bgreq, 0644);
+__MODULE_PARM_TYPE(max_user_bgreq, "uint");
+MODULE_PARM_DESC(max_user_bgreq,
+ "Global limit for the maximum number of backgrounded requests an "
+ "unprivileged user can set");
+
+unsigned max_user_congthresh;
+module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
+ &max_user_congthresh, 0644);
+__MODULE_PARM_TYPE(max_user_congthresh, "uint");
+MODULE_PARM_DESC(max_user_congthresh,
+ "Global limit for the maximum congestion threshold an "
+ "unprivileged user can set");
+
+#define FUSE_SUPER_MAGIC 0x65735546
+
+#define FUSE_DEFAULT_BLKSIZE 512
+
+/** Maximum number of outstanding background requests */
+#define FUSE_DEFAULT_MAX_BACKGROUND 12
+
+/** Congestion starts at 75% of maximum */
+#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
+
+struct fuse_mount_data {
+ int fd;
+ unsigned rootmode;
+ unsigned user_id;
+ unsigned group_id;
+ unsigned fd_present:1;
+ unsigned rootmode_present:1;
+ unsigned user_id_present:1;
+ unsigned group_id_present:1;
+ unsigned flags;
+ unsigned max_read;
+ unsigned blksize;
+};
+
+struct fuse_forget_link *fuse_alloc_forget()
+{
+ return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
+}
+
+static struct inode *fuse_alloc_inode(struct super_block *sb)
+{
+ struct inode *inode;
+ struct fuse_inode *fi;
+
+ inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
+ if (!inode)
+ return NULL;
+
+ fi = get_fuse_inode(inode);
+ fi->i_time = 0;
+ fi->nodeid = 0;
+ fi->nlookup = 0;
+ fi->attr_version = 0;
+ fi->writectr = 0;
+ fi->orig_ino = 0;
+ INIT_LIST_HEAD(&fi->write_files);
+ INIT_LIST_HEAD(&fi->queued_writes);
+ INIT_LIST_HEAD(&fi->writepages);
+ init_waitqueue_head(&fi->page_waitq);
+ fi->forget = fuse_alloc_forget();
+ if (!fi->forget) {
+ kmem_cache_free(fuse_inode_cachep, inode);
+ return NULL;
+ }
+
+ return inode;
+}
+
+static void fuse_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(fuse_inode_cachep, inode);
+}
+
+static void fuse_destroy_inode(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ BUG_ON(!list_empty(&fi->write_files));
+ BUG_ON(!list_empty(&fi->queued_writes));
+ kfree(fi->forget);
+ call_rcu(&inode->i_rcu, fuse_i_callback);
+}
+
+static void fuse_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages(&inode->i_data, 0);
+ end_writeback(inode);
+ if (inode->i_sb->s_flags & MS_ACTIVE) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
+ fi->forget = NULL;
+ }
+}
+
+static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ if (*flags & MS_MANDLOCK)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
+ * so that it will fit.
+ */
+static ino_t fuse_squash_ino(u64 ino64)
+{
+ ino_t ino = (ino_t) ino64;
+ if (sizeof(ino_t) < sizeof(u64))
+ ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
+ return ino;
+}
+
+void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ fi->attr_version = ++fc->attr_version;
+ fi->i_time = attr_valid;
+
+ inode->i_ino = fuse_squash_ino(attr->ino);
+ inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
+ inode->i_nlink = attr->nlink;
+ inode->i_uid = attr->uid;
+ inode->i_gid = attr->gid;
+ inode->i_blocks = attr->blocks;
+ inode->i_atime.tv_sec = attr->atime;
+ inode->i_atime.tv_nsec = attr->atimensec;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+
+ if (attr->blksize != 0)
+ inode->i_blkbits = ilog2(attr->blksize);
+ else
+ inode->i_blkbits = inode->i_sb->s_blocksize_bits;
+
+ /*
+ * Don't set the sticky bit in i_mode, unless we want the VFS
+ * to check permissions. This prevents failures due to the
+ * check in may_delete().
+ */
+ fi->orig_i_mode = inode->i_mode;
+ if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+ inode->i_mode &= ~S_ISVTX;
+
+ fi->orig_ino = attr->ino;
+}
+
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ loff_t oldsize;
+
+ spin_lock(&fc->lock);
+ if (attr_version != 0 && fi->attr_version > attr_version) {
+ spin_unlock(&fc->lock);
+ return;
+ }
+
+ fuse_change_attributes_common(inode, attr, attr_valid);
+
+ oldsize = inode->i_size;
+ i_size_write(inode, attr->size);
+ spin_unlock(&fc->lock);
+
+ if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
+ truncate_pagecache(inode, oldsize, attr->size);
+ invalidate_inode_pages2(inode->i_mapping);
+ }
+}
+
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+{
+ inode->i_mode = attr->mode & S_IFMT;
+ inode->i_size = attr->size;
+ if (S_ISREG(inode->i_mode)) {
+ fuse_init_common(inode);
+ fuse_init_file_inode(inode);
+ } else if (S_ISDIR(inode->i_mode))
+ fuse_init_dir(inode);
+ else if (S_ISLNK(inode->i_mode))
+ fuse_init_symlink(inode);
+ else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ fuse_init_common(inode);
+ init_special_inode(inode, inode->i_mode,
+ new_decode_dev(attr->rdev));
+ } else
+ BUG();
+}
+
+int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+{
+ u64 nodeid = *(u64 *) _nodeidp;
+ if (get_node_id(inode) == nodeid)
+ return 1;
+ else
+ return 0;
+}
+
+static int fuse_inode_set(struct inode *inode, void *_nodeidp)
+{
+ u64 nodeid = *(u64 *) _nodeidp;
+ get_fuse_inode(inode)->nodeid = nodeid;
+ return 0;
+}
+
+struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
+ int generation, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version)
+{
+ struct inode *inode;
+ struct fuse_inode *fi;
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ retry:
+ inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
+ if (!inode)
+ return NULL;
+
+ if ((inode->i_state & I_NEW)) {
+ inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_generation = generation;
+ inode->i_data.backing_dev_info = &fc->bdi;
+ fuse_init_inode(inode, attr);
+ unlock_new_inode(inode);
+ } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+ /* Inode has changed type, any I/O on the old should fail */
+ make_bad_inode(inode);
+ iput(inode);
+ goto retry;
+ }
+
+ fi = get_fuse_inode(inode);
+ spin_lock(&fc->lock);
+ fi->nlookup++;
+ spin_unlock(&fc->lock);
+ fuse_change_attributes(inode, attr, attr_valid, attr_version);
+
+ return inode;
+}
+
+int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
+ loff_t offset, loff_t len)
+{
+ struct inode *inode;
+ pgoff_t pg_start;
+ pgoff_t pg_end;
+
+ inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
+ if (!inode)
+ return -ENOENT;
+
+ fuse_invalidate_attr(inode);
+ if (offset >= 0) {
+ pg_start = offset >> PAGE_CACHE_SHIFT;
+ if (len <= 0)
+ pg_end = -1;
+ else
+ pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pg_start, pg_end);
+ }
+ iput(inode);
+ return 0;
+}
+
+static void fuse_umount_begin(struct super_block *sb)
+{
+ fuse_abort_conn(get_fuse_conn_super(sb));
+}
+
+static void fuse_send_destroy(struct fuse_conn *fc)
+{
+ struct fuse_req *req = fc->destroy_req;
+ if (req && fc->conn_init) {
+ fc->destroy_req = NULL;
+ req->in.h.opcode = FUSE_DESTROY;
+ req->force = 1;
+ fuse_request_send(fc, req);
+ fuse_put_request(fc, req);
+ }
+}
+
+static void fuse_bdi_destroy(struct fuse_conn *fc)
+{
+ if (fc->bdi_initialized)
+ bdi_destroy(&fc->bdi);
+}
+
+void fuse_conn_kill(struct fuse_conn *fc)
+{
+ spin_lock(&fc->lock);
+ fc->connected = 0;
+ fc->blocked = 0;
+ spin_unlock(&fc->lock);
+ /* Flush all readers on this fs */
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+ wake_up_all(&fc->waitq);
+ wake_up_all(&fc->blocked_waitq);
+ wake_up_all(&fc->reserved_req_waitq);
+ mutex_lock(&fuse_mutex);
+ list_del(&fc->entry);
+ fuse_ctl_remove_conn(fc);
+ mutex_unlock(&fuse_mutex);
+ fuse_bdi_destroy(fc);
+}
+EXPORT_SYMBOL_GPL(fuse_conn_kill);
+
+static void fuse_put_super(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ fuse_send_destroy(fc);
+ fuse_conn_kill(fc);
+ fuse_conn_put(fc);
+}
+
+static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
+{
+ stbuf->f_type = FUSE_SUPER_MAGIC;
+ stbuf->f_bsize = attr->bsize;
+ stbuf->f_frsize = attr->frsize;
+ stbuf->f_blocks = attr->blocks;
+ stbuf->f_bfree = attr->bfree;
+ stbuf->f_bavail = attr->bavail;
+ stbuf->f_files = attr->files;
+ stbuf->f_ffree = attr->ffree;
+ stbuf->f_namelen = attr->namelen;
+ /* fsid is left zero */
+}
+
+static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_req *req;
+ struct fuse_statfs_out outarg;
+ int err;
+
+ if (!fuse_allow_task(fc, current)) {
+ buf->f_type = FUSE_SUPER_MAGIC;
+ return 0;
+ }
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ memset(&outarg, 0, sizeof(outarg));
+ req->in.numargs = 0;
+ req->in.h.opcode = FUSE_STATFS;
+ req->in.h.nodeid = get_node_id(dentry->d_inode);
+ req->out.numargs = 1;
+ req->out.args[0].size =
+ fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ if (!err)
+ convert_fuse_statfs(buf, &outarg.st);
+ fuse_put_request(fc, req);
+ return err;
+}
+
+enum {
+ OPT_FD,
+ OPT_ROOTMODE,
+ OPT_USER_ID,
+ OPT_GROUP_ID,
+ OPT_DEFAULT_PERMISSIONS,
+ OPT_ALLOW_OTHER,
+ OPT_MAX_READ,
+ OPT_BLKSIZE,
+ OPT_ERR
+};
+
+static const match_table_t tokens = {
+ {OPT_FD, "fd=%u"},
+ {OPT_ROOTMODE, "rootmode=%o"},
+ {OPT_USER_ID, "user_id=%u"},
+ {OPT_GROUP_ID, "group_id=%u"},
+ {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
+ {OPT_ALLOW_OTHER, "allow_other"},
+ {OPT_MAX_READ, "max_read=%u"},
+ {OPT_BLKSIZE, "blksize=%u"},
+ {OPT_ERR, NULL}
+};
+
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
+{
+ char *p;
+ memset(d, 0, sizeof(struct fuse_mount_data));
+ d->max_read = ~0;
+ d->blksize = FUSE_DEFAULT_BLKSIZE;
+
+ while ((p = strsep(&opt, ",")) != NULL) {
+ int token;
+ int value;
+ substring_t args[MAX_OPT_ARGS];
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case OPT_FD:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->fd = value;
+ d->fd_present = 1;
+ break;
+
+ case OPT_ROOTMODE:
+ if (match_octal(&args[0], &value))
+ return 0;
+ if (!fuse_valid_type(value))
+ return 0;
+ d->rootmode = value;
+ d->rootmode_present = 1;
+ break;
+
+ case OPT_USER_ID:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->user_id = value;
+ d->user_id_present = 1;
+ break;
+
+ case OPT_GROUP_ID:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->group_id = value;
+ d->group_id_present = 1;
+ break;
+
+ case OPT_DEFAULT_PERMISSIONS:
+ d->flags |= FUSE_DEFAULT_PERMISSIONS;
+ break;
+
+ case OPT_ALLOW_OTHER:
+ d->flags |= FUSE_ALLOW_OTHER;
+ break;
+
+ case OPT_MAX_READ:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->max_read = value;
+ break;
+
+ case OPT_BLKSIZE:
+ if (!is_bdev || match_int(&args[0], &value))
+ return 0;
+ d->blksize = value;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+
+ if (!d->fd_present || !d->rootmode_present ||
+ !d->user_id_present || !d->group_id_present)
+ return 0;
+
+ return 1;
+}
+
+static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+
+ seq_printf(m, ",user_id=%u", fc->user_id);
+ seq_printf(m, ",group_id=%u", fc->group_id);
+ if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
+ seq_puts(m, ",default_permissions");
+ if (fc->flags & FUSE_ALLOW_OTHER)
+ seq_puts(m, ",allow_other");
+ if (fc->max_read != ~0)
+ seq_printf(m, ",max_read=%u", fc->max_read);
+ if (mnt->mnt_sb->s_bdev &&
+ mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
+ seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
+ return 0;
+}
+
+void fuse_conn_init(struct fuse_conn *fc)
+{
+ memset(fc, 0, sizeof(*fc));
+ spin_lock_init(&fc->lock);
+ mutex_init(&fc->inst_mutex);
+ init_rwsem(&fc->killsb);
+ atomic_set(&fc->count, 1);
+ init_waitqueue_head(&fc->waitq);
+ init_waitqueue_head(&fc->blocked_waitq);
+ init_waitqueue_head(&fc->reserved_req_waitq);
+ INIT_LIST_HEAD(&fc->pending);
+ INIT_LIST_HEAD(&fc->processing);
+ INIT_LIST_HEAD(&fc->io);
+ INIT_LIST_HEAD(&fc->interrupts);
+ INIT_LIST_HEAD(&fc->bg_queue);
+ INIT_LIST_HEAD(&fc->entry);
+ fc->forget_list_tail = &fc->forget_list_head;
+ atomic_set(&fc->num_waiting, 0);
+ fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
+ fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
+ fc->khctr = 0;
+ fc->polled_files = RB_ROOT;
+ fc->reqctr = 0;
+ fc->blocked = 1;
+ fc->attr_version = 1;
+ get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+}
+EXPORT_SYMBOL_GPL(fuse_conn_init);
+
+void fuse_conn_put(struct fuse_conn *fc)
+{
+ if (atomic_dec_and_test(&fc->count)) {
+ if (fc->destroy_req)
+ fuse_request_free(fc->destroy_req);
+ mutex_destroy(&fc->inst_mutex);
+ fc->release(fc);
+ }
+}
+EXPORT_SYMBOL_GPL(fuse_conn_put);
+
+struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
+{
+ atomic_inc(&fc->count);
+ return fc;
+}
+EXPORT_SYMBOL_GPL(fuse_conn_get);
+
+static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
+{
+ struct fuse_attr attr;
+ memset(&attr, 0, sizeof(attr));
+
+ attr.mode = mode;
+ attr.ino = FUSE_ROOT_ID;
+ attr.nlink = 1;
+ return fuse_iget(sb, 1, 0, &attr, 0, 0);
+}
+
+struct fuse_inode_handle {
+ u64 nodeid;
+ u32 generation;
+};
+
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+ struct fuse_inode_handle *handle)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct inode *inode;
+ struct dentry *entry;
+ int err = -ESTALE;
+
+ if (handle->nodeid == 0)
+ goto out_err;
+
+ inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+ if (!inode) {
+ struct fuse_entry_out outarg;
+ struct qstr name;
+
+ if (!fc->export_support)
+ goto out_err;
+
+ name.len = 1;
+ name.name = ".";
+ err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+ &inode);
+ if (err && err != -ENOENT)
+ goto out_err;
+ if (err || !inode) {
+ err = -ESTALE;
+ goto out_err;
+ }
+ err = -EIO;
+ if (get_node_id(inode) != handle->nodeid)
+ goto out_iput;
+ }
+ err = -ESTALE;
+ if (inode->i_generation != handle->generation)
+ goto out_iput;
+
+ entry = d_obtain_alias(inode);
+ if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
+ fuse_invalidate_entry_cache(entry);
+
+ return entry;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
+}
+
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+ int connectable)
+{
+ struct inode *inode = dentry->d_inode;
+ bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+ int len = encode_parent ? 6 : 3;
+ u64 nodeid;
+ u32 generation;
+
+ if (*max_len < len) {
+ *max_len = len;
+ return 255;
+ }
+
+ nodeid = get_fuse_inode(inode)->nodeid;
+ generation = inode->i_generation;
+
+ fh[0] = (u32)(nodeid >> 32);
+ fh[1] = (u32)(nodeid & 0xffffffff);
+ fh[2] = generation;
+
+ if (encode_parent) {
+ struct inode *parent;
+
+ spin_lock(&dentry->d_lock);
+ parent = dentry->d_parent->d_inode;
+ nodeid = get_fuse_inode(parent)->nodeid;
+ generation = parent->i_generation;
+ spin_unlock(&dentry->d_lock);
+
+ fh[3] = (u32)(nodeid >> 32);
+ fh[4] = (u32)(nodeid & 0xffffffff);
+ fh[5] = generation;
+ }
+
+ *max_len = len;
+ return encode_parent ? 0x82 : 0x81;
+}
+
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle handle;
+
+ if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+ return NULL;
+
+ handle.nodeid = (u64) fid->raw[0] << 32;
+ handle.nodeid |= (u64) fid->raw[1];
+ handle.generation = fid->raw[2];
+ return fuse_get_dentry(sb, &handle);
+}
+
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle parent;
+
+ if (fh_type != 0x82 || fh_len < 6)
+ return NULL;
+
+ parent.nodeid = (u64) fid->raw[3] << 32;
+ parent.nodeid |= (u64) fid->raw[4];
+ parent.generation = fid->raw[5];
+ return fuse_get_dentry(sb, &parent);
+}
+
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+ struct inode *child_inode = child->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(child_inode);
+ struct inode *inode;
+ struct dentry *parent;
+ struct fuse_entry_out outarg;
+ struct qstr name;
+ int err;
+
+ if (!fc->export_support)
+ return ERR_PTR(-ESTALE);
+
+ name.len = 2;
+ name.name = "..";
+ err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+ &name, &outarg, &inode);
+ if (err) {
+ if (err == -ENOENT)
+ return ERR_PTR(-ESTALE);
+ return ERR_PTR(err);
+ }
+
+ parent = d_obtain_alias(inode);
+ if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
+ fuse_invalidate_entry_cache(parent);
+
+ return parent;
+}
+
+static const struct export_operations fuse_export_operations = {
+ .fh_to_dentry = fuse_fh_to_dentry,
+ .fh_to_parent = fuse_fh_to_parent,
+ .encode_fh = fuse_encode_fh,
+ .get_parent = fuse_get_parent,
+};
+
+static const struct super_operations fuse_super_operations = {
+ .alloc_inode = fuse_alloc_inode,
+ .destroy_inode = fuse_destroy_inode,
+ .evict_inode = fuse_evict_inode,
+ .drop_inode = generic_delete_inode,
+ .remount_fs = fuse_remount_fs,
+ .put_super = fuse_put_super,
+ .umount_begin = fuse_umount_begin,
+ .statfs = fuse_statfs,
+ .show_options = fuse_show_options,
+};
+
+static void sanitize_global_limit(unsigned *limit)
+{
+ if (*limit == 0)
+ *limit = ((num_physpages << PAGE_SHIFT) >> 13) /
+ sizeof(struct fuse_req);
+
+ if (*limit >= 1 << 16)
+ *limit = (1 << 16) - 1;
+}
+
+static int set_global_limit(const char *val, struct kernel_param *kp)
+{
+ int rv;
+
+ rv = param_set_uint(val, kp);
+ if (rv)
+ return rv;
+
+ sanitize_global_limit((unsigned *)kp->arg);
+
+ return 0;
+}
+
+static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
+{
+ int cap_sys_admin = capable(CAP_SYS_ADMIN);
+
+ if (arg->minor < 13)
+ return;
+
+ sanitize_global_limit(&max_user_bgreq);
+ sanitize_global_limit(&max_user_congthresh);
+
+ if (arg->max_background) {
+ fc->max_background = arg->max_background;
+
+ if (!cap_sys_admin && fc->max_background > max_user_bgreq)
+ fc->max_background = max_user_bgreq;
+ }
+ if (arg->congestion_threshold) {
+ fc->congestion_threshold = arg->congestion_threshold;
+
+ if (!cap_sys_admin &&
+ fc->congestion_threshold > max_user_congthresh)
+ fc->congestion_threshold = max_user_congthresh;
+ }
+}
+
+static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct fuse_init_out *arg = &req->misc.init_out;
+
+ if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
+ fc->conn_error = 1;
+ else {
+ unsigned long ra_pages;
+
+ process_init_limits(fc, arg);
+
+ if (arg->minor >= 6) {
+ ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
+ if (arg->flags & FUSE_ASYNC_READ)
+ fc->async_read = 1;
+ if (!(arg->flags & FUSE_POSIX_LOCKS))
+ fc->no_lock = 1;
+ if (arg->flags & FUSE_ATOMIC_O_TRUNC)
+ fc->atomic_o_trunc = 1;
+ if (arg->minor >= 9) {
+ /* LOOKUP has dependency on proto version */
+ if (arg->flags & FUSE_EXPORT_SUPPORT)
+ fc->export_support = 1;
+ }
+ if (arg->flags & FUSE_BIG_WRITES)
+ fc->big_writes = 1;
+ if (arg->flags & FUSE_DONT_MASK)
+ fc->dont_mask = 1;
+ } else {
+ ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+ fc->no_lock = 1;
+ }
+
+ fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
+ fc->minor = arg->minor;
+ fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+ fc->max_write = max_t(unsigned, 4096, fc->max_write);
+ fc->conn_init = 1;
+ }
+ fc->blocked = 0;
+ wake_up_all(&fc->blocked_waitq);
+}
+
+static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct fuse_init_in *arg = &req->misc.init_in;
+
+ arg->major = FUSE_KERNEL_VERSION;
+ arg->minor = FUSE_KERNEL_MINOR_VERSION;
+ arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
+ arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
+ req->in.h.opcode = FUSE_INIT;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*arg);
+ req->in.args[0].value = arg;
+ req->out.numargs = 1;
+ /* Variable length argument used for backward compatibility
+ with interface version < 7.5. Rest of init_out is zeroed
+ by do_get_request(), so a short reply is not a problem */
+ req->out.argvar = 1;
+ req->out.args[0].size = sizeof(struct fuse_init_out);
+ req->out.args[0].value = &req->misc.init_out;
+ req->end = process_init_reply;
+ fuse_request_send_background(fc, req);
+}
+
+static void fuse_free_conn(struct fuse_conn *fc)
+{
+ kfree(fc);
+}
+
+static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
+{
+ int err;
+
+ fc->bdi.name = "fuse";
+ fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ /* fuse does it's own writeback accounting */
+ fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+
+ err = bdi_init(&fc->bdi);
+ if (err)
+ return err;
+
+ fc->bdi_initialized = 1;
+
+ if (sb->s_bdev) {
+ err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+ MAJOR(fc->dev), MINOR(fc->dev));
+ } else {
+ err = bdi_register_dev(&fc->bdi, fc->dev);
+ }
+
+ if (err)
+ return err;
+
+ /*
+ * For a single fuse filesystem use max 1% of dirty +
+ * writeback threshold.
+ *
+ * This gives about 1M of write buffer for memory maps on a
+ * machine with 1G and 10% dirty_ratio, which should be more
+ * than enough.
+ *
+ * Privileged users can raise it by writing to
+ *
+ * /sys/class/bdi/<bdi>/max_ratio
+ */
+ bdi_set_max_ratio(&fc->bdi, 1);
+
+ return 0;
+}
+
+static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct fuse_conn *fc;
+ struct inode *root;
+ struct fuse_mount_data d;
+ struct file *file;
+ struct dentry *root_dentry;
+ struct fuse_req *init_req;
+ int err;
+ int is_bdev = sb->s_bdev != NULL;
+
+ err = -EINVAL;
+ if (sb->s_flags & MS_MANDLOCK)
+ goto err;
+
+ sb->s_flags &= ~MS_NOSEC;
+
+ if (!parse_fuse_opt((char *) data, &d, is_bdev))
+ goto err;
+
+ if (is_bdev) {
+#ifdef CONFIG_BLOCK
+ err = -EINVAL;
+ if (!sb_set_blocksize(sb, d.blksize))
+ goto err;
+#endif
+ } else {
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ }
+ sb->s_magic = FUSE_SUPER_MAGIC;
+ sb->s_op = &fuse_super_operations;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_export_op = &fuse_export_operations;
+
+ file = fget(d.fd);
+ err = -EINVAL;
+ if (!file)
+ goto err;
+
+ if (file->f_op != &fuse_dev_operations)
+ goto err_fput;
+
+ fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+ err = -ENOMEM;
+ if (!fc)
+ goto err_fput;
+
+ fuse_conn_init(fc);
+
+ fc->dev = sb->s_dev;
+ fc->sb = sb;
+ err = fuse_bdi_init(fc, sb);
+ if (err)
+ goto err_put_conn;
+
+ sb->s_bdi = &fc->bdi;
+
+ /* Handle umasking inside the fuse code */
+ if (sb->s_flags & MS_POSIXACL)
+ fc->dont_mask = 1;
+ sb->s_flags |= MS_POSIXACL;
+
+ fc->release = fuse_free_conn;
+ fc->flags = d.flags;
+ fc->user_id = d.user_id;
+ fc->group_id = d.group_id;
+ fc->max_read = max_t(unsigned, 4096, d.max_read);
+
+ /* Used by get_root_inode() */
+ sb->s_fs_info = fc;
+
+ err = -ENOMEM;
+ root = fuse_get_root_inode(sb, d.rootmode);
+ if (!root)
+ goto err_put_conn;
+
+ root_dentry = d_alloc_root(root);
+ if (!root_dentry) {
+ iput(root);
+ goto err_put_conn;
+ }
+ /* only now - we want root dentry with NULL ->d_op */
+ sb->s_d_op = &fuse_dentry_operations;
+
+ init_req = fuse_request_alloc();
+ if (!init_req)
+ goto err_put_root;
+
+ if (is_bdev) {
+ fc->destroy_req = fuse_request_alloc();
+ if (!fc->destroy_req)
+ goto err_free_init_req;
+ }
+
+ mutex_lock(&fuse_mutex);
+ err = -EINVAL;
+ if (file->private_data)
+ goto err_unlock;
+
+ err = fuse_ctl_add_conn(fc);
+ if (err)
+ goto err_unlock;
+
+ list_add_tail(&fc->entry, &fuse_conn_list);
+ sb->s_root = root_dentry;
+ fc->connected = 1;
+ file->private_data = fuse_conn_get(fc);
+ mutex_unlock(&fuse_mutex);
+ /*
+ * atomic_dec_and_test() in fput() provides the necessary
+ * memory barrier for file->private_data to be visible on all
+ * CPUs after this
+ */
+ fput(file);
+
+ fuse_send_init(fc, init_req);
+
+ return 0;
+
+ err_unlock:
+ mutex_unlock(&fuse_mutex);
+ err_free_init_req:
+ fuse_request_free(init_req);
+ err_put_root:
+ dput(root_dentry);
+ err_put_conn:
+ fuse_bdi_destroy(fc);
+ fuse_conn_put(fc);
+ err_fput:
+ fput(file);
+ err:
+ return err;
+}
+
+static struct dentry *fuse_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *raw_data)
+{
+ return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
+}
+
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ if (fc) {
+ down_write(&fc->killsb);
+ fc->sb = NULL;
+ up_write(&fc->killsb);
+ }
+
+ kill_anon_super(sb);
+}
+
+static struct file_system_type fuse_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "fuse",
+ .fs_flags = FS_HAS_SUBTYPE,
+ .mount = fuse_mount,
+ .kill_sb = fuse_kill_sb_anon,
+};
+
+#ifdef CONFIG_BLOCK
+static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *raw_data)
+{
+ return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
+}
+
+static void fuse_kill_sb_blk(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ if (fc) {
+ down_write(&fc->killsb);
+ fc->sb = NULL;
+ up_write(&fc->killsb);
+ }
+
+ kill_block_super(sb);
+}
+
+static struct file_system_type fuseblk_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "fuseblk",
+ .mount = fuse_mount_blk,
+ .kill_sb = fuse_kill_sb_blk,
+ .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
+};
+
+static inline int register_fuseblk(void)
+{
+ return register_filesystem(&fuseblk_fs_type);
+}
+
+static inline void unregister_fuseblk(void)
+{
+ unregister_filesystem(&fuseblk_fs_type);
+}
+#else
+static inline int register_fuseblk(void)
+{
+ return 0;
+}
+
+static inline void unregister_fuseblk(void)
+{
+}
+#endif
+
+static void fuse_inode_init_once(void *foo)
+{
+ struct inode *inode = foo;
+
+ inode_init_once(inode);
+}
+
+static int __init fuse_fs_init(void)
+{
+ int err;
+
+ err = register_filesystem(&fuse_fs_type);
+ if (err)
+ goto out;
+
+ err = register_fuseblk();
+ if (err)
+ goto out_unreg;
+
+ fuse_inode_cachep = kmem_cache_create("fuse_inode",
+ sizeof(struct fuse_inode),
+ 0, SLAB_HWCACHE_ALIGN,
+ fuse_inode_init_once);
+ err = -ENOMEM;
+ if (!fuse_inode_cachep)
+ goto out_unreg2;
+
+ return 0;
+
+ out_unreg2:
+ unregister_fuseblk();
+ out_unreg:
+ unregister_filesystem(&fuse_fs_type);
+ out:
+ return err;
+}
+
+static void fuse_fs_cleanup(void)
+{
+ unregister_filesystem(&fuse_fs_type);
+ unregister_fuseblk();
+ kmem_cache_destroy(fuse_inode_cachep);
+}
+
+static struct kobject *fuse_kobj;
+static struct kobject *connections_kobj;
+
+static int fuse_sysfs_init(void)
+{
+ int err;
+
+ fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
+ if (!fuse_kobj) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ connections_kobj = kobject_create_and_add("connections", fuse_kobj);
+ if (!connections_kobj) {
+ err = -ENOMEM;
+ goto out_fuse_unregister;
+ }
+
+ return 0;
+
+ out_fuse_unregister:
+ kobject_put(fuse_kobj);
+ out_err:
+ return err;
+}
+
+static void fuse_sysfs_cleanup(void)
+{
+ kobject_put(connections_kobj);
+ kobject_put(fuse_kobj);
+}
+
+static int __init fuse_init(void)
+{
+ int res;
+
+ printk(KERN_INFO "fuse init (API version %i.%i)\n",
+ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+
+ INIT_LIST_HEAD(&fuse_conn_list);
+ res = fuse_fs_init();
+ if (res)
+ goto err;
+
+ res = fuse_dev_init();
+ if (res)
+ goto err_fs_cleanup;
+
+ res = fuse_sysfs_init();
+ if (res)
+ goto err_dev_cleanup;
+
+ res = fuse_ctl_init();
+ if (res)
+ goto err_sysfs_cleanup;
+
+ sanitize_global_limit(&max_user_bgreq);
+ sanitize_global_limit(&max_user_congthresh);
+
+ return 0;
+
+ err_sysfs_cleanup:
+ fuse_sysfs_cleanup();
+ err_dev_cleanup:
+ fuse_dev_cleanup();
+ err_fs_cleanup:
+ fuse_fs_cleanup();
+ err:
+ return res;
+}
+
+static void __exit fuse_exit(void)
+{
+ printk(KERN_DEBUG "fuse exit\n");
+
+ fuse_ctl_cleanup();
+ fuse_sysfs_cleanup();
+ fuse_fs_cleanup();
+ fuse_dev_cleanup();
+}
+
+module_init(fuse_init);
+module_exit(fuse_exit);