path: root/target/linux/generic-2.6/patches-2.6.27
Commit log (each entry: commit message, then author, date, files changed, lines -removed/+added, revision):

* remove support for 2.6.27, no target is using it anymore
  Florian Fainelli, 2009-10-29, 78 files, -26479/+0, SVN-Revision: 18201
* since we are using newer iptables version, we don't need this patch anymore
  Imre Kaloz, 2009-10-26, 1 file, -103/+0, SVN-Revision: 18162
* ssb: sync with wireless-2.6
  Felix Fietkau, 2009-10-11, 1 file, -0/+1863, SVN-Revision: 18022
* remove ipset support from core, it is now provided by xtables-addons from the packages feed
  Nicolas Thill, 2009-10-03, 1 file, -7749/+0, SVN-Revision: 17844
* inform users when the in-kernel mips FPU emulator is disabled (#5774)
  Florian Fainelli, 2009-09-28, 1 file, -1/+3, SVN-Revision: 17794
* Update ipset to version 3.2
  Hauke Mehrtens, 2009-09-27, 1 file, -2781/+2842, SVN-Revision: 17764
* kernel: wrong image_cmdline patches has been sent by accidentaly, replace them
  Gabor Juhos, 2009-09-11, 1 file, -31/+0, SVN-Revision: 17570
* kernel: get rid of the simple_prom_emulator, it is not really useful
  Gabor Juhos, 2009-09-10, 1 file, -50/+0, SVN-Revision: 17567
* kernel: add generic image_cmdline hack to MIPS targets
  Gabor Juhos, 2009-09-10, 2 files, -6/+65, SVN-Revision: 17563
* mode cygwin fixes for scripts/mod/file2alias
  Florian Fainelli, 2009-08-27, 1 file, -1/+1, SVN-Revision: 17420
* fix for commit r17276
  Hauke Mehrtens, 2009-08-15, 1 file, -0/+11, SVN-Revision: 17280
* Do not build the mac80211 module in the kernel. The attributes skb->do_not_encrypt and skb->requeue in include/linux/skbuff.h are not used my mac80211 any more.
  Hauke Mehrtens, 2009-08-15, 1 file, -0/+11, SVN-Revision: 17276
* refresh 2.6.27 patches
  Florian Fainelli, 2009-07-31, 2 files, -11/+13, SVN-Revision: 17061
* fix a >2 year old stack overflow in the mtd rootfs split patch which only caused issues on the orion platform on 2.6.30. also merge the squashfs4 fix into the rootfs split patch
  Felix Fietkau, 2009-07-21, 1 file, -10/+9, SVN-Revision: 16944
* update to latest 2.6.27.26 stable kernel
  Florian Fainelli, 2009-07-03, 7 files, -41/+20, SVN-Revision: 16659
* turn off the Algorithmics FPU emulator by default now that we have software floating point enabled in the toolchain, please recompile your toolchains and kernels
  Florian Fainelli, 2009-06-28, 1 file, -0/+163, SVN-Revision: 16605
* also apply gcc4.4.0 specific patches to 2.6.27 (#5318)
  Florian Fainelli, 2009-06-13, 2 files, -0/+334, SVN-Revision: 16439
* ocf: fix uninitialized variable access (thx, Dakon)
  Felix Fietkau, 2009-06-01, 1 file, -1/+1, SVN-Revision: 16285
* refresh patches
  Florian Fainelli, 2009-05-28, 1 file, -2/+2, SVN-Revision: 16156
* Remove pcmcia support from ssb.
  Hauke Mehrtens, 2009-05-24, 1 file, -9/+0, SVN-Revision: 16023
* make kernels use /etc/preinit by default
  Imre Kaloz, 2009-05-14, 1 file, -0/+14, SVN-Revision: 15840
* Add support for pcmcia to ssb. b43 will build now with pcmcia support.
  Hauke Mehrtens, 2009-05-11, 1 file, -0/+9, SVN-Revision: 15784
* Refresh patches.
  Hauke Mehrtens, 2009-05-11, 4 files, -33/+33, SVN-Revision: 15776
* update imq patches for 2.6.26, 2.6.27, 2.6.28, 2.6.29 and 2.6.30
  Jo-Philipp Wich, 2009-05-07, 4 files, -329/+603, SVN-Revision: 15655
* refresh patches
  Imre Kaloz, 2009-05-04, 4 files, -17/+17, SVN-Revision: 15581
* refreshed layer7 patches for 2.6.26.8, 2.6.27.21, 2.6.28.9 and 2.6.29.1
  Jo-Philipp Wich, 2009-04-30, 2 files, -36/+51, SVN-Revision: 15502
* fix a vfs api bug in squashfs triggered by zabbix-agent (#4365)
  Felix Fietkau, 2009-04-04, 1 file, -0/+11, SVN-Revision: 15098
* add a workaround on kmod-textsearch modules (#4832)
  Florian Fainelli, 2009-03-26, 1 file, -0/+19, SVN-Revision: 15047
* refresh patches
  Hauke Mehrtens, 2009-03-24, 2 files, -19/+11, SVN-Revision: 15018
* remove custom phy hardware detection patch - it is no longer necessary
  Felix Fietkau, 2009-03-22, 1 file, -26/+0, SVN-Revision: 14979
* fix mmap issues caused by the vm_exports patch (thx, danm)
  Felix Fietkau, 2009-03-16, 1 file, -1/+1, SVN-Revision: 14902
* also make 2.6.27 use the sprom fallback
  Florian Fainelli, 2009-02-27, 1 file, -0/+115, SVN-Revision: 14684
* fix IMQ on linux 2.6.27 and 2.6.28
  Felix Fietkau, 2009-02-21, 1 file, -0/+75, SVN-Revision: 14599
* merge netfilter changes to 2.6.27
  Felix Fietkau, 2009-02-21, 5 files, -1165/+7, SVN-Revision: 14598
* generic-2.6: remove TARPIT and CHAOS patches and config options
  Gabor Juhos, 2009-02-08, 3 files, -1173/+1, SVN-Revision: 14456
* switch to 2.6.27.12
  Gabor Juhos, 2009-01-23, 2 files, -2/+2, SVN-Revision: 14157
* add some extra kernel exports to be used by ashmem and binder later
  Felix Fietkau, 2009-01-14, 1 file, -0/+114, SVN-Revision: 14033
* update to 2.6.27.9 and refresh patches
  Gabor Juhos, 2008-12-16, 2 files, -4/+4, SVN-Revision: 13659
* add simple prom emulator to 2.6.2{7,8} as well
  Gabor Juhos, 2008-12-15, 1 file, -0/+50, SVN-Revision: 13653
* Alloz aead and friends to compile for 2.6.27 targets no idea it could not without a description
  Florian Fainelli, 2008-12-11, 1 file, -0/+27, SVN-Revision: 13606
* make mips_machine stuff available for other MIPS targets
  Gabor Juhos, 2008-12-02, 1 file, -0/+133, SVN-Revision: 13492
* refresh 2.6.27 patches
  Gabor Juhos, 2008-12-02, 1 file, -1/+1, SVN-Revision: 13490
* fix chaostables patch to have headers installed when using 'make -C $(KDIR) headers_install'
  Nicolas Thill, 2008-12-02, 1 file, -0/+10, SVN-Revision: 13465
* backport the new style ds1672 driver and switch to it
  Imre Kaloz, 2008-12-01, 3 files, -16/+248, SVN-Revision: 13457
* migrate gateworks board support to the new at24 eeprom driver
  Imre Kaloz, 2008-11-30, 1 file, -0/+134, SVN-Revision: 13450
* refresh patches
  Imre Kaloz, 2008-11-30, 3 files, -42/+10, SVN-Revision: 13449
* refresh patches
  Imre Kaloz, 2008-11-30, 1 file, -2/+2, SVN-Revision: 13443
* Allow b43_pci_bridge to be default on, otherwise we can never enable it.
  Florian Fainelli, 2008-11-29, 1 file, -0/+11, SVN-Revision: 13432
* Make L7-filter compile with kernel 2.6.27 (#4253)
  Florian Fainelli, 2008-11-28, 2 files, -722/+770, Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
* the 'dev' member of the structure was removed in 2.6.27 (see: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=5ce2d488fe039ddd86a638496cf704df86c74eeb), therefore use the qdisc_dev() function instead. Signed-off-by: Russell Senior <seniorr@aracnet.com>
  Florian Fainelli, 2008-11-26, 1 file, -2/+2, SVN-Revision: 13366
/******************************************************************************
 * drivers/xen/blktap/blktap.c
 * 
 * Back-end driver for user level virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. Requests
 * are remapped to a user-space memory region.
 *
 * Based on the blkback driver code.
 * 
 * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <asm/hypervisor.h>
#include "common.h"
#include <xen/balloon.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/gfp.h>
#include <linux/poll.h>
#include <asm/tlbflush.h>
#include <linux/devfs_fs_kernel.h>

#define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
#define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */

/*
 * The maximum number of requests that can be outstanding at any time
 * is determined by 
 *
 *   [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] 
 *
 * where mmap_alloc < MAX_DYNAMIC_MEM.
 *
 * TODO:
 * mmap_alloc is initialised to 2 and should be adjustable on the fly via
 * sysfs.
 */
#define MAX_DYNAMIC_MEM 64
#define MAX_PENDING_REQS 64   
#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define MMAP_VADDR(_start, _req,_seg)                                   \
        (_start +                                                       \
         ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
         ((_seg) * PAGE_SIZE))
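/*
 * MMAP_VADDR(_start, _req, _seg) returns the virtual address of segment
 * _seg of request slot _req within an area beginning at _start.  It is
 * used both for the kernel-side mapping (mmap_start[].start) and for the
 * user-side mapping (user_vstart); each request slot owns
 * BLKIF_MAX_SEGMENTS_PER_REQUEST consecutive pages.
 */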
static int blkif_reqs = MAX_PENDING_REQS;
static int mmap_pages = MMAP_PAGES;

#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
		      * have a bunch of pages reserved for shared
		      * memory rings.
		      */

/*Data struct associated with each of the tapdisk devices*/
typedef struct tap_blkif {
	struct vm_area_struct *vma;   /*Shared memory area                   */
	unsigned long rings_vstart;   /*Kernel memory mapping                */
	unsigned long user_vstart;    /*User memory mapping                  */
	unsigned long dev_inuse;      /*One process opens device at a time.  */
	unsigned long dev_pending;    /*In process of being opened           */
	unsigned long ring_ok;        /*make this ring->state                */
	blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
	wait_queue_head_t wait;       /*for poll                             */
	unsigned long mode;           /*current switching mode               */
	int minor;                    /*Minor number for tapdisk device      */
	pid_t pid;                    /*tapdisk process id                   */
	enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
						  shutdown                   */
	unsigned long *idx_map;       /*Record the user ring id to kern 
					[req id, idx] tuple                  */
	blkif_t *blkif;               /*Associate blkif with tapdev          */
} tap_blkif_t;

/*Private data struct associated with the inode*/
typedef struct private_info {
	int idx;
} private_info_t;

/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
typedef struct domid_translate {
	unsigned short domid;
	unsigned short busid;
} domid_translate_t ;


domid_translate_t  translate_domid[MAX_TAP_DEV];
tap_blkif_t *tapfds[MAX_TAP_DEV];

static int __init set_blkif_reqs(char *str)
{
	get_option(&str, &blkif_reqs);
	return 1;
}
__setup("blkif_reqs=", set_blkif_reqs);

/* Run-time switchable: /sys/module/blktap/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);

/*
 * Each outstanding request that we've passed to the lower device layers has a 
 * 'pending_req' allocated to it. Each buffer_head that completes decrements 
 * the pendcnt towards zero. When it hits zero, the specified domain has a 
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
	blkif_t       *blkif;
	unsigned long  id;
	unsigned short mem_idx;
	int            nr_pages;
	atomic_t       pendcnt;
	unsigned short operation;
	int            status;
	struct list_head free_list;
	int            inuse;
} pending_req_t;

static pending_req_t *pending_reqs[MAX_PENDING_REQS];
static struct list_head pending_free;
static DEFINE_SPINLOCK(pending_free_lock);
static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
static int alloc_pending_reqs;

typedef unsigned int PEND_RING_IDX;

static inline int MASK_PEND_IDX(int i) { 
	return (i & (MAX_PENDING_REQS-1)); 
}

static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
	return (req - pending_reqs[idx]);
}

#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

#define BLKBACK_INVALID_HANDLE (~0)

typedef struct mmap_page {
	unsigned long start;
	struct page *mpage;
} mmap_page_t;

static mmap_page_t mmap_start[MAX_DYNAMIC_MEM];
static unsigned short mmap_alloc = 0;
static unsigned short mmap_lock = 0;
static unsigned short mmap_inuse = 0;
static unsigned long *pending_addrs[MAX_DYNAMIC_MEM];

/******************************************************************
 * GRANT HANDLES
 */

/* When using grant tables to map a frame for device access then the
 * handle returned must be used to unmap the frame. This is needed to
 * drop the ref count on the frame.
 */
struct grant_handle_pair
{
        grant_handle_t kernel;
        grant_handle_t user;
};

static struct grant_handle_pair 
    pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
#define pending_handle(_id, _idx, _i) \
    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
    + (_i)])
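/*
 * pending_handle(mem_idx, req_idx, seg) picks out the grant_handle_pair for
 * segment 'seg' of pending request 'req_idx' in mmap region 'mem_idx'; the
 * kernel and user handles are kept together so that fast_flush_area() can
 * tear down both mappings for a request in one pass.
 */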


static int blktap_read_ufe_ring(int idx); /*local prototypes*/

#define BLKTAP_MINOR 0  /*/dev/xen/blktap resides at device number
			  major=254, minor numbers begin at 0            */ 
#define BLKTAP_DEV_MAJOR 254         /* TODO: Make major number dynamic  *
                                      * and create devices in the kernel *
				      */
#define BLKTAP_DEV_DIR  "/dev/xen"

/* blktap IOCTLs: */
#define BLKTAP_IOCTL_KICK_FE         1
#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
#define BLKTAP_IOCTL_SETMODE         3
#define BLKTAP_IOCTL_SENDPID	     4
#define BLKTAP_IOCTL_NEWINTF	     5
#define BLKTAP_IOCTL_MINOR	     6
#define BLKTAP_IOCTL_MAJOR	     7
#define BLKTAP_QUERY_ALLOC_REQS      8
#define BLKTAP_IOCTL_FREEINTF        9
#define BLKTAP_IOCTL_PRINT_IDXS      100  

/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp.             */

#define BLKTAP_MODE_INTERPOSE \
           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)


static inline int BLKTAP_MODE_VALID(unsigned long arg)
{
	return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
		(arg == BLKTAP_MODE_INTERCEPT_FE) ||
                (arg == BLKTAP_MODE_INTERPOSE   ));
}

/* Requests passing through the tap to userspace are re-assigned an ID.
 * We must record a mapping between the BE [IDX,ID] tuple and the userspace
 * ring ID. 
 */

static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
{
        return ((fe_dom << 16) | MASK_PEND_IDX(idx));
}

extern inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
{
        return (PEND_RING_IDX)(id & 0x0000ffff);
}

extern inline int ID_TO_MIDX(unsigned long id)
{
        return (int)(id >> 16);
}
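/*
 * Note: although MAKE_ID() names its first argument fe_dom, the callers in
 * this file pack the mmap region index (mem_idx) into the upper 16 bits and
 * a pending-ring index into the lower 16 bits; ID_TO_MIDX()/ID_TO_IDX()
 * recover the two halves when a response comes back from user space.
 */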

#define INVALID_REQ 0xdead0000

/*TODO: Convert to a free list*/
static inline int GET_NEXT_REQ(unsigned long *idx_map)
{
	int i;
	for (i = 0; i < MAX_PENDING_REQS; i++)
		if (idx_map[i] == INVALID_REQ) return i;

	return INVALID_REQ;
}


#define BLKTAP_INVALID_HANDLE(_g) \
    (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))

#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
    } while(0)


/******************************************************************
 * BLKTAP VM OPS
 */

static struct page *blktap_nopage(struct vm_area_struct *vma,
				  unsigned long address,
				  int *type)
{
	/*
	 * if the page has not been mapped in by the driver then return
	 * NOPAGE_SIGBUS to the domain.
	 */

	return NOPAGE_SIGBUS;
}

struct vm_operations_struct blktap_vm_ops = {
	nopage:   blktap_nopage,
};

/******************************************************************
 * BLKTAP FILE OPS
 */
 
/*Function Declarations*/
static int get_next_free_dev(void);
static int blktap_open(struct inode *inode, struct file *filp);
static int blktap_release(struct inode *inode, struct file *filp);
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
static int blktap_ioctl(struct inode *inode, struct file *filp,
                        unsigned int cmd, unsigned long arg);
static unsigned int blktap_poll(struct file *file, poll_table *wait);

struct miscdevice *set_misc(int minor, char *name, int dev);

static struct file_operations blktap_fops = {
	.owner   = THIS_MODULE,
	.poll    = blktap_poll,
	.ioctl   = blktap_ioctl,
	.open    = blktap_open,
	.release = blktap_release,
	.mmap    = blktap_mmap,
};


static int get_next_free_dev(void)
{
	tap_blkif_t *info;
	int i = 0, ret = -1;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);
	
	while (i < MAX_TAP_DEV) {
		info = tapfds[i];
		if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
			&& (info->dev_pending == 0) ) {
			info->dev_pending = 1;
			ret = i;
			goto done;
		}
		i++;
	}
	
done:
	spin_unlock_irqrestore(&pending_free_lock, flags);
	return ret;
}

int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
{
	int i;
		
	for (i = 0; i < MAX_TAP_DEV; i++)
		if ( (translate_domid[i].domid == domid)
		    && (translate_domid[i].busid == xenbus_id) ) {
			tapfds[i]->blkif = blkif;
			tapfds[i]->status = RUNNING;
			return i;
		}
	return -1;
}

void signal_tapdisk(int idx) 
{
	tap_blkif_t *info;
	struct task_struct *ptask;

	info = tapfds[idx];
	if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
		ptask = find_task_by_pid(info->pid);
		if (ptask) { 
			info->status = CLEANSHUTDOWN;
 		}
	}
	info->blkif = NULL;
	return;
}

static int blktap_open(struct inode *inode, struct file *filp)
{
	blkif_sring_t *sring;
	int idx = iminor(inode) - BLKTAP_MINOR;
	tap_blkif_t *info;
	private_info_t *prv;
	int i;
	
	if (tapfds[idx] == NULL) {
		WPRINTK("Unable to open device /dev/xen/blktap%d\n",
		       idx);
		return -ENOMEM;
	}
	DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
	
	info = tapfds[idx];
	
	/*Only one process can access device at a time*/
	if (test_and_set_bit(0, &info->dev_inuse))
		return -EBUSY;

	info->dev_pending = 0;
	    
	/* Allocate the fe ring. */
	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
	if (sring == NULL)
		goto fail_nomem;

	SetPageReserved(virt_to_page(sring));
    
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
	
	prv = kzalloc(sizeof(private_info_t),GFP_KERNEL);
	prv->idx = idx;
	filp->private_data = prv;
	info->vma = NULL;

	info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, 
				GFP_KERNEL);
	
	if (idx > 0) {
		init_waitqueue_head(&info->wait);
		for (i = 0; i < MAX_PENDING_REQS; i++) 
			info->idx_map[i] = INVALID_REQ;
	}

	DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
	return 0;

 fail_nomem:
	return -ENOMEM;
}

static int blktap_release(struct inode *inode, struct file *filp)
{
	int idx = iminor(inode) - BLKTAP_MINOR;
	tap_blkif_t *info;
	
	if (tapfds[idx] == NULL) {
		WPRINTK("Trying to free device that doesn't exist "
		       "[/dev/xen/blktap%d]\n",idx);
		return -1;
	}
	info = tapfds[idx];
	info->dev_inuse = 0;
	DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx);

	/* Free the ring page. */
	ClearPageReserved(virt_to_page(info->ufe_ring.sring));
	free_page((unsigned long) info->ufe_ring.sring);

	/* Clear any active mappings and free foreign map table */
	if (info->vma) {
		zap_page_range(
			info->vma, info->vma->vm_start, 
			info->vma->vm_end - info->vma->vm_start, NULL);
		info->vma = NULL;
	}
	
	if (filp->private_data) kfree(filp->private_data);

	if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
		kthread_stop(info->blkif->xenblkd);
		info->blkif->xenblkd = NULL;
		info->status = CLEANSHUTDOWN;
	}	
	return 0;
}


/* Note on mmap:
 * We need to map pages to user space in a way that will allow the block
 * subsystem to set up direct IO to them.  This couldn't be done before, because
 * there isn't really a sane way to translate a user virtual address down to a 
 * physical address when the page belongs to another domain.
 *
 * My first approach was to map the page in to kernel memory, add an entry
 * for it in the physical frame list (using alloc_lomem_region as in blkback)
 * and then attempt to map that page up to user space.  This is disallowed
 * by xen though, which realizes that we don't really own the machine frame
 * underlying the physical page.
 *
 * The new approach is to provide explicit support for this in xen linux.
 * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
 * mapped from other vms.  vma->vm_private_data is set up as a mapping 
 * from pages to actual page structs.  There is a new clause in get_user_pages
 * that does the right thing for this sort of mapping.
 */
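/*
 * Concretely: blktap_mmap() below allocates one struct page pointer per
 * mapped page and stores the array in vma->vm_private_data.  When a request
 * is dispatched, dispatch_rw_block_io() records
 *     map[(uvaddr - vma->vm_start) >> PAGE_SHIFT] = pg;
 * so that get_user_pages() on this VM_FOREIGN vma can resolve the foreign
 * frames, and blktap_read_ufe_ring() clears the entry on completion.
 */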
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int size;
	struct page **map;
	int i;
	private_info_t *prv;
	tap_blkif_t *info;

	/*Retrieve the dev info*/
	prv = (private_info_t *)filp->private_data;
	if (prv == NULL) {
		WPRINTK("blktap: mmap, retrieving idx failed\n");
		return -ENOMEM;
	}
	info = tapfds[prv->idx];
	
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &blktap_vm_ops;

	size = vma->vm_end - vma->vm_start;
	if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
		WPRINTK("you _must_ map exactly %d pages!\n",
		       mmap_pages + RING_PAGES);
		return -EAGAIN;
	}

	size >>= PAGE_SHIFT;
	info->rings_vstart = vma->vm_start;
	info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
    
	/* Map the ring pages to the start of the region and reserve it. */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	if (remap_pfn_range(vma, vma->vm_start, 
			    __pa(info->ufe_ring.sring) >> PAGE_SHIFT, 
			    PAGE_SIZE, vma->vm_page_prot)) {
		WPRINTK("Mapping user ring failed!\n");
		goto fail;
	}

	/* Mark this VM as containing foreign pages, and set up mappings. */
	map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
		      * sizeof(struct page *),
		      GFP_KERNEL);
	if (map == NULL) {
		WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
		goto fail;
	}

	for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
		map[i] = NULL;
    
	vma->vm_private_data = map;
	vma->vm_flags |= VM_FOREIGN;

	info->vma = vma;
	info->ring_ok = 1;
	return 0;
 fail:
	/* Clear any active mappings. */
	zap_page_range(vma, vma->vm_start, 
		       vma->vm_end - vma->vm_start, NULL);

	return -ENOMEM;
}


static int blktap_ioctl(struct inode *inode, struct file *filp,
                        unsigned int cmd, unsigned long arg)
{
	int idx = iminor(inode) - BLKTAP_MINOR;
	switch(cmd) {
	case BLKTAP_IOCTL_KICK_FE: 
	{
		/* There are fe messages to process. */
		return blktap_read_ufe_ring(idx);
	}
	case BLKTAP_IOCTL_SETMODE:
	{
		tap_blkif_t *info = tapfds[idx];
		
		if ( (idx > 0) && (idx < MAX_TAP_DEV) 
		     && (tapfds[idx] != NULL) ) 
		{
			if (BLKTAP_MODE_VALID(arg)) {
				info->mode = arg;
				/* XXX: may need to flush rings here. */
				DPRINTK("blktap: set mode to %lx\n", 
				       arg);
				return 0;
			}
		}
		return 0;
	}
	case BLKTAP_IOCTL_PRINT_IDXS:
        {
		tap_blkif_t *info = tapfds[idx];
		
		if ( (idx > 0) && (idx < MAX_TAP_DEV) 
		     && (tapfds[idx] != NULL) ) 
		{
			printk("User Rings: \n-----------\n");
			printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
				"| req_prod: %2d, rsp_prod: %2d\n",
				info->ufe_ring.rsp_cons,
				info->ufe_ring.req_prod_pvt,
				info->ufe_ring.sring->req_prod,
				info->ufe_ring.sring->rsp_prod);
		}
            	return 0;
        }
	case BLKTAP_IOCTL_SENDPID:
	{
		tap_blkif_t *info = tapfds[idx];
		
		if ( (idx > 0) && (idx < MAX_TAP_DEV) 
		     && (tapfds[idx] != NULL) ) 
		{
			info->pid = (pid_t)arg;
			DPRINTK("blktap: pid received %d\n", 
			       info->pid);
		}
		return 0;
	}
	case BLKTAP_IOCTL_NEWINTF:
	{		
		uint64_t val = (uint64_t)arg;
		domid_translate_t *tr = (domid_translate_t *)&val;
		int newdev;

		DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
		       tr->domid, tr->busid);
		newdev = get_next_free_dev();
		if (newdev < 1) {
			WPRINTK("Error initialising /dev/xen/blktap - "
				"No more devices\n");
			return -1;
		}
		translate_domid[newdev].domid = tr->domid;
		translate_domid[newdev].busid = tr->busid;
		return newdev;
	}
	case BLKTAP_IOCTL_FREEINTF:
	{
		unsigned long dev = arg;
		tap_blkif_t *info = NULL;

		if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];

		if ( (info != NULL) && (info->dev_pending) )
			info->dev_pending = 0;
		return 0;
	}
	case BLKTAP_IOCTL_MINOR:
	{
		unsigned long dev = arg;
		tap_blkif_t *info = NULL;
		
		if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
		
		if (info != NULL) return info->minor;
		else return -1;
	}
	case BLKTAP_IOCTL_MAJOR:
		return BLKTAP_DEV_MAJOR;

	case BLKTAP_QUERY_ALLOC_REQS:
	{
		WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
		       alloc_pending_reqs, blkif_reqs);
		return (alloc_pending_reqs/blkif_reqs) * 100;
	}
	}
	return -ENOIOCTLCMD;
}

static unsigned int blktap_poll(struct file *file, poll_table *wait)
{
	private_info_t *prv;
	tap_blkif_t *info;
	
	/*Retrieve the dev info*/
	prv = (private_info_t *)file->private_data;
	if (prv == NULL) {
		WPRINTK(" poll, retrieving idx failed\n");
		return 0;
	}
	
	if (prv->idx == 0) return 0;
	
	info = tapfds[prv->idx];
	
	poll_wait(file, &info->wait, wait);
	if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
		flush_tlb_all();
		RING_PUSH_REQUESTS(&info->ufe_ring);
		return POLLIN | POLLRDNORM;
	}
	return 0;
}

void blktap_kick_user(int idx)
{
	tap_blkif_t *info;

	if (idx == 0) return;
	
	info = tapfds[idx];
	
	if (info != NULL) wake_up_interruptible(&info->wait);
	return;
}

static int do_block_io_op(blkif_t *blkif);
static void dispatch_rw_block_io(blkif_t *blkif,
				 blkif_request_t *req,
				 pending_req_t *pending_req);
static void make_response(blkif_t *blkif, unsigned long id, 
                          unsigned short op, int st);

/******************************************************************
 * misc small helpers
 */
static int req_increase(void)
{
	int i, j;
	struct page *page;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&pending_free_lock, flags);

	ret = -EINVAL;
	if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) 
		goto done;

#ifdef __ia64__
	extern unsigned long alloc_empty_foreign_map_page_range(
		unsigned long pages);
	mmap_start[mmap_alloc].start = (unsigned long)
		alloc_empty_foreign_map_page_range(mmap_pages);
#else /* ! ia64 */
	page = balloon_alloc_empty_page_range(mmap_pages);
	ret = -ENOMEM;
	if (page == NULL) {
		printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__);
		goto done;
	}

	/* Pin all of the pages. */
	for (i=0; i<mmap_pages; i++)
		get_page(&page[i]);

	mmap_start[mmap_alloc].start = 
		(unsigned long)pfn_to_kaddr(page_to_pfn(page));
	mmap_start[mmap_alloc].mpage = page;

#endif

	pending_reqs[mmap_alloc]  = kzalloc(sizeof(pending_req_t) *
					blkif_reqs, GFP_KERNEL);
	pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) *
					mmap_pages, GFP_KERNEL);

	ret = -ENOMEM;
	if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
		kfree(pending_reqs[mmap_alloc]);
		kfree(pending_addrs[mmap_alloc]);
		WPRINTK("%s: out of memory\n", __FUNCTION__); 
		ret = -ENOMEM;
		goto done;
	}

	ret = 0;

	DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
	        __FUNCTION__, blkif_reqs, mmap_pages, 
	       mmap_start[mmap_alloc].start);

	BUG_ON(mmap_start[mmap_alloc].start == 0);

	for (i = 0; i < mmap_pages; i++) 
		pending_addrs[mmap_alloc][i] = 
			mmap_start[mmap_alloc].start + (i << PAGE_SHIFT);

	for (i = 0; i < MAX_PENDING_REQS ; i++) {
		list_add_tail(&pending_reqs[mmap_alloc][i].free_list, 
			      &pending_free);
		pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
		for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
			BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, 
								 i, j));
	}

	mmap_alloc++;
	DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
 done:
	spin_unlock_irqrestore(&pending_free_lock, flags);
	return ret;
}

static void mmap_req_del(int mmap)
{
	int i;
	struct page *page;

	/*Spinlock already acquired*/
	kfree(pending_reqs[mmap]);
	kfree(pending_addrs[mmap]);

#ifdef __ia64__
	/*Not sure what goes here yet!*/
#else

	/* Unpin all of the pages. */
	page = mmap_start[mmap].mpage;
	for (i=0; i<mmap_pages; i++)
		put_page(&page[i]);

	balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages);
#endif

	mmap_lock = 0;
	DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
	mmap_alloc--;
}

/*N.B. Currently unused - will be accessed via sysfs*/
static void req_decrease(void)
{
	pending_req_t *req;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);

	DPRINTK("Req decrease called.\n");
	if (mmap_lock || mmap_alloc == 1) 
		goto done;

	mmap_lock = 1;
	mmap_inuse = MAX_PENDING_REQS;
	
        /*Go through reqs and remove any that aren't in use*/
	for (i = 0; i < MAX_PENDING_REQS ; i++) {
		req = &pending_reqs[mmap_alloc-1][i];
		if (req->inuse == 0) {
			list_del(&req->free_list);
			mmap_inuse--;
		}
	}
	if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
 done:
	spin_unlock_irqrestore(&pending_free_lock, flags);
	return;
}

static pending_req_t* alloc_req(void)
{
	pending_req_t *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);

	if (!list_empty(&pending_free)) {
		req = list_entry(pending_free.next, pending_req_t, free_list);
		list_del(&req->free_list);
	}

	if (req) {
		req->inuse = 1;
		alloc_pending_reqs++;
	}
	spin_unlock_irqrestore(&pending_free_lock, flags);

	return req;
}

static void free_req(pending_req_t *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&pending_free_lock, flags);

	alloc_pending_reqs--;
	req->inuse = 0;
	if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
		mmap_inuse--;
		if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
		spin_unlock_irqrestore(&pending_free_lock, flags);
		return;
	}
	was_empty = list_empty(&pending_free);
	list_add(&req->free_list, &pending_free);

	spin_unlock_irqrestore(&pending_free_lock, flags);

	if (was_empty)
		wake_up(&pending_free_wq);
}

static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int 
			    tapidx)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
	unsigned int i, invcount = 0;
	struct grant_handle_pair *khandle;
	uint64_t ptep;
	int ret, mmap_idx;
	unsigned long kvaddr, uvaddr;

	tap_blkif_t *info = tapfds[tapidx];
	
	if (info == NULL) {
		WPRINTK("fast_flush: Couldn't get info!\n");
		return;
	}
	mmap_idx = req->mem_idx;
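	/*
	 * Each mapped segment holds two grant handles (the kernel mapping and
	 * the user-space PTE), so queue up to two unmap operations per page
	 * and submit them in a single GNTTABOP_unmap_grant_ref batch below.
	 */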

	for (i = 0; i < req->nr_pages; i++) {
		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i);
		uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);

		khandle = &pending_handle(mmap_idx, k_idx, i);
		if (BLKTAP_INVALID_HANDLE(khandle)) {
			WPRINTK("BLKTAP_INVALID_HANDLE\n");
			continue;
		}
		gnttab_set_unmap_op(&unmap[invcount], 
			MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i), 
				    GNTMAP_host_map, khandle->kernel);
		invcount++;

		if (create_lookup_pte_addr(
		    info->vma->vm_mm,
		    MMAP_VADDR(info->user_vstart, u_idx, i), 
		    &ptep) !=0) {
			WPRINTK("Couldn't get a pte addr!\n");
			return;
		}

		gnttab_set_unmap_op(&unmap[invcount], 
			ptep, GNTMAP_host_map,
			khandle->user);
		invcount++;
            
		BLKTAP_INVALIDATE_HANDLE(khandle);
	}
	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	BUG_ON(ret);
	
	if (info->vma != NULL)
		zap_page_range(info->vma, 
			       MMAP_VADDR(info->user_vstart, u_idx, 0), 
			       req->nr_pages << PAGE_SHIFT, NULL);
}

/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static void print_stats(blkif_t *blkif)
{
	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
	       current->comm, blkif->st_oo_req,
	       blkif->st_rd_req, blkif->st_wr_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
}

int tap_blkif_schedule(void *arg)
{
	blkif_t *blkif = arg;

	blkif_get(blkif);

	if (debug_lvl)
		printk(KERN_DEBUG "%s: started\n", current->comm);

	while (!kthread_should_stop()) {
		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			pending_free_wq,
			!list_empty(&pending_free) || kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	if (log_stats)
		print_stats(blkif);
	if (debug_lvl)
		printk(KERN_DEBUG "%s: exiting\n", current->comm);

	blkif->xenblkd = NULL;
	blkif_put(blkif);

	return 0;
}

/******************************************************************
 * COMPLETION CALLBACK -- Called by user level ioctl()
 */

static int blktap_read_ufe_ring(int idx)
{
	/* This is called to read responses from the UFE ring. */
	RING_IDX i, j, rp;
	blkif_response_t *resp;
	blkif_t *blkif=NULL;
	int pending_idx, usr_idx, mmap_idx;
	pending_req_t *pending_req;
	tap_blkif_t *info;
	
	info = tapfds[idx];
	if (info == NULL) {
		return 0;
	}

	/* We currently only forward packets in INTERCEPT_FE mode. */
	if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
		return 0;

	/* for each outstanding message on the UFEring  */
	rp = info->ufe_ring.sring->rsp_prod;
	rmb();
        
	for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
		resp = RING_GET_RESPONSE(&info->ufe_ring, i);
		++info->ufe_ring.rsp_cons;

		/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
		usr_idx = (int)resp->id;
		pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
		mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);

		if ( (mmap_idx >= mmap_alloc) || 
		   (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) )
			WPRINTK("Incorrect req map"
			       "[%d], internal map [%d,%d (%d)]\n", 
			       usr_idx, mmap_idx, 
			       ID_TO_IDX(info->idx_map[usr_idx]),
			       MASK_PEND_IDX(
				       ID_TO_IDX(info->idx_map[usr_idx])));

		pending_req = &pending_reqs[mmap_idx][pending_idx];
		blkif = pending_req->blkif;

		for (j = 0; j < pending_req->nr_pages; j++) {

			unsigned long kvaddr, uvaddr;
			struct page **map = info->vma->vm_private_data;
			struct page *pg;
			int offset; 

			uvaddr  = MMAP_VADDR(info->user_vstart, usr_idx, j);
			kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
					    pending_idx, j);

			pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
			ClearPageReserved(pg);
			offset = (uvaddr - info->vma->vm_start) 
				>> PAGE_SHIFT;
			map[offset] = NULL;
		}
		fast_flush_area(pending_req, pending_idx, usr_idx, idx); 
		make_response(blkif, pending_req->id, resp->operation,
			      resp->status);
		info->idx_map[usr_idx] = INVALID_REQ;
		blkif_put(pending_req->blkif);
		free_req(pending_req);
	}
		
	return 0;
}


/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

static void blkif_notify_work(blkif_t *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}



/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */
static int print_dbug = 1;
static int do_block_io_op(blkif_t *blkif)
{
	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
	blkif_request_t *req;
	pending_req_t *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;
	tap_blkif_t *info;

	rc = blk_ring->req_cons;
	rp = blk_ring->sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	/*Check blkif has corresponding UE ring*/
	if (blkif->dev_num == -1) {
		/*oops*/
		if (print_dbug) {
			WPRINTK("Corresponding UE " 
			       "ring does not exist!\n");
			print_dbug = 0; /*We only print this message once*/
		}
		return 1; 
	}

	info = tapfds[blkif->dev_num];
	if (info == NULL || !info->dev_inuse) {
		if (print_dbug) {
			WPRINTK("Can't get UE info!\n");
			print_dbug = 0;
		}
		return 1;
	}

	while (rc != rp) {
		
		if (RING_FULL(&info->ufe_ring)) {
			WPRINTK("RING_FULL! More to do\n");
			more_to_do = 1;
			break;
		}
		
		if (RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
			WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
			       " More to do\n");
			more_to_do = 1;
			break;		
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

		req = RING_GET_REQUEST(blk_ring, rc);
		blk_ring->req_cons = ++rc; /* before make_response() */	

		switch (req->operation) {
		case BLKIF_OP_READ:
			blkif->st_rd_req++;
			dispatch_rw_block_io(blkif, req, pending_req);
			break;

		case BLKIF_OP_WRITE:
			blkif->st_wr_req++;
			dispatch_rw_block_io(blkif, req, pending_req);
			break;

		default:
			WPRINTK("unknown operation [%d]\n",
				req->operation);
			make_response(blkif, req->id, req->operation,
				      BLKIF_RSP_ERROR);
			free_req(pending_req);
			break;
		}
	}
		
	blktap_kick_user(blkif->dev_num);

	return more_to_do;
}

static void dispatch_rw_block_io(blkif_t *blkif,
				 blkif_request_t *req,
				 pending_req_t *pending_req)
{
	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
	int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
	unsigned int nseg;
	int ret, i;
	tap_blkif_t *info = tapfds[blkif->dev_num];
	uint64_t sector;
	
	blkif_request_t *target;
	int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
	int usr_idx = GET_NEXT_REQ(info->idx_map);
	uint16_t mmap_idx = pending_req->mem_idx;

	/*Check we have space on user ring - should never fail*/
	if(usr_idx == INVALID_REQ) goto fail_flush;
	
	/* Check that number of segments is sane. */
	nseg = req->nr_segments;
	if ( unlikely(nseg == 0) || 
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
		WPRINTK("Bad number of segments in request (%d)\n", nseg);
		goto fail_response;
	}
	
	/* Make sure userspace is ready. */
	if (!info->ring_ok) {
		WPRINTK("blktap: ring not ready for requests!\n");
		goto fail_response;
	}

	if (RING_FULL(&info->ufe_ring)) {
		WPRINTK("blktap: fe_ring is full, can't add "
			"IO Request will be dropped. %d %d\n",
			RING_SIZE(&info->ufe_ring),
			RING_SIZE(&blkif->blk_ring));
		goto fail_response;
	}

	pending_req->blkif     = blkif;
	pending_req->id        = req->id;
	pending_req->operation = operation;
	pending_req->status    = BLKIF_RSP_OKAY;
	pending_req->nr_pages  = nseg;
	op = 0;
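	/*
	 * Two grant-map operations are queued per segment: one maps the
	 * granted frame at the kernel-side address (kvaddr), the other maps
	 * it into the tapdisk process via its page-table entry (ptep).  This
	 * is why map[] is sized BLKIF_MAX_SEGMENTS_PER_REQUEST*2.
	 */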
	for (i = 0; i < nseg; i++) {
		unsigned long uvaddr;
		unsigned long kvaddr;
		uint64_t ptep;
		struct page *page;
		uint32_t flags;

		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
				    pending_idx, i);
		page = virt_to_page(kvaddr);

		sector = req->sector_number + (8*i);
		if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
			WPRINTK("BLKTAP: Sector request greater" 
			       "than size\n");
			WPRINTK("BLKTAP: %s request sector" 
			       "[%llu,%llu], Total [%llu]\n",
			       (req->operation == 
				BLKIF_OP_WRITE ? "WRITE" : "READ"),
				(long long unsigned) sector,
				(long long unsigned) sector>>9,
				blkif->sectors);
		}

		flags = GNTMAP_host_map;
		if (operation == WRITE)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[op], kvaddr, flags,
				  req->seg[i].gref, blkif->domid);
		op++;

		/* Now map it to user. */
		ret = create_lookup_pte_addr(info->vma->vm_mm, 
					     uvaddr, &ptep);
		if (ret) {
			WPRINTK("Couldn't get a pte addr!\n");
			fast_flush_area(pending_req, pending_idx, usr_idx, 
					blkif->dev_num);
			goto fail_flush;
		}

		flags = GNTMAP_host_map | GNTMAP_application_map
			| GNTMAP_contains_pte;
		if (operation == WRITE)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[op], ptep, flags,
				  req->seg[i].gref, blkif->domid);
		op++;
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
	BUG_ON(ret);

	for (i = 0; i < (nseg*2); i+=2) {
		unsigned long uvaddr;
		unsigned long kvaddr;
		unsigned long offset;
		struct page *pg;

		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
				    pending_idx, i/2);

		if (unlikely(map[i].status != 0)) {
			WPRINTK("invalid kernel buffer -- "
				"could not remap it\n");
			goto fail_flush;
		}

		if (unlikely(map[i+1].status != 0)) {
			WPRINTK("invalid user buffer -- "
				"could not remap it\n");
			goto fail_flush;
		}

		pending_handle(mmap_idx, pending_idx, i/2).kernel 
			= map[i].handle;
		pending_handle(mmap_idx, pending_idx, i/2).user   
			= map[i+1].handle;
		set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
		offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
		((struct page **)info->vma->vm_private_data)[offset] =
			pg;
	}
	/* Mark mapped pages as reserved: */
	for (i = 0; i < req->nr_segments; i++) {
		unsigned long kvaddr;
		struct page *pg;

		kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
				    pending_idx, i);
		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
		SetPageReserved(pg);
	}
	
	/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
	info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);

	blkif_get(blkif);
	/* Finally, write the request message to the user ring. */
	target = RING_GET_REQUEST(&info->ufe_ring,
				  info->ufe_ring.req_prod_pvt);
	memcpy(target, req, sizeof(*req));
	target->id = usr_idx;
	info->ufe_ring.req_prod_pvt++;
	return;

 fail_flush:
	WPRINTK("Reached Fail_flush\n");
	fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
 fail_response:
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
} 



/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */


static void make_response(blkif_t *blkif, unsigned long id, 
                          unsigned short op, int st)
{
	blkif_response_t *resp;
	unsigned long     flags;
	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
	int more_to_do = 0;
	int notify;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
	/* Place on the response ring for the relevant domain. */ 
	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
	resp->id        = id;
	resp->operation = op;
	resp->status    = st;
	blk_ring->rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);

	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
		more_to_do = 1;

	}	
	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
	if (more_to_do)
		blkif_notify_work(blkif);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}

static int __init blkif_init(void)
{
	int i,ret,blktap_dir;
	tap_blkif_t *info;

	if (!is_running_on_xen())
		return -ENODEV;

	INIT_LIST_HEAD(&pending_free);
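	/* Pre-allocate two mmap regions up front (mmap_alloc ends up at 2);
	 * see the TODO near MAX_DYNAMIC_MEM about making this adjustable
	 * via sysfs. */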
        for(i = 0; i < 2; i++) {
		ret = req_increase();
		if (ret)
			break;
	}
	if (i == 0)
		return ret;

	tap_blkif_interface_init();

	alloc_pending_reqs = 0;

	tap_blkif_xenbus_init();

	/*Create the blktap devices, but do not map memory or waitqueue*/
	for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;

	ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops);
	blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);

	if ( (ret < 0)||(blktap_dir < 0) ) {
		WPRINTK("Couldn't register /dev/xen/blktap\n");
		return -ENOMEM;
	}	
	
	for(i = 0; i < MAX_TAP_DEV; i++ ) {
		info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
		if(tapfds[i] == NULL) return -ENOMEM;
		info->minor = i;
		info->pid = 0;
		info->blkif = NULL;

		ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i),
			S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);

		if(ret != 0) return -ENOMEM;
		info->dev_pending = info->dev_inuse = 0;

		DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
	}
	
	DPRINTK("Blktap device successfully created\n");

	return 0;
}

module_init(blkif_init);

MODULE_LICENSE("Dual BSD/GPL");