/******************************************************************************
 * drivers/xen/blktap/blktap.c
 * 
 * Back-end driver for user level virtual block devices. This portion of the
 * driver exports a 'unified' block-device interface that can be accessed
 * by any operating system that implements a compatible front end. Requests
 * are remapped to a user-space memory region.
 *
 * Based on the blkback driver code.
 * 
 * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <asm/hypervisor.h>
#include "common.h"
#include <xen/balloon.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/gfp.h>
#include <linux/poll.h>
#include <asm/tlbflush.h>
#include <linux/devfs_fs_kernel.h>

#define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
#define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */


struct class *xen_class;
EXPORT_SYMBOL_GPL(xen_class);

/*
 * Set up the xen class.  This should probably go in another file, but
 * since blktap is the only user of it so far, it gets to keep it.
 */
int setup_xen_class(void)
{
	int ret;

	if (xen_class)
		return 0;

	xen_class = class_create(THIS_MODULE, "xen");
	if (IS_ERR(xen_class)) {
		ret = PTR_ERR(xen_class);
		xen_class = NULL;
		return ret;
	}

	return 0;
}

/*
 * The maximum number of requests that can be outstanding at any time
 * is determined by 
 *
 *   [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] 
 *
 * where mmap_alloc < MAX_DYNAMIC_MEM.
 *
 * TODO:
 * mmap_alloc is initialised to 2 and should be adjustable on the fly via
 * sysfs.
 */
#define MAX_DYNAMIC_MEM 64
#define MAX_PENDING_REQS 64   
#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define MMAP_VADDR(_start, _req,_seg)                                   \
        (_start +                                                       \
         ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
         ((_seg) * PAGE_SIZE))
static int blkif_reqs = MAX_PENDING_REQS;
static int mmap_pages = MMAP_PAGES;
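
/*
 * Worked example (illustrative; assumes BLKIF_MAX_SEGMENTS_PER_REQUEST is
 * 11, its usual value in blkif.h): MMAP_PAGES = 64 * 11 = 704, so each
 * allocation bucket provides 704 foreign data pages, and with the initial
 * mmap_alloc of 2 up to 2 * 64 * 11 = 1408 segment pages can be mapped at
 * once.  Within a bucket, MMAP_VADDR(_start, 2, 3) resolves to
 * _start + (2 * 11 + 3) * PAGE_SIZE, i.e. page 25 of the data area.
 */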

#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
		      * have a bunch of pages reserved for shared
		      * memory rings.
		      */

/*Data struct associated with each of the tapdisk devices*/
typedef struct tap_blkif {
	struct vm_area_struct *vma;   /*Shared memory area                   */
	unsigned long rings_vstart;   /*Kernel memory mapping                */
	unsigned long user_vstart;    /*User memory mapping                  */
	unsigned long dev_inuse;      /*One process opens device at a time.  */
	unsigned long dev_pending;    /*In process of being opened           */
	unsigned long ring_ok;        /*make this ring->state                */
	blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
	wait_queue_head_t wait;       /*for poll                             */
	unsigned long mode;           /*current switching mode               */
	int minor;                    /*Minor number for tapdisk device      */
	pid_t pid;                    /*tapdisk process id                   */
	enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
						  shutdown                   */
	unsigned long *idx_map;       /*Record the user ring id to kern 
					[req id, idx] tuple                  */
	blkif_t *blkif;               /*Associate blkif with tapdev          */
	int sysfs_set;                /*Set if it has a class device.        */
} tap_blkif_t;

/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
typedef struct domid_translate {
	unsigned short domid;
	unsigned short busid;
} domid_translate_t ;

static domid_translate_t  translate_domid[MAX_TAP_DEV];
static tap_blkif_t *tapfds[MAX_TAP_DEV];

static int __init set_blkif_reqs(char *str)
{
	get_option(&str, &blkif_reqs);
	return 1;
}
__setup("blkif_reqs=", set_blkif_reqs);

/* Run-time switchable: /sys/module/blktap/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
module_param(log_stats, int, 0644);
module_param(debug_lvl, int, 0644);
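
/*
 * Usage note (illustrative): since these are module_param()s with mode
 * 0644, they can be flipped at run time from user space, e.g.
 *
 *   echo 1 > /sys/module/blktap/parameters/log_stats
 *   echo 1 > /sys/module/blktap/parameters/debug_lvl
 *
 * assuming the driver is built as a module named "blktap".
 */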

/*
 * Each outstanding request that we've passed to the lower device layers has a 
 * 'pending_req' allocated to it. Each buffer_head that completes decrements 
 * the pendcnt towards zero. When it hits zero, the specified domain has a 
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct {
	blkif_t       *blkif;
	unsigned long  id;
	unsigned short mem_idx;
	int            nr_pages;
	atomic_t       pendcnt;
	unsigned short operation;
	int            status;
	struct list_head free_list;
	int            inuse;
} pending_req_t;

static pending_req_t *pending_reqs[MAX_DYNAMIC_MEM];
static struct list_head pending_free;
static DEFINE_SPINLOCK(pending_free_lock);
static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
static int alloc_pending_reqs;

typedef unsigned int PEND_RING_IDX;

static inline int MASK_PEND_IDX(int i) { 
	return (i & (MAX_PENDING_REQS-1));
}

static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
	return (req - pending_reqs[idx]);
}

#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
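/*
 * Note: pending_prod/pending_cons are not defined in this file and
 * NR_PENDING_REQS is not referenced below; the macro appears to be
 * carried over from the blkback driver.
 */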

#define BLKBACK_INVALID_HANDLE (~0)

static struct page **foreign_pages[MAX_DYNAMIC_MEM];
static inline unsigned long idx_to_kaddr(
	unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
{
	unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx;
	unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]);
	return (unsigned long)pfn_to_kaddr(pfn);
}

static unsigned short mmap_alloc = 0;
static unsigned short mmap_lock = 0;
static unsigned short mmap_inuse = 0;

/******************************************************************
 * GRANT HANDLES
 */

/* When grant tables are used to map a frame for device access, the
 * handle returned must be used to unmap the frame. This is needed to
 * drop the ref count on the frame.
 */
struct grant_handle_pair
{
        grant_handle_t kernel;
        grant_handle_t user;
};

static struct grant_handle_pair 
    pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
#define pending_handle(_id, _idx, _i) \
    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
    + (_i)])
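
/*
 * Illustrative example: pending_handle(2, 5, 3) names the kernel/user
 * grant-handle pair for segment 3 of pending request 5 in allocation
 * bucket 2, i.e.
 * pending_grant_handles[2][5 * BLKIF_MAX_SEGMENTS_PER_REQUEST + 3].
 */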


static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/

#define BLKTAP_MINOR 0  /*/dev/xen/blktap has a dynamic major */
#define BLKTAP_DEV_DIR  "/dev/xen"

static int blktap_major;

/* blktap IOCTLs: */
#define BLKTAP_IOCTL_KICK_FE         1
#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
#define BLKTAP_IOCTL_SETMODE         3
#define BLKTAP_IOCTL_SENDPID	     4
#define BLKTAP_IOCTL_NEWINTF	     5
#define BLKTAP_IOCTL_MINOR	     6
#define BLKTAP_IOCTL_MAJOR	     7
#define BLKTAP_QUERY_ALLOC_REQS      8
#define BLKTAP_IOCTL_FREEINTF        9
#define BLKTAP_IOCTL_PRINT_IDXS      100  

/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp.             */

#define BLKTAP_MODE_INTERPOSE \
           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)


static inline int BLKTAP_MODE_VALID(unsigned long arg)
{
	return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
		(arg == BLKTAP_MODE_INTERCEPT_FE) ||
                (arg == BLKTAP_MODE_INTERPOSE   ));
}

/* Requests passing through the tap to userspace are re-assigned an ID.
 * We must record a mapping between the BE [IDX,ID] tuple and the userspace
 * ring ID. 
 */

static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
{
        return ((fe_dom << 16) | MASK_PEND_IDX(idx));
}

static inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
{
        return (PEND_RING_IDX)(id & 0x0000ffff);
}

static inline int ID_TO_MIDX(unsigned long id)
{
        return (int)(id >> 16);
}
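
/*
 * Illustrative example of the id packing used below: the driver stores
 * MAKE_ID(mmap_idx, pending_idx) in info->idx_map[usr_idx].  With
 * MAX_PENDING_REQS == 64, MAKE_ID(3, 70) == (3 << 16) | (70 & 63) == 0x30006,
 * from which ID_TO_MIDX() recovers 3 and ID_TO_IDX() recovers 6.
 */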

#define INVALID_REQ 0xdead0000

/*TODO: Convert to a free list*/
static inline int GET_NEXT_REQ(unsigned long *idx_map)
{
	int i;
	for (i = 0; i < MAX_PENDING_REQS; i++)
		if (idx_map[i] == INVALID_REQ)
			return i;

	return INVALID_REQ;
}


#define BLKTAP_INVALID_HANDLE(_g) \
    ((((_g)->kernel) == 0xFFFF) && (((_g)->user) == 0xFFFF))

#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
    } while(0)


/******************************************************************
 * BLKTAP VM OPS
 */

static struct page *blktap_nopage(struct vm_area_struct *vma,
				  unsigned long address,
				  int *type)
{
	/*
	 * if the page has not been mapped in by the driver then return
	 * NOPAGE_SIGBUS to the domain.
	 */

	return NOPAGE_SIGBUS;
}

struct vm_operations_struct blktap_vm_ops = {
	.nopage = blktap_nopage,
};

/******************************************************************
 * BLKTAP FILE OPS
 */
 
/*Function Declarations*/
static int get_next_free_dev(void);
static int blktap_open(struct inode *inode, struct file *filp);
static int blktap_release(struct inode *inode, struct file *filp);
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
static int blktap_ioctl(struct inode *inode, struct file *filp,
                        unsigned int cmd, unsigned long arg);
static unsigned int blktap_poll(struct file *file, poll_table *wait);

static struct file_operations blktap_fops = {
	.owner   = THIS_MODULE,
	.poll    = blktap_poll,
	.ioctl   = blktap_ioctl,
	.open    = blktap_open,
	.release = blktap_release,
	.mmap    = blktap_mmap,
};


static int get_next_free_dev(void)
{
	tap_blkif_t *info;
	int i = 0, ret = -1;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);
	
	while (i < MAX_TAP_DEV) {
		info = tapfds[i];
		if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
			&& (info->dev_pending == 0) ) {
			info->dev_pending = 1;
			ret = i;
			goto done;
		}
		i++;
	}
	
done:
	spin_unlock_irqrestore(&pending_free_lock, flags);

	/*
	 * We are protected by having the dev_pending set.
	 */
	if ((ret >= 0) && !tapfds[ret]->sysfs_set && xen_class) {
		class_device_create(xen_class, NULL,
				    MKDEV(blktap_major, ret), NULL,
				    "blktap%d", ret);
		tapfds[ret]->sysfs_set = 1;
	}
	return ret;
}

int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
{
	int i;
		
	for (i = 0; i < MAX_TAP_DEV; i++)
		if ( (translate_domid[i].domid == domid)
		    && (translate_domid[i].busid == xenbus_id) ) {
			tapfds[i]->blkif = blkif;
			tapfds[i]->status = RUNNING;
			return i;
		}
	return -1;
}

void signal_tapdisk(int idx) 
{
	tap_blkif_t *info;
	struct task_struct *ptask;

	if ((idx < 0) || (idx >= MAX_TAP_DEV))
		return;

	info = tapfds[idx];
	if ((idx > 0) && (info->pid > 0)) {
		ptask = find_task_by_pid(info->pid);
		if (ptask)
			info->status = CLEANSHUTDOWN;
	}
	info->blkif = NULL;
	return;
}

static int blktap_open(struct inode *inode, struct file *filp)
{
	blkif_sring_t *sring;
	int idx = iminor(inode) - BLKTAP_MINOR;
	tap_blkif_t *info;
	int i;
	
	if ((idx < 0) || (idx >= MAX_TAP_DEV) || (tapfds[idx] == NULL)) {
		WPRINTK("Unable to open device /dev/xen/blktap%d\n",
		       idx);
		return -ENOMEM;
	}
	DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
	
	info = tapfds[idx];
	
	/*Only one process can access device at a time*/
	if (test_and_set_bit(0, &info->dev_inuse))
		return -EBUSY;

	info->dev_pending = 0;
	    
	/* Allocate the fe ring. */
	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
	if (sring == NULL)
		goto fail_nomem;

	SetPageReserved(virt_to_page(sring));
    
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
	
	filp->private_data = info;
	info->vma = NULL;

	info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, 
				GFP_KERNEL);
	if (info->idx_map == NULL)
		goto fail_free_ring;

	if (idx > 0) {
		init_waitqueue_head(&info->wait);
		for (i = 0; i < MAX_PENDING_REQS; i++) 
			info->idx_map[i] = INVALID_REQ;
	}

	DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
	return 0;

 fail_free_ring:
	ClearPageReserved(virt_to_page(sring));
	free_page((unsigned long)sring);
 fail_nomem:
	info->dev_inuse = 0;
	return -ENOMEM;
}

static int blktap_release(struct inode *inode, struct file *filp)
{
	tap_blkif_t *info = filp->private_data;
	
	/* can this ever happen? - sdr */
	if (!info) {
		WPRINTK("Trying to free device that doesn't exist "
		       "[/dev/xen/blktap%d]\n",iminor(inode) - BLKTAP_MINOR);
		return -EBADF;
	}
	info->dev_inuse = 0;
	DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);

	/* Free the ring page. */
	ClearPageReserved(virt_to_page(info->ufe_ring.sring));
	free_page((unsigned long) info->ufe_ring.sring);

	/* Free the usr_idx -> [mmap_idx, pending_idx] translation map. */
	kfree(info->idx_map);
	info->idx_map = NULL;

	/* Clear any active mappings and free foreign map table */
	if (info->vma) {
		zap_page_range(
			info->vma, info->vma->vm_start, 
			info->vma->vm_end - info->vma->vm_start, NULL);
		info->vma = NULL;
	}
	
	if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
		kthread_stop(info->blkif->xenblkd);
		info->blkif->xenblkd = NULL;
		info->status = CLEANSHUTDOWN;
	}	
	return 0;
}


/* Note on mmap:
 * We need to map pages to user space in a way that will allow the block
 * subsystem to set up direct IO to them.  This couldn't be done before, because
 * there isn't really a sane way to translate a user virtual address down to a 
 * physical address when the page belongs to another domain.
 *
 * My first approach was to map the page in to kernel memory, add an entry
 * for it in the physical frame list (using alloc_lomem_region as in blkback)
 * and then attempt to map that page up to user space.  This is disallowed
 * by xen though, which realizes that we don't really own the machine frame
 * underlying the physical page.
 *
 * The new approach is to provide explicit support for this in xen linux.
 * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
 * mapped from other vms.  vma->vm_private_data is set up as a mapping 
 * from pages to actual page structs.  There is a new clause in get_user_pages
 * that does the right thing for this sort of mapping.
 */
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
{
	int size;
	struct page **map;
	int i;
	tap_blkif_t *info = filp->private_data;

	if (info == NULL) {
		WPRINTK("blktap: mmap, retrieving idx failed\n");
		return -ENOMEM;
	}
	
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &blktap_vm_ops;

	size = vma->vm_end - vma->vm_start;
	if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
		WPRINTK("you _must_ map exactly %d pages!\n",
		       mmap_pages + RING_PAGES);
		return -EAGAIN;
	}

	size >>= PAGE_SHIFT;
	info->rings_vstart = vma->vm_start;
	info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
    
	/* Map the ring pages to the start of the region and reserve it. */
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	if (remap_pfn_range(vma, vma->vm_start, 
			    __pa(info->ufe_ring.sring) >> PAGE_SHIFT, 
			    PAGE_SIZE, vma->vm_page_prot)) {
		WPRINTK("Mapping user ring failed!\n");
		goto fail;
	}

	/* Mark this VM as containing foreign pages, and set up mappings. */
	map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
		      * sizeof(struct page *),
		      GFP_KERNEL);
	if (map == NULL) {
		WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
		goto fail;
	}

	for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); i++)
		map[i] = NULL;
    
	vma->vm_private_data = map;
	vma->vm_flags |= VM_FOREIGN;

	info->vma = vma;
	info->ring_ok = 1;
	return 0;
 fail:
	/* Clear any active mappings. */
	zap_page_range(vma, vma->vm_start, 
		       vma->vm_end - vma->vm_start, NULL);

	return -ENOMEM;
}
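
/*
 * Illustrative user-space sketch (hypothetical, not taken from tapdisk):
 * the handler above expects the user process to open the character device
 * and map the ring plus data area with a single mmap() of exactly
 * (RING_PAGES + MMAP_PAGES) pages.  Assuming
 * BLKIF_MAX_SEGMENTS_PER_REQUEST is 11:
 *
 *   int fd = open("/dev/xen/blktap0", O_RDWR);
 *   size_t len = (1 + 64 * 11) * getpagesize();  // RING_PAGES + MMAP_PAGES
 *   void *area = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 * The shared ring occupies the first page of 'area'; the data pages that
 * follow are laid out at the MMAP_VADDR() offsets relative to user_vstart.
 */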


static int blktap_ioctl(struct inode *inode, struct file *filp,
                        unsigned int cmd, unsigned long arg)
{
	tap_blkif_t *info = filp->private_data;

	switch(cmd) {
	case BLKTAP_IOCTL_KICK_FE: 
	{
		/* There are fe messages to process. */
		return blktap_read_ufe_ring(info);
	}
	case BLKTAP_IOCTL_SETMODE:
	{
		if (info) {
			if (BLKTAP_MODE_VALID(arg)) {
				info->mode = arg;
				/* XXX: may need to flush rings here. */
				DPRINTK("blktap: set mode to %lx\n", 
				       arg);
				return 0;
			}
		}
		return 0;
	}
	case BLKTAP_IOCTL_PRINT_IDXS:
        {
		if (info) {
			printk("User Rings: \n-----------\n");
			printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
				"| req_prod: %2d, rsp_prod: %2d\n",
				info->ufe_ring.rsp_cons,
				info->ufe_ring.req_prod_pvt,
				info->ufe_ring.sring->req_prod,
				info->ufe_ring.sring->rsp_prod);
		}
            	return 0;
        }
	case BLKTAP_IOCTL_SENDPID:
	{
		if (info) {
			info->pid = (pid_t)arg;
			DPRINTK("blktap: pid received %d\n", 
			       info->pid);
		}
		return 0;
	}
	case BLKTAP_IOCTL_NEWINTF:
	{		
		uint64_t val = (uint64_t)arg;
		domid_translate_t *tr = (domid_translate_t *)&val;
		int newdev;

		DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
		       tr->domid, tr->busid);
		newdev = get_next_free_dev();
		if (newdev < 1) {
			WPRINTK("Error initialising /dev/xen/blktap - "
				"No more devices\n");
			return -1;
		}
		translate_domid[newdev].domid = tr->domid;
		translate_domid[newdev].busid = tr->busid;
		return newdev;
	}
	case BLKTAP_IOCTL_FREEINTF:
	{
		unsigned long dev = arg;
		unsigned long flags;

		/* Looking at another device */
		info = NULL;

		if ( (dev > 0) && (dev < MAX_TAP_DEV) )
			info = tapfds[dev];

		spin_lock_irqsave(&pending_free_lock, flags);
		if ( (info != NULL) && (info->dev_pending) )
			info->dev_pending = 0;
		spin_unlock_irqrestore(&pending_free_lock, flags);

		return 0;
	}
	case BLKTAP_IOCTL_MINOR:
	{
		unsigned long dev = arg;

		/* Looking at another device */
		info = NULL;
		
		if ( (dev > 0) && (dev < MAX_TAP_DEV) )
			info = tapfds[dev];
		
		if (info != NULL)
			return info->minor;
		else
			return -1;
	}
	case BLKTAP_IOCTL_MAJOR:
		return blktap_major;

	case BLKTAP_QUERY_ALLOC_REQS:
	{
		WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
		       alloc_pending_reqs, blkif_reqs);
		return (alloc_pending_reqs * 100) / blkif_reqs;
	}
	}
	return -ENOIOCTLCMD;
}
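
/*
 * Illustrative note (hypothetical user-space usage): BLKTAP_IOCTL_NEWINTF
 * expects a domid_translate_t packed directly into the ioctl argument:
 *
 *   domid_translate_t tr = { .domid = 1, .busid = 768 };
 *   unsigned long arg = 0;
 *   memcpy(&arg, &tr, sizeof(tr));
 *   int minor = ioctl(fd, BLKTAP_IOCTL_NEWINTF, arg);
 *
 * The return value is the minor of the newly reserved /dev/xen/blktap
 * device (or -1 if none is free); dom_to_devid() later matches the same
 * (domid, busid) pair to that device.
 */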

static unsigned int blktap_poll(struct file *filp, poll_table *wait)
{
	tap_blkif_t *info = filp->private_data;
	
	if (!info) {
		WPRINTK(" poll, retrieving idx failed\n");
		return 0;
	}

	/* do not work on the control device */
	if (!info->minor)
		return 0;

	poll_wait(filp, &info->wait, wait);
	if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
		RING_PUSH_REQUESTS(&info->ufe_ring);
		return POLLIN | POLLRDNORM;
	}
	return 0;
}
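
/*
 * Illustrative user-space event loop (hypothetical): poll() reports POLLIN
 * once requests have been pushed onto the user ring, and
 * BLKTAP_IOCTL_KICK_FE asks the driver to consume the responses queued
 * back by the user process (see blktap_read_ufe_ring() below):
 *
 *   struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *   while (poll(&pfd, 1, -1) > 0) {
 *           // ... service requests on the shared ring, queue responses ...
 *           ioctl(fd, BLKTAP_IOCTL_KICK_FE, 0);
 *   }
 */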

void blktap_kick_user(int idx)
{
	tap_blkif_t *info;

	if (idx == 0)
		return;
	
	info = tapfds[idx];
	
	if (info != NULL)
		wake_up_interruptible(&info->wait);

	return;
}

static int do_block_io_op(blkif_t *blkif);
static void dispatch_rw_block_io(blkif_t *blkif,
				 blkif_request_t *req,
				 pending_req_t *pending_req);
static void make_response(blkif_t *blkif, unsigned long id, 
                          unsigned short op, int st);

/******************************************************************
 * misc small helpers
 */
static int req_increase(void)
{
	int i, j;

	if (mmap_alloc >= MAX_DYNAMIC_MEM || mmap_lock)
		return -EINVAL;

	pending_reqs[mmap_alloc]  = kzalloc(sizeof(pending_req_t)
					    * blkif_reqs, GFP_KERNEL);
	foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);

	if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
		goto out_of_memory;

	DPRINTK("%s: reqs=%d, pages=%d\n",
		__FUNCTION__, blkif_reqs, mmap_pages);

	for (i = 0; i < blkif_reqs; i++) {
		list_add_tail(&pending_reqs[mmap_alloc][i].free_list, 
			      &pending_free);
		pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
		for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
			BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, 
								 i, j));
	}

	mmap_alloc++;
	DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
	return 0;

 out_of_memory:
	free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
	kfree(pending_reqs[mmap_alloc]);
	WPRINTK("%s: out of memory\n", __FUNCTION__);
	return -ENOMEM;
}

static void mmap_req_del(int mmap)
{
	BUG_ON(!spin_is_locked(&pending_free_lock));

	kfree(pending_reqs[mmap]);
	pending_reqs[mmap] = NULL;

	free_empty_pages_and_pagevec(foreign_pages[mmap], mmap_pages);
	foreign_pages[mmap] = NULL;

	mmap_lock = 0;
	mmap_alloc--;
	DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
}

static pending_req_t* alloc_req(void)
{
	pending_req_t *req = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pending_free_lock, flags);

	if (!list_empty(&pending_free)) {
		req = list_entry(pending_free.next, pending_req_t, free_list);
		list_del(&req->free_list);
	}

	if (req) {
		req->inuse = 1;
		alloc_pending_reqs++;
	}
	spin_unlock_irqrestore(&pending_free_lock, flags);

	return req;
}

static void free_req(pending_req_t *req)
{
	unsigned long flags;
	int was_empty;

	spin_lock_irqsave(&pending_free_lock, flags);

	alloc_pending_reqs--;
	req->inuse = 0;
	if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
		mmap_inuse--;
		if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
		spin_unlock_irqrestore(&pending_free_lock, flags);
		return;
	}
	was_empty = list_empty(&pending_free);
	list_add(&req->free_list, &pending_free);

	spin_unlock_irqrestore(&pending_free_lock, flags);

	if (was_empty)
		wake_up(&pending_free_wq);
}

static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int 
			    tapidx)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
	unsigned int i, invcount = 0;
	struct grant_handle_pair *khandle;
	uint64_t ptep;
	int ret, mmap_idx;
	unsigned long kvaddr, uvaddr;

	tap_blkif_t *info = tapfds[tapidx];
	
	if (info == NULL) {
		WPRINTK("fast_flush: Couldn't get info!\n");
		return;
	}
	mmap_idx = req->mem_idx;

	for (i = 0; i < req->nr_pages; i++) {
		kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
		uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);

		khandle = &pending_handle(mmap_idx, k_idx, i);
		if (BLKTAP_INVALID_HANDLE(khandle)) {
			WPRINTK("BLKTAP_INVALID_HANDLE\n");
			continue;
		}
		gnttab_set_unmap_op(&unmap[invcount], 
				    idx_to_kaddr(mmap_idx, k_idx, i), 
				    GNTMAP_host_map, khandle->kernel);
		invcount++;

		if (create_lookup_pte_addr(
		    info->vma->vm_mm,
		    MMAP_VADDR(info->user_vstart, u_idx, i), 
		    &ptep) !=0) {
			WPRINTK("Couldn't get a pte addr!\n");
			return;
		}

		gnttab_set_unmap_op(&unmap[invcount], 
			ptep, GNTMAP_host_map,
			khandle->user);
		invcount++;
            
		BLKTAP_INVALIDATE_HANDLE(khandle);
	}
	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	BUG_ON(ret);
	
	if (info->vma != NULL)
		zap_page_range(info->vma, 
			       MMAP_VADDR(info->user_vstart, u_idx, 0), 
			       req->nr_pages << PAGE_SHIFT, NULL);
}

/******************************************************************
 * SCHEDULER FUNCTIONS
 */

static void print_stats(blkif_t *blkif)
{
	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
	       current->comm, blkif->st_oo_req,
	       blkif->st_rd_req, blkif->st_wr_req);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
	blkif->st_wr_req = 0;
	blkif->st_oo_req = 0;
}

int tap_blkif_schedule(void *arg)
{
	blkif_t *blkif = arg;

	blkif_get(blkif);

	if (debug_lvl)
		printk(KERN_DEBUG "%s: started\n", current->comm);

	while (!kthread_should_stop()) {
		wait_event_interruptible(
			blkif->wq,
			blkif->waiting_reqs || kthread_should_stop());
		wait_event_interruptible(
			pending_free_wq,
			!list_empty(&pending_free) || kthread_should_stop());

		blkif->waiting_reqs = 0;
		smp_mb(); /* clear flag *before* checking for work */

		if (do_block_io_op(blkif))
			blkif->waiting_reqs = 1;

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
	}

	if (log_stats)
		print_stats(blkif);
	if (debug_lvl)
		printk(KERN_DEBUG "%s: exiting\n", current->comm);

	blkif->xenblkd = NULL;
	blkif_put(blkif);

	return 0;
}

/******************************************************************
 * COMPLETION CALLBACK -- Called by user level ioctl()
 */

static int blktap_read_ufe_ring(tap_blkif_t *info)
{
	/* This is called to read responses from the UFE ring. */
	RING_IDX i, j, rp;
	blkif_response_t *resp;
	blkif_t *blkif=NULL;
	int pending_idx, usr_idx, mmap_idx;
	pending_req_t *pending_req;
	
	if (!info)
		return 0;

	/* We currently only forward packets in INTERCEPT_FE mode. */
	if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
		return 0;

	/* for each outstanding message on the UFEring  */
	rp = info->ufe_ring.sring->rsp_prod;
	rmb();
        
	for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
		resp = RING_GET_RESPONSE(&info->ufe_ring, i);
		++info->ufe_ring.rsp_cons;

		/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
		usr_idx = (int)resp->id;
		pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
		mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);

		if ( (mmap_idx >= mmap_alloc) || 
		   (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) ) {
			WPRINTK("Incorrect req map "
			       "[%d], internal map [%d,%d (%d)]\n", 
			       usr_idx, mmap_idx, 
			       ID_TO_IDX(info->idx_map[usr_idx]),
			       MASK_PEND_IDX(
				       ID_TO_IDX(info->idx_map[usr_idx])));
			/* Do not index pending_reqs with a bogus id. */
			continue;
		}

		pending_req = &pending_reqs[mmap_idx][pending_idx];
		blkif = pending_req->blkif;

		for (j = 0; j < pending_req->nr_pages; j++) {

			unsigned long kvaddr, uvaddr;
			struct page **map = info->vma->vm_private_data;
			struct page *pg;
			int offset;

			uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
			kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j);

			pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
			ClearPageReserved(pg);
			offset = (uvaddr - info->vma->vm_start) 
				>> PAGE_SHIFT;
			map[offset] = NULL;
		}
		fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
		make_response(blkif, pending_req->id, resp->operation,
			      resp->status);
		info->idx_map[usr_idx] = INVALID_REQ;
		blkif_put(pending_req->blkif);
		free_req(pending_req);
	}
		
	return 0;
}


/******************************************************************************
 * NOTIFICATION FROM GUEST OS.
 */

static void blkif_notify_work(blkif_t *blkif)
{
	blkif->waiting_reqs = 1;
	wake_up(&blkif->wq);
}

irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
	blkif_notify_work(dev_id);
	return IRQ_HANDLED;
}



/******************************************************************
 * DOWNWARD CALLS -- These interface with the block-device layer proper.
 */
static int print_dbug = 1;
static int do_block_io_op(blkif_t *blkif)
{
	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
	blkif_request_t *req;
	pending_req_t *pending_req;
	RING_IDX rc, rp;
	int more_to_do = 0;
	tap_blkif_t *info;

	rc = blk_ring->req_cons;
	rp = blk_ring->sring->req_prod;
	rmb(); /* Ensure we see queued requests up to 'rp'. */

	/*Check blkif has corresponding UE ring*/
	if (blkif->dev_num == -1) {
		/*oops*/
		if (print_dbug) {
			WPRINTK("Corresponding UE " 
			       "ring does not exist!\n");
			print_dbug = 0; /*We only print this message once*/
		}
		return 0;
	}

	info = tapfds[blkif->dev_num];
	if (info == NULL || !info->dev_inuse) {
		if (print_dbug) {
			WPRINTK("Can't get UE info!\n");
			print_dbug = 0;
		}
		return 0;
	}

	while (rc != rp) {
		
		if (RING_FULL(&info->ufe_ring)) {
			WPRINTK("RING_FULL! More to do\n");
			more_to_do = 1;
			break;
		}
		
		if (RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
			WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
			       " More to do\n");
			more_to_do = 1;
			break;		
		}

		pending_req = alloc_req();
		if (NULL == pending_req) {
			blkif->st_oo_req++;
			more_to_do = 1;
			break;
		}

		req = RING_GET_REQUEST(blk_ring, rc);
		blk_ring->req_cons = ++rc; /* before make_response() */	

		switch (req->operation) {
		case BLKIF_OP_READ:
			blkif->st_rd_req++;
			dispatch_rw_block_io(blkif, req, pending_req);
			break;

		case BLKIF_OP_WRITE:
			blkif->st_wr_req++;
			dispatch_rw_block_io(blkif, req, pending_req);
			break;

		default:
			WPRINTK("unknown operation [%d]\n",
				req->operation);
			make_response(blkif, req->id, req->operation,
				      BLKIF_RSP_ERROR);
			free_req(pending_req);
			break;
		}
	}
		
	blktap_kick_user(blkif->dev_num);

	return more_to_do;
}

static void dispatch_rw_block_io(blkif_t *blkif,
				 blkif_request_t *req,
				 pending_req_t *pending_req)
{
	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
	int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
	unsigned int nseg;
	int ret, i;
	tap_blkif_t *info = tapfds[blkif->dev_num];
	uint64_t sector;
	
	blkif_request_t *target;
	int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
	int usr_idx = GET_NEXT_REQ(info->idx_map);
	uint16_t mmap_idx = pending_req->mem_idx;

	/*Check we have space on user ring - should never fail*/
	if (usr_idx == INVALID_REQ)
		goto fail_response;
	
	/* Check that number of segments is sane. */
	nseg = req->nr_segments;
	if ( unlikely(nseg == 0) || 
	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
		WPRINTK("Bad number of segments in request (%d)\n", nseg);
		goto fail_response;
	}
	
	/* Make sure userspace is ready. */
	if (!info->ring_ok) {
		WPRINTK("blktap: ring not ready for requests!\n");
		goto fail_response;
	}

	if (RING_FULL(&info->ufe_ring)) {
		WPRINTK("blktap: fe_ring is full, can't add "
			"IO Request will be dropped. %d %d\n",
			RING_SIZE(&info->ufe_ring),
			RING_SIZE(&blkif->blk_ring));
		goto fail_response;
	}

	pending_req->blkif     = blkif;
	pending_req->id        = req->id;
	pending_req->operation = operation;
	pending_req->status    = BLKIF_RSP_OKAY;
	pending_req->nr_pages  = nseg;
	op = 0;
	for (i = 0; i < nseg; i++) {
		unsigned long uvaddr;
		unsigned long kvaddr;
		uint64_t ptep;
		struct page *page;
		uint32_t flags;

		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
		kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
		page = virt_to_page(kvaddr);

		sector = req->sector_number + (8*i);
		if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
			WPRINTK("BLKTAP: Sector request greater" 
			       "than size\n");
			WPRINTK("BLKTAP: %s request sector" 
			       "[%llu,%llu], Total [%llu]\n",
			       (req->operation == 
				BLKIF_OP_WRITE ? "WRITE" : "READ"),
				(long long unsigned) sector,
				(long long unsigned) sector>>9,
				blkif->sectors);
		}

		flags = GNTMAP_host_map;
		if (operation == WRITE)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[op], kvaddr, flags,
				  req->seg[i].gref, blkif->domid);
		op++;

		/* Now map it to user. */
		ret = create_lookup_pte_addr(info->vma->vm_mm, 
					     uvaddr, &ptep);
		if (ret) {
			WPRINTK("Couldn't get a pte addr!\n");
			fast_flush_area(pending_req, pending_idx, usr_idx, 
					blkif->dev_num);
			goto fail_flush;
		}

		flags = GNTMAP_host_map | GNTMAP_application_map
			| GNTMAP_contains_pte;
		if (operation == WRITE)
			flags |= GNTMAP_readonly;
		gnttab_set_map_op(&map[op], ptep, flags,
				  req->seg[i].gref, blkif->domid);
		op++;
	}

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
	BUG_ON(ret);

	for (i = 0; i < (nseg*2); i+=2) {
		unsigned long uvaddr;
		unsigned long kvaddr;
		unsigned long offset;
		struct page *pg;

		uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
		kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2);

		if (unlikely(map[i].status != 0)) {
			WPRINTK("invalid kernel buffer -- "
				"could not remap it\n");
			goto fail_flush;
		}

		if (unlikely(map[i+1].status != 0)) {
			WPRINTK("invalid user buffer -- "
				"could not remap it\n");
			goto fail_flush;
		}

		pending_handle(mmap_idx, pending_idx, i/2).kernel 
			= map[i].handle;
		pending_handle(mmap_idx, pending_idx, i/2).user   
			= map[i+1].handle;
		set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
		offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
		((struct page **)info->vma->vm_private_data)[offset] =
			pg;
	}
	/* Mark mapped pages as reserved: */
	for (i = 0; i < req->nr_segments; i++) {
		unsigned long kvaddr;
		struct page *pg;

		kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
		pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
		SetPageReserved(pg);
	}
	
	/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
	info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);

	blkif_get(blkif);
	/* Finally, write the request message to the user ring. */
	target = RING_GET_REQUEST(&info->ufe_ring,
				  info->ufe_ring.req_prod_pvt);
	memcpy(target, req, sizeof(*req));
	target->id = usr_idx;
	info->ufe_ring.req_prod_pvt++;
	return;

 fail_flush:
	WPRINTK("Reached Fail_flush\n");
	fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
 fail_response:
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
} 



/******************************************************************
 * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
 */


static void make_response(blkif_t *blkif, unsigned long id, 
                          unsigned short op, int st)
{
	blkif_response_t *resp;
	unsigned long     flags;
	blkif_back_ring_t *blk_ring = &blkif->blk_ring;
	int more_to_do = 0;
	int notify;

	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
	/* Place on the response ring for the relevant domain. */ 
	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
	resp->id        = id;
	resp->operation = op;
	resp->status    = st;
	blk_ring->rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);

	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
		more_to_do = 1;

	}	
	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
	if (more_to_do)
		blkif_notify_work(blkif);
	if (notify)
		notify_remote_via_irq(blkif->irq);
}

static int __init blkif_init(void)
{
	int i,ret,blktap_dir;
	tap_blkif_t *info;

	if (!is_running_on_xen())
		return -ENODEV;

	INIT_LIST_HEAD(&pending_free);
	for (i = 0; i < 2; i++) {
		ret = req_increase();
		if (ret)
			break;
	}
	if (i == 0)
		return ret;

	tap_blkif_interface_init();

	alloc_pending_reqs = 0;

	tap_blkif_xenbus_init();

	/*Create the blktap devices, but do not map memory or waitqueue*/
	for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;

	/* Dynamically allocate a major for this device */
	ret = register_chrdev(0, "blktap", &blktap_fops);
	blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);

	if ( (ret < 0)||(blktap_dir < 0) ) {
		WPRINTK("Couldn't register /dev/xen/blktap\n");
		return -ENOMEM;
	}	
	
	blktap_major = ret;

	for(i = 0; i < MAX_TAP_DEV; i++ ) {
		info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
		if(tapfds[i] == NULL)
			return -ENOMEM;
		info->minor = i;
		info->pid = 0;
		info->blkif = NULL;

		ret = devfs_mk_cdev(MKDEV(blktap_major, i),
			S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);

		if(ret != 0)
			return -ENOMEM;
		info->dev_pending = info->dev_inuse = 0;

		DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
	}
	
	/* Make sure the xen class exists */
	if (!setup_xen_class()) {
		/*
		 * This will allow udev to create the blktap ctrl device.
		 * We only want to create blktap0 first.  We don't want
		 * to flood the sysfs system with needless blktap devices.
		 * We only create the device when a request of a new device is
		 * made.
		 */
		class_device_create(xen_class, NULL,
				    MKDEV(blktap_major, 0), NULL,
				    "blktap0");
		tapfds[0]->sysfs_set = 1;
	} else {
		/* this is bad, but not fatal */
		WPRINTK("blktap: sysfs xen_class not created\n");
	}

	DPRINTK("Blktap device successfully created\n");

	return 0;
}

module_init(blkif_init);

MODULE_LICENSE("Dual BSD/GPL");